freedreno: Re-indent

clang-format -fallback-style=none --style=file -i src/gallium/drivers/freedreno/*.[ch] src/gallium/drivers/freedreno/*/*.[ch]
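A check-only variant of the same invocation can verify that the tree stays conformant (a hypothetical follow-up sketch, not part of this commit; it assumes clang-format >= 10, which added --dry-run and -Werror):

# --dry-run lists the files that would be reformatted instead of editing them
# in place, and -Werror turns any such file into a non-zero exit, e.g. for CI.
clang-format --dry-run -Werror -fallback-style=none --style=file src/gallium/drivers/freedreno/*.[ch] src/gallium/drivers/freedreno/*/*.[ch]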

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8883>
commit 2d439343ea (parent fdcae5b5b8)
Author: Rob Clark, 2021-04-14 08:04:06 -07:00 (committed by Marge Bot)
176 changed files with 30083 additions and 30077 deletions


@@ -26,87 +26,90 @@
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_blend.h"
#include "fd2_context.h"
#include "fd2_util.h"
static enum a2xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND2_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND2_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND2_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND2_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND2_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
void *
fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
const struct pipe_rt_blend_state *rt = &cso->rt[0];
struct fd2_blend_stateobj *so;
unsigned rop = PIPE_LOGICOP_COPY;
if (cso->logicop_enable)
rop = cso->logicop_func; /* 1:1 mapping with hw */
if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state");
return NULL;
}
so = CALLOC_STRUCT(fd2_blend_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop);
so->rb_blendcontrol =
A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(
fd_blend_factor(rt->rgb_src_factor)) |
A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(
fd_blend_factor(rt->rgb_dst_factor));
/* hardware doesn't support SRC_ALPHA_SATURATE for alpha, but it is
* equivalent to ONE */
unsigned alpha_src_factor = rt->alpha_src_factor;
if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
alpha_src_factor = PIPE_BLENDFACTOR_ONE;
so->rb_blendcontrol |=
A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(alpha_src_factor)) |
A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(
fd_blend_factor(rt->alpha_dst_factor));
if (rt->colormask & PIPE_MASK_R)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
if (rt->colormask & PIPE_MASK_G)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN;
if (rt->colormask & PIPE_MASK_B)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE;
if (rt->colormask & PIPE_MASK_A)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA;
if (!rt->blend_enable)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE;
if (cso->dither)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
return so;
}


@@ -27,23 +27,23 @@
#ifndef FD2_BLEND_H_
#define FD2_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd2_blend_stateobj {
struct pipe_blend_state base;
uint32_t rb_blendcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
uint32_t rb_colormask;
};
static inline struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd2_blend_stateobj *)blend;
}
void *fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
#endif /* FD2_BLEND_H_ */


@@ -24,7 +24,6 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "fd2_context.h"
#include "fd2_blend.h"
#include "fd2_draw.h"
@@ -37,11 +36,10 @@
#include "fd2_zsa.h"
static void
fd2_context_destroy(struct pipe_context *pctx) in_dt
{
fd_context_destroy(pctx);
free(pctx);
}
static struct pipe_resource *
@@ -64,11 +62,12 @@ create_solid_vertexbuf(struct pipe_context *pctx)
};
/* clang-format on */
struct pipe_resource *prsc =
pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
sizeof(init_shader_const));
pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
init_shader_const);
return prsc;
}
/* clang-format off */
@@ -95,40 +94,40 @@ static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
struct pipe_context *
fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
struct pipe_context *pctx;
if (!fd2_ctx)
return NULL;
pctx = &fd2_ctx->base.base;
pctx->screen = pscreen;
fd2_ctx->base.dev = fd_device_ref(screen->dev);
fd2_ctx->base.screen = fd_screen(pscreen);
pctx->destroy = fd2_context_destroy;
pctx->create_blend_state = fd2_blend_state_create;
pctx->create_rasterizer_state = fd2_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create;
fd2_draw_init(pctx);
fd2_gmem_init(pctx);
fd2_texture_init(pctx);
fd2_prog_init(pctx);
fd2_emit_init(pctx);
pctx = fd_context_init(
&fd2_ctx->base, pscreen,
(screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes, priv, flags);
if (!pctx)
return NULL;
/* construct vertex state used for solid ops (clear, and gmem<->mem) */
fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
fd2_query_context_init(pctx);
return pctx;
}


@@ -30,21 +30,21 @@
#include "freedreno_context.h"
struct fd2_context {
struct fd_context base;
/* vertex buf used for clear/gmem->mem vertices, and mem->gmem
* vertices and tex coords:
*/
struct pipe_resource *solid_vertexbuf;
};
static inline struct fd2_context *
fd2_context(struct fd_context *ctx)
{
return (struct fd2_context *)ctx;
}
struct pipe_context *fd2_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags);
#endif /* FD2_CONTEXT_H_ */

(File diff suppressed because it is too large.)


@@ -34,10 +34,10 @@
void fd2_draw_init(struct pipe_context *pctx);
enum {
GMEM_PATCH_FASTCLEAR_COLOR,
GMEM_PATCH_FASTCLEAR_DEPTH,
GMEM_PATCH_FASTCLEAR_COLOR_DEPTH,
GMEM_PATCH_RESTORE_INFO,
};
#endif /* FD2_DRAW_H_ */


@@ -25,15 +25,15 @@
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "freedreno_resource.h"
#include "fd2_emit.h"
#include "fd2_blend.h"
#include "fd2_context.h"
#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_rasterizer.h"
#include "fd2_texture.h"
@@ -49,347 +49,353 @@
static void
emit_constants(struct fd_ringbuffer *ring, uint32_t base,
struct fd_constbuf_stateobj *constbuf,
struct fd2_shader_stateobj *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t start_base = base;
unsigned i;
/* emit user constants: */
while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
/* hmm, sometimes we still seem to end up with consts bound,
* even if shader isn't using them, which ends up overwriting
* const reg's used for immediates.. this is a hack to work
* around that:
*/
if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
break;
const uint32_t *dwords;
if (cb->user_buffer) {
dwords = cb->user_buffer;
} else {
struct fd_resource *rsc = fd_resource(cb->buffer);
dwords = fd_bo_map(rsc->bo);
}
dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);
OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
OUT_RING(ring, base);
for (i = 0; i < size; i++)
OUT_RING(ring, *(dwords++));
base += size;
enabled_mask &= ~(1 << index);
}
/* emit shader immediates: */
if (shader) {
for (i = 0; i < shader->num_immediates; i++) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
OUT_RING(ring, shader->immediates[i].val[0]);
OUT_RING(ring, shader->immediates[i].val[1]);
OUT_RING(ring, shader->immediates[i].val[2]);
OUT_RING(ring, shader->immediates[i].val[3]);
base += 4;
}
}
}
typedef uint32_t texmask;
static texmask
emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
{
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
static const struct fd2_sampler_stateobj dummy_sampler = {};
static const struct fd2_pipe_sampler_view dummy_view = {};
const struct fd2_sampler_stateobj *sampler;
const struct fd2_pipe_sampler_view *view;
struct fd_resource *rsc;
if (emitted & (1 << const_idx))
return 0;
sampler = tex->samplers[samp_id]
? fd2_sampler_stateobj(tex->samplers[samp_id])
: &dummy_sampler;
view = tex->textures[samp_id] ? fd2_pipe_sampler_view(tex->textures[samp_id])
: &dummy_view;
rsc = view->base.texture ? fd_resource(view->base.texture) : NULL;
OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
OUT_RING(ring, sampler->tex0 | view->tex0);
if (rsc)
OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0);
else
OUT_RING(ring, 0);
OUT_RING(ring, view->tex2);
OUT_RING(ring, sampler->tex3 | view->tex3);
OUT_RING(ring, sampler->tex4 | view->tex4);
if (rsc && rsc->b.b.last_level)
OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0);
else
OUT_RING(ring, view->tex5);
return (1 << const_idx);
}
static void
emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
{
struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT];
struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX];
texmask emitted = 0;
unsigned i;
for (i = 0; i < verttex->num_samplers; i++)
if (verttex->samplers[i])
emitted |= emit_texture(ring, ctx, verttex, i, emitted);
for (i = 0; i < fragtex->num_samplers; i++)
if (fragtex->samplers[i])
emitted |= emit_texture(ring, ctx, fragtex, i, emitted);
}
void
fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n)
{
unsigned i;
OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
for (i = 0; i < n; i++) {
struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
OUT_RING(ring, vbufs[i].size);
}
}
void
fd2_emit_state_binning(struct fd_context *ctx,
const enum fd_dirty_3d_state dirty)
{
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd_ringbuffer *ring = ctx->batch->binning;
/* subset of fd2_emit_state needed for hw binning on a20x */
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE))
fd2_program_emit(ctx, ring, &ctx->prog);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
emit_constants(ring, VS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
}
if (dirty & FD_DIRTY_VIEWPORT) {
OUT_PKT3(ring, CP_SET_CONSTANT, 9);
OUT_RING(ring, 0x00000184);
OUT_RING(ring, fui(ctx->viewport.translate[0]));
OUT_RING(ring, fui(ctx->viewport.translate[1]));
OUT_RING(ring, fui(ctx->viewport.translate[2]));
OUT_RING(ring, fui(0.0f));
OUT_RING(ring, fui(ctx->viewport.scale[0]));
OUT_RING(ring, fui(ctx->viewport.scale[1]));
OUT_RING(ring, fui(ctx->viewport.scale[2]));
OUT_RING(ring, fui(0.0f));
}
/* not sure why this is needed */
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend->rb_blendcontrol);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask);
}
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE);
}
void
fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
{
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
struct fd2_shader_stateobj *fs = ctx->prog.fs;
struct fd_ringbuffer *ring = ctx->batch->draw;
/* NOTE: we probably want to eventually refactor this so each state
* object handles emitting it's own state.. although the mapping of
* state to registers is not always orthogonal, sometimes a single
* register contains bitfields coming from multiple state objects,
* so not sure the best way to deal with that yet.
*/
if (dirty & FD_DIRTY_SAMPLE_MASK) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, ctx->sample_mask);
}
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) {
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
uint32_t val = zsa->rb_depthcontrol;
if (fs->has_kill)
val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
OUT_RING(ring, val);
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
OUT_RING(ring, zsa->rb_stencilrefmask |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_alpha_ref);
}
if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
struct fd2_rasterizer_stateobj *rasterizer =
fd2_rasterizer_stateobj(ctx->rasterizer);
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
OUT_RING(ring, rasterizer->pa_su_point_size);
OUT_RING(ring, rasterizer->pa_su_point_minmax);
OUT_RING(ring, rasterizer->pa_su_line_cntl);
OUT_RING(ring, rasterizer->pa_sc_line_stipple);
OUT_PKT3(ring, CP_SET_CONSTANT, 6);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */
if (rasterizer->base.offset_tri) {
/* TODO: why multiply scale by 2 ? without it deqp test fails
* deqp/piglit tests aren't very precise
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE));
OUT_RING(ring,
fui(rasterizer->base.offset_scale * 2.0f)); /* FRONT_SCALE */
OUT_RING(ring, fui(rasterizer->base.offset_units)); /* FRONT_OFFSET */
OUT_RING(ring,
fui(rasterizer->base.offset_scale * 2.0f)); /* BACK_SCALE */
OUT_RING(ring, fui(rasterizer->base.offset_units)); /* BACK_OFFSET */
}
}
/* NOTE: scissor enabled bit is part of rasterizer state: */
if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */
scissor->miny));
OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */
scissor->maxy));
ctx->batch->max_scissor.minx =
MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.miny =
MIN2(ctx->batch->max_scissor.miny, scissor->miny);
ctx->batch->max_scissor.maxx =
MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
ctx->batch->max_scissor.maxy =
MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
}
if (dirty & FD_DIRTY_VIEWPORT) {
OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
/* set viewport in C65/C66, for a20x hw binning and fragcoord.z */
OUT_PKT3(ring, CP_SET_CONSTANT, 9);
OUT_RING(ring, 0x00000184);
OUT_RING(ring, fui(ctx->viewport.translate[0]));
OUT_RING(ring, fui(ctx->viewport.translate[1]));
OUT_RING(ring, fui(ctx->viewport.translate[2]));
OUT_RING(ring, fui(0.0f));
OUT_RING(ring, fui(ctx->viewport.scale[0]));
OUT_RING(ring, fui(ctx->viewport.scale[1]));
OUT_RING(ring, fui(ctx->viewport.scale[2]));
OUT_RING(ring, fui(0.0f));
}
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE))
fd2_program_emit(ctx, ring, &ctx->prog);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
emit_constants(ring, VS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
emit_constants(ring, PS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
(dirty & FD_DIRTY_PROG) ? ctx->prog.fs : NULL);
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend->rb_blendcontrol);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask);
}
if (dirty & FD_DIRTY_BLEND_COLOR) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
}
if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
emit_textures(ring, ctx);
}
/* emit per-context initialization:
@@ -397,177 +403,175 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
void
fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
if (is_a20x(ctx->screen)) {
OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
OUT_RING(ring, A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
/* not sure why this is required */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY));
OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x00000002);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL));
OUT_RING(ring, 0x00000002);
} else {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000003b);
}
/* enable perfcntrs */
OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
OUT_RING(ring, COND(FD_DBG(PERFC), 1));
/* note: perfcntrs don't work without the PM_OVERRIDE bit */
OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0x00000fff);
OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
OUT_RING(ring, 0x00000002);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
A2XX_SQ_VS_CONST_SIZE(0x100));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
OUT_RING(ring,
A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) | A2XX_SQ_PS_CONST_SIZE(0xe0));
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
OUT_RING(ring, 0xffffffff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, 0x00000000);
// XXX we change this dynamically for draw/clear.. vs gmem<->mem..
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
OUT_RING(ring, 0x88888888);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
OUT_RING(ring, 0xffffffff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
A2XX_RB_COPY_DEST_INFO_WRITE_RED |
A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */
OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
OUT_RING(ring, 0x000005d0);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x5f601000);
OUT_RING(ring, 0x00000001);
OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
OUT_RING(ring, 0x00000180);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00000300);
OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
OUT_RING(ring, 0x80000180);
/* not sure what this form of CP_SET_CONSTANT is.. */
OUT_PKT3(ring, CP_SET_CONSTANT, 13);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x469c4000);
OUT_RING(ring, 0x3f800000);
OUT_RING(ring, 0x3f000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x40000000);
OUT_RING(ring, 0x3f400000);
OUT_RING(ring, 0x3ec00000);
OUT_RING(ring, 0x3e800000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring,
A2XX_RB_COLOR_MASK_WRITE_RED | A2XX_RB_COLOR_MASK_WRITE_GREEN |
A2XX_RB_COLOR_MASK_WRITE_BLUE | A2XX_RB_COLOR_MASK_WRITE_ALPHA);
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
}
void
fd2_emit_init_screen(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
screen->emit_ib = fd2_emit_ib;
}
void


@@ -34,14 +34,16 @@
struct fd_ringbuffer;
struct fd2_vertex_buf {
unsigned offset, size;
struct pipe_resource *prsc;
};
void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n);
void fd2_emit_state_binning(struct fd_context *ctx,
const enum fd_dirty_3d_state dirty) assert_dt;
void fd2_emit_state(struct fd_context *ctx,
const enum fd_dirty_3d_state dirty) assert_dt;
void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd2_emit_init_screen(struct pipe_screen *pscreen);
@@ -50,7 +52,7 @@ void fd2_emit_init(struct pipe_context *pctx);
static inline void
fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
__OUT_IB(ring, false, target);
}
#endif /* FD2_EMIT_H */

(File diff suppressed because it is too large.)


@@ -25,319 +25,321 @@
* Jonathan Marek <jonathan@marek.ca>
*/
#include "nir/tgsi_to_nir.h"
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "nir/tgsi_to_nir.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "freedreno_program.h"
#include "ir2.h"
#include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "fd2_texture.h"
#include "fd2_util.h"
#include "ir2/instr-a2xx.h"
#include "ir2.h"
static struct fd2_shader_stateobj *
create_shader(struct pipe_context *pctx, gl_shader_stage type)
{
struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
if (!so)
return NULL;
so->type = type;
so->is_a20x = is_a20x(fd_context(pctx)->screen);
return so;
}
static void
delete_shader(struct fd2_shader_stateobj *so)
{
if (!so)
return;
ralloc_free(so->nir);
for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
free(so->variant[i].info.dwords);
free(so);
}
static void
emit(struct fd_ringbuffer *ring, gl_shader_stage type,
struct ir2_shader_info *info, struct util_dynarray *patches)
{
unsigned i;
assert(info->sizedwords);
OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
OUT_RING(ring, info->sizedwords);
if (patches)
util_dynarray_append(patches, uint32_t *,
&ring->cur[info->mem_export_ptr]);
for (i = 0; i < info->sizedwords; i++)
OUT_RING(ring, info->dwords[i]);
}
static int
ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void *
fd2_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
if (!so)
return NULL;
so->nir = (cso->type == PIPE_SHADER_IR_NIR)
? cso->ir.nir
: tgsi_to_nir(cso->tokens, pctx->screen, false);
NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
ir2_glsl_type_size, (nir_lower_io_options)0);
if (ir2_optimize_nir(so->nir, true))
goto fail;
so->first_immediate = so->nir->num_uniforms;
ir2_compile(so, 0, NULL);
ralloc_free(so->nir);
so->nir = NULL;
return so;
fail:
delete_shader(so);
return NULL;
}
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd2_shader_stateobj *so = hwcso;
delete_shader(so);
}
static void *
fd2_vp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
if (!so)
return NULL;
so->nir = (cso->type == PIPE_SHADER_IR_NIR)
? cso->ir.nir
: tgsi_to_nir(cso->tokens, pctx->screen, false);
NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
ir2_glsl_type_size, (nir_lower_io_options)0);
if (ir2_optimize_nir(so->nir, true))
goto fail;
so->first_immediate = so->nir->num_uniforms;
/* compile binning variant now */
ir2_compile(so, 0, NULL);
return so;
fail:
delete_shader(so);
return NULL;
}
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd2_shader_stateobj *so = hwcso;
delete_shader(so);
}
static void
patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
instr_fetch_vtx_t *instr, uint16_t dst_swiz) assert_dt
{
struct surface_format fmt = fd2_pipe2surface(elem->src_format);
instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);
instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;
instr->num_format_all = fmt.num_format;
instr->format = fmt.format;
instr->exp_adjust_all = fmt.exp_adjust;
instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
instr->offset = elem->src_offset;
}
static void
patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
struct fd_vertex_stateobj *vtx,
struct fd_texture_stateobj *tex) assert_dt
{
for (int i = 0; i < info->num_fetch_instrs; i++) {
struct ir2_fetch_info *fi = &info->fetch_info[i];
instr_fetch_t *instr = (instr_fetch_t *)&info->dwords[fi->offset];
if (instr->opc == VTX_FETCH) {
unsigned idx =
(instr->vtx.const_index - 20) * 3 + instr->vtx.const_index_sel;
patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
continue;
}
assert(instr->opc == TEX_FETCH);
instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
instr->tex.src_swiz = fi->tex.src_swiz;
}
}
void
fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog)
{
struct fd2_shader_stateobj *fp = NULL, *vp;
struct ir2_shader_info *fpi, *vpi;
struct ir2_frag_linkage *f;
uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
bool binning = (ctx->batch && ring == ctx->batch->binning);
unsigned variant = 0;
vp = prog->vs;
/* find variant matching the linked fragment shader */
if (!binning) {
fp = prog->fs;
for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
/* if checked all variants, compile a new variant */
if (!vp->variant[variant].info.sizedwords) {
ir2_compile(vp, variant, fp);
break;
}
/* check if fragment shader linkage matches */
if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
sizeof(struct ir2_frag_linkage)))
break;
}
assert(variant < ARRAY_SIZE(vp->variant));
}
vpi = &vp->variant[variant].info;
fpi = &fp->variant[0].info;
f = &fp->variant[0].f;
/* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
if (fp)
patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
}
emit(ring, MESA_SHADER_VERTEX, vpi,
binning ? &ctx->batch->shader_patches : NULL);
if (fp) {
emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
vs_export = MAX2(1, f->inputs_count) - 1;
}
vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
if (vp->writes_psize && !binning)
mode = POSITION_2_VECTORS_SPRITE;
/* set register to use for param (fragcoord/pointcoord/frontfacing) */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
OUT_RING(ring,
A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
/* we need SCREEN_XY for both fragcoord and frontfacing */
A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
OUT_RING(ring,
A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
}
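/* Editor's note: a hedged sketch, not part of this re-indent commit, of how
 * the SQ_PROGRAM_CNTL fields above are derived. VS_EXPORT_COUNT is a
 * count-minus-one field, clamped so an export-less shader still programs 0,
 * and the *_REGS fields hold the highest GPR index used, with 0x80 standing
 * in for "no GPRs used" when max_reg is negative:
 *
 *    f->inputs_count = 0  ->  vs_export = MAX2(1, 0) - 1 = 0
 *    f->inputs_count = 3  ->  vs_export = MAX2(1, 3) - 1 = 2
 *    fpi->max_reg    = -1 ->  fs_gprs   = 0x80
 *    fpi->max_reg    = 2  ->  fs_gprs   = 2
 */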
void
fd2_prog_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_program_stateobj *prog;
struct fd2_shader_stateobj *so;
struct ir2_shader_info *info;
instr_fetch_vtx_t *instr;
pctx->create_fs_state = fd2_fp_state_create;
pctx->delete_fs_state = fd2_fp_state_delete;
pctx->create_vs_state = fd2_vp_state_create;
pctx->delete_vs_state = fd2_vp_state_delete;
fd_prog_init(pctx);
/* XXX maybe it's possible to reuse patch_vtx_fetch somehow? */
prog = &ctx->solid_prog;
so = prog->vs;
ir2_compile(prog->vs, 1, prog->fs);
#define IR2_FETCH_SWIZ_XY01 0xb08
#define IR2_FETCH_SWIZ_XYZ1 0xa88
info = &so->variant[1].info;
instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
instr->const_index = 26;
instr->const_index_sel = 0;
instr->format = FMT_32_32_32_FLOAT;
instr->format_comp_all = false;
instr->stride = 12;
instr->num_format_all = true;
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
prog = &ctx->blit_prog[0];
so = prog->vs;
ir2_compile(prog->vs, 1, prog->fs);
info = &so->variant[1].info;
instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
instr->const_index = 26;
instr->const_index_sel = 1;
instr->format = FMT_32_32_FLOAT;
instr->format_comp_all = false;
instr->stride = 8;
instr->num_format_all = false;
instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[1].offset];
instr->const_index = 26;
instr->const_index_sel = 0;
instr->format = FMT_32_32_32_FLOAT;
instr->format_comp_all = false;
instr->stride = 12;
instr->num_format_all = false;
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
}
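/* Editor's note: a hedged summary, not from the original commit, of the
 * hand-patched fetches above. Both built-in programs read their vertices
 * from const slot 26: the solid program fetches a single vec3 position
 * (FMT_32_32_32_FLOAT, 12-byte stride), while the blit program fetches vec2
 * texcoords (FMT_32_32_FLOAT, 8-byte stride, const_index_sel 1) plus a vec3
 * position. The dst_swiz defines appear to decode as four packed 3-bit
 * selectors (0=X, 1=Y, 2=Z, 4=ZERO, 5=ONE):
 *
 *    0xa88 -> (w,z,y,x) = (5,2,1,0) -> XYZ1
 *    0xb08 -> (w,z,y,x) = (5,4,1,0) -> XY01
 */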


@@ -31,39 +31,39 @@
#include "freedreno_context.h"
#include "disasm.h"
#include "ir2.h"
struct fd2_shader_stateobj {
nir_shader *nir;
gl_shader_stage type;
bool is_a20x;
/* note: using same set of immediates for all variants
* it doesn't matter, other than the slightly larger command stream
*/
unsigned first_immediate; /* const reg # of first immediate */
unsigned num_immediates;
struct {
uint32_t val[4];
unsigned ncomp;
} immediates[64];
bool writes_psize;
bool need_param;
bool has_kill;
/* note:
* fragment shader only has one variant
* first vertex shader variant is always binning shader
* we should use a dynamic array, but in the normal case there are
* only 2 variants (and sometimes 3 with GALLIUM_HUD)
*/
struct ir2_shader_variant variant[8];
};
void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog) assert_dt;
void fd2_prog_init(struct pipe_context *pctx);


@@ -37,20 +37,19 @@
#include "fd2_query.h"
struct PACKED fd2_query_sample {
uint32_t start;
uint32_t stop;
};
/* offset of a single field of an array of fd2_query_sample: */
#define query_sample_idx(aq, idx, field) \
fd_resource((aq)->prsc)->bo, \
(idx * sizeof(struct fd2_query_sample)) + \
offsetof(struct fd2_query_sample, field), \
0, 0
/* offset of a single field of fd2_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
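/* Editor's note: a usage sketch, not part of this commit. query_sample_idx()
 * expands to the bo/offset (plus two trailing zero) argument list that
 * OUT_RELOC expects, addressing one field of the idx'th sample, so
 *
 *    OUT_RELOC(ring, query_sample_idx(aq, i, start));
 *
 * is equivalent to
 *
 *    OUT_RELOC(ring, fd_resource(aq->prsc)->bo,
 *              i * sizeof(struct fd2_query_sample) +
 *                 offsetof(struct fd2_query_sample, start),
 *              0, 0);
 */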
/*
* Performance Counter (batch) queries:
@@ -62,186 +61,183 @@ struct PACKED fd2_query_sample {
*/
struct fd_batch_query_entry {
uint8_t gid; /* group-id */
uint8_t cid; /* countable-id within the group */
};
struct fd_batch_query_data {
struct fd_screen *screen;
unsigned num_query_entries;
struct fd_batch_query_entry query_entries[];
};
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring);
/* configure performance counters for the requested queries: */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
debug_assert(counter_idx < g->num_counters);
OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
OUT_RING(ring, g->countables[entry->cid].selector);
}
memset(counters_per_group, 0, sizeof(counters_per_group));
/* and snapshot the start values */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
OUT_RELOC(ring, query_sample_idx(aq, i, start));
}
}
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring);
/* TODO do we need to bother to turn anything off? */
/* snapshot the end values: */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
OUT_RELOC(ring, query_sample_idx(aq, i, stop));
}
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd_batch_query_data *data = aq->query_data;
struct fd2_query_sample *sp = buf;
for (unsigned i = 0; i < data->num_query_entries; i++)
result->batch[i].u64 = sp[i].stop - sp[i].start;
}
static const struct fd_acc_sample_provider perfcntr = {
.query_type = FD_QUERY_FIRST_PERFCNTR,
.always = true,
.resume = perfcntr_resume,
.pause = perfcntr_pause,
.result = perfcntr_accumulate_result,
};
static struct pipe_query *
fd2_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
unsigned *query_types)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_screen *screen = ctx->screen;
struct fd_query *q;
struct fd_acc_query *aq;
struct fd_batch_query_data *data;
data = CALLOC_VARIANT_LENGTH_STRUCT(
fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
data->screen = screen;
data->num_query_entries = num_queries;
/* validate the requested query_types and ensure we don't try
* to request more query_types of a given group than we have
* counters:
*/
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
for (unsigned i = 0; i < num_queries; i++) {
unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
/* verify valid query_type, ie. is it actually a perfcntr? */
if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
(idx >= screen->num_perfcntr_queries)) {
mesa_loge("invalid batch query query_type: %u", query_types[i]);
goto error;
}
struct fd_batch_query_entry *entry = &data->query_entries[i];
struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
entry->gid = pq->group_id;
/* the perfcntr_queries[] table flattens all the countables
* for each group in series, ie:
*
* (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
*
* So to find the countable index just step back through the
* table to find the first entry with the same group-id.
*/
while (pq > screen->perfcntr_queries) {
pq--;
if (pq->group_id == entry->gid)
entry->cid++;
}
if (counters_per_group[entry->gid] >=
screen->perfcntr_groups[entry->gid].num_counters) {
mesa_loge("too many counters for group %u", entry->gid);
goto error;
}
counters_per_group[entry->gid]++;
}
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */
aq->size = num_queries * sizeof(struct fd2_query_sample);
aq->query_data = data;
return (struct pipe_query *)q;
error:
free(data);
return NULL;
}
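/* Editor's note: a worked example, not in the original diff, of the cid
 * recovery loop above. With perfcntr_queries[] flattened as
 *
 *    idx:    0       1       2       3       4
 *    entry: (G0,C0) (G0,C1) (G0,C2) (G1,C0) (G1,C1)
 *
 * a query_type that resolves to idx 4 starts at cid = 0 and walks back to
 * the start of the table, incrementing cid once (for idx 3, the only
 * earlier entry with the same group-id), recovering cid = 1.
 */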
void
fd2_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_acc_create_query;
ctx->query_update_batch = fd_acc_query_update_batch;
pctx->create_batch_query = fd2_create_batch_query;
}


@@ -24,88 +24,86 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_context.h"
#include "fd2_rasterizer.h"
#include "fd2_util.h"
void *
fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd2_rasterizer_stateobj *so;
float psize_min, psize_max;
so = CALLOC_STRUCT(fd2_rasterizer_stateobj);
if (!so)
return NULL;
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 8192.0 - 0.0625;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
so->base = *cso;
so->pa_sc_line_stipple =
cso->line_stipple_enable
? A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor)
: 0;
so->pa_cl_clip_cntl = 0; // TODO
so->pa_su_vtx_cntl =
A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL
: PIXCENTER_D3D) |
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
so->pa_su_point_size = A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size / 2) |
A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size / 2);
so->pa_su_point_minmax = A2XX_PA_SU_POINT_MINMAX_MIN(psize_min / 2) |
A2XX_PA_SU_POINT_MINMAX_MAX(psize_max / 2);
so->pa_su_line_cntl = A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width / 2);
so->pa_su_sc_mode_cntl =
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
if (cso->cull_face & PIPE_FACE_FRONT)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK;
if (!cso->flatshade_first)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
if (!cso->front_ccw)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE;
if (cso->line_stipple_enable)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
if (cso->multisample)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
else
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
if (cso->offset_tri)
so->pa_su_sc_mode_cntl |=
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
return so;
}
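/* Editor's note: a hedged illustration, not part of the commit. The
 * PA_SU_POINT_* fields appear to take half the point diameter, hence the
 * divide-by-two everywhere above; e.g. with point_size = 4.0 and
 * point_size_per_vertex set:
 *
 *    PA_SU_POINT_SIZE:   WIDTH = HEIGHT = 4.0 / 2
 *    PA_SU_POINT_MINMAX: MIN = util_get_min_point_size(cso) / 2
 *                        MAX = (8192.0 - 0.0625) / 2
 *
 * whereas with point_size_per_vertex unset, MIN and MAX both collapse to
 * point_size / 2, so the vertex output is effectively ignored.
 */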


@@ -27,27 +27,27 @@
#ifndef FD2_RASTERIZER_H_
#define FD2_RASTERIZER_H_
#include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd2_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t pa_sc_line_stipple;
uint32_t pa_cl_clip_cntl;
uint32_t pa_su_vtx_cntl;
uint32_t pa_su_point_size;
uint32_t pa_su_point_minmax;
uint32_t pa_su_line_cntl;
uint32_t pa_su_sc_mode_cntl;
};
static inline struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd2_rasterizer_stateobj *)rast;
}
void *fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
#endif /* FD2_RASTERIZER_H_ */


@@ -29,40 +29,40 @@
uint32_t
fd2_setup_slices(struct fd_resource *rsc)
{
struct pipe_resource *prsc = &rsc->b.b;
enum pipe_format format = prsc->format;
uint32_t height0 = util_format_get_nblocksy(format, prsc->height0);
uint32_t level, size = 0;
/* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl2_pitch(&rsc->layout, level);
uint32_t nblocksy = align(u_minify(height0, level), 32);
/* mipmaps have power of two sizes in memory */
if (level)
nblocksy = util_next_power_of_two(nblocksy);
slice->offset = size;
slice->size0 = align(pitch * nblocksy, 4096);
size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
}
return size;
}
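/* Editor's note: a worked example (my numbers, not in the diff) of the
 * slice math above, for a 100x100 PIPE_FORMAT_R8G8B8A8_UNORM texture with
 * two mip levels. The pitch is aligned to 32 pixels (cpp_shift + 5):
 *
 *    level 0: pitch = align(100, 32) * 4 = 512 bytes
 *             nblocksy = align(100, 32) = 128
 *             size0 = align(512 * 128, 4096) = 65536, offset 0
 *    level 1: pitch = align(50, 32) * 4 = 256 bytes
 *             nblocksy = util_next_power_of_two(align(50, 32)) = 64
 *             size0 = align(256 * 64, 4096) = 16384, offset 65536
 */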
unsigned
fd2_tile_mode(const struct pipe_resource *tmpl)
{
/* disable tiling for cube maps; freedreno uses a 2D array for the staging
 * texture (a2xx supports 2D arrays, but they are not implemented)
 */
if (tmpl->target == PIPE_TEXTURE_CUBE)
return 0;
/* we can enable tiling for any resource we can render to */
return (tmpl->bind & PIPE_BIND_RENDER_TARGET) ? 1 : 0;
}


@@ -27,90 +27,84 @@
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "fd2_context.h"
#include "fd2_emit.h"
#include "fd2_resource.h"
#include "fd2_screen.h"
#include "fd2_util.h"
static bool
fd2_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count, unsigned usage)
{
unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return false;
}
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
if ((usage & PIPE_BIND_RENDER_TARGET) &&
fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0) {
retval |= PIPE_BIND_RENDER_TARGET;
}
if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) &&
!util_format_is_srgb(format) && !util_format_is_pure_integer(format) &&
fd2_pipe2surface(format).format != FMT_INVALID) {
retval |= usage & PIPE_BIND_VERTEX_BUFFER;
/* the only npot blocksize supported texture format is R32G32B32_FLOAT */
if (util_is_power_of_two_or_zero(util_format_get_blocksize(format)) ||
format == PIPE_FORMAT_R32G32B32_FLOAT)
retval |= usage & PIPE_BIND_SAMPLER_VIEW;
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
(fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x",
util_format_name(format), target, sample_count, usage, retval);
}
return retval == usage;
}
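/* Editor's note: a brief usage note, not from the commit. retval only
 * accumulates the bind flags this format can actually satisfy, so the check
 * succeeds iff every requested bit was matched, e.g.
 *
 *    usage  = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW
 *    retval = PIPE_BIND_SAMPLER_VIEW   ->   returns false
 */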
void
fd2_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = 1;
pscreen->context_create = fd2_context_create;
pscreen->is_format_supported = fd2_screen_is_format_supported;
screen->setup_slices = fd2_setup_slices;
if (FD_DBG(TTILE))
screen->tile_mode = fd2_tile_mode;
fd2_emit_init_screen(pscreen);
}


@@ -25,9 +25,9 @@
*/
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_texture.h"
#include "fd2_util.h"
@@ -35,200 +35,190 @@
static enum sq_tex_clamp
tex_clamp(unsigned wrap)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return SQ_TEX_WRAP;
case PIPE_TEX_WRAP_CLAMP:
return SQ_TEX_CLAMP_HALF_BORDER;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return SQ_TEX_CLAMP_LAST_TEXEL;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return SQ_TEX_CLAMP_BORDER;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return SQ_TEX_MIRROR;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
return SQ_TEX_MIRROR_ONCE_BORDER;
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
static enum sq_tex_filter
tex_filter(unsigned filter)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return SQ_TEX_FILTER_POINT;
case PIPE_TEX_FILTER_LINEAR:
return SQ_TEX_FILTER_BILINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
static enum sq_tex_filter
mip_filter(unsigned filter)
{
switch (filter) {
case PIPE_TEX_MIPFILTER_NONE:
return SQ_TEX_FILTER_BASEMAP;
case PIPE_TEX_MIPFILTER_NEAREST:
return SQ_TEX_FILTER_POINT;
case PIPE_TEX_MIPFILTER_LINEAR:
return SQ_TEX_FILTER_BILINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
static void *
fd2_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj);
if (!so)
return NULL;
so->base = *cso;
/* TODO
* cso->max_anisotropy
* cso->normalized_coords (dealt with by shader for rect textures?)
*/
/* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
so->tex0 = A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) |
A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) |
A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
so->tex3 = A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) |
A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) |
A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter));
so->tex4 = 0;
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE)
so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias);
return so;
}
static void
fd2_sampler_states_bind(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr, void **hwcso) in_dt
{
if (!hwcso)
nr = 0;
if (shader == PIPE_SHADER_FRAGMENT) {
struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers)
ctx->dirty |= FD_DIRTY_TEXSTATE;
}
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
}
static enum sq_tex_dimension
tex_dimension(unsigned target)
{
switch (target) {
default:
assert(0);
case PIPE_TEXTURE_1D:
assert(0); /* TODO */
return SQ_TEX_DIMENSION_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
return SQ_TEX_DIMENSION_2D;
case PIPE_TEXTURE_3D:
assert(0); /* TODO */
return SQ_TEX_DIMENSION_3D;
case PIPE_TEXTURE_CUBE:
return SQ_TEX_DIMENSION_CUBE;
}
}
static struct pipe_sampler_view *
fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
struct surface_format fmt = fd2_pipe2surface(cso->format);
if (!so)
return NULL;
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->tex0 = A2XX_SQ_TEX_0_SIGN_X(fmt.sign) | A2XX_SQ_TEX_0_SIGN_Y(fmt.sign) |
A2XX_SQ_TEX_0_SIGN_Z(fmt.sign) | A2XX_SQ_TEX_0_SIGN_W(fmt.sign) |
A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, 0) *
util_format_get_blockwidth(prsc->format)) |
COND(rsc->layout.tile_mode, A2XX_SQ_TEX_0_TILED);
so->tex1 = A2XX_SQ_TEX_1_FORMAT(fmt.format) |
A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL);
so->tex2 = A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) |
A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
so->tex3 = A2XX_SQ_TEX_3_NUM_FORMAT(fmt.num_format) |
fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a) |
A2XX_SQ_TEX_3_EXP_ADJUST(fmt.exp_adjust);
so->tex4 = A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) |
A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso));
so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target));
return &so->base;
}
static void
fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr,
unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views) in_dt
{
if (shader == PIPE_SHADER_FRAGMENT) {
struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_textures)
ctx->dirty |= FD_DIRTY_TEXSTATE;
}
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
views);
}
/* map gallium sampler-id to hw const-idx.. adreno uses a flat address
@@ -244,19 +234,18 @@ fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
*/
unsigned
fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
unsigned samp_id) assert_dt
{
if (tex == &ctx->tex[PIPE_SHADER_FRAGMENT])
return samp_id;
return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers;
}
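/* Editor's note: a small worked example, not part of the commit, of the
 * flat sampler address space handled above. With 2 fragment samplers bound:
 *
 *    fd2_get_const_idx(ctx, &ctx->tex[PIPE_SHADER_FRAGMENT], 1) -> 1
 *    fd2_get_const_idx(ctx, &ctx->tex[PIPE_SHADER_VERTEX], 0)   -> 2
 *
 * which is also why a change in the fragment sampler/texture count dirties
 * FD_DIRTY_TEXSTATE and forces the vertex shader to be re-patched.
 */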
void
fd2_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd2_sampler_state_create;
pctx->bind_sampler_states = fd2_sampler_states_bind;
pctx->create_sampler_view = fd2_sampler_view_create;
pctx->set_sampler_views = fd2_set_sampler_views;
}


@@ -29,36 +29,36 @@
#include "pipe/p_context.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd2_context.h"
#include "fd2_util.h"
struct fd2_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t tex0, tex3, tex4;
};
static inline struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd2_sampler_stateobj *)samp;
}
struct fd2_pipe_sampler_view {
struct pipe_sampler_view base;
uint32_t tex0, tex1, tex2, tex3, tex4, tex5;
};
static inline struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd2_pipe_sampler_view *)pview;
}
unsigned fd2_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
void fd2_texture_init(struct pipe_context *pctx);


@@ -32,53 +32,54 @@
static enum a2xx_sq_surfaceformat
pipe2surface(enum pipe_format format, struct surface_format *fmt)
{
const struct util_format_description *desc = util_format_description(format);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
switch (format) {
/* Compressed textures. */
case PIPE_FORMAT_ETC1_RGB8:
return FMT_ETC1_RGB;
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
return FMT_DXT1;
case PIPE_FORMAT_DXT3_RGBA:
return FMT_DXT2_3;
case PIPE_FORMAT_DXT5_RGBA:
return FMT_DXT4_5;
case PIPE_FORMAT_ATC_RGB:
return FMT_ATI_TC_555_565_RGB;
case PIPE_FORMAT_ATC_RGBA_EXPLICIT:
return FMT_ATI_TC_555_565_RGBA;
case PIPE_FORMAT_ATC_RGBA_INTERPOLATED:
return FMT_ATI_TC_555_565_RGBA_INTERP;
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
return FMT_Y1_Cr_Y0_Cb;
case PIPE_FORMAT_YUYV:
return FMT_Cr_Y1_Cb_Y0;
default:
return ~0;
}
}
uint32_t channel_size = 0;
for (unsigned i = 0; i < 4; i++)
channel_size |= desc->channel[i].size << i * 8;
unsigned i = util_format_get_first_non_void_channel(format);
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
fmt->sign = SQ_TEX_SIGN_SIGNED;
if (!desc->channel[i].normalized)
fmt->num_format = SQ_TEX_NUM_FORMAT_INT;
if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
fmt->exp_adjust = -16;
/* Note: the 3 channel 24bpp/48bpp/96bpp formats are only for vertex fetch,
 * so we can use the 4 channel format and ignore the 4th component, which
 * just isn't used.
 * XXX: is it possible for the extra loaded component to cause an MMU fault?
 */
#define CASE(r, g, b, a) case (r | g << 8 | b << 16 | a << 24)
@@ -119,116 +120,125 @@ pipe2surface(enum pipe_format format, struct surface_format *fmt)
/* clang-format on */
#undef CASE
return ~0;
}
struct surface_format
fd2_pipe2surface(enum pipe_format format)
{
struct surface_format fmt = {
.sign = SQ_TEX_SIGN_UNSIGNED,
.num_format = SQ_TEX_NUM_FORMAT_FRAC,
.exp_adjust = 0,
};
fmt.format = pipe2surface(format, &fmt);
return fmt;
}
enum a2xx_colorformatx
fd2_pipe2color(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_R8_UNORM:
return COLORX_8;
case PIPE_FORMAT_B2G3R3_UNORM:
return COLORX_2_3_3; /* note: untested */
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return COLORX_5_6_5;
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
return COLORX_1_5_5_5;
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
return COLORX_4_4_4_4;
case PIPE_FORMAT_R8G8_UNORM:
return COLORX_8_8;
/* 32-bit buffers. */
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
return COLORX_8_8_8_8;
/* Note: snorm untested */
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8G8B8X8_SNORM:
return COLORX_S8_8_8_8;
/* float buffers */
case PIPE_FORMAT_R16_FLOAT:
return COLORX_16_FLOAT;
case PIPE_FORMAT_R16G16_FLOAT:
return COLORX_16_16_FLOAT;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
return COLORX_16_16_16_16_FLOAT;
case PIPE_FORMAT_R32_FLOAT:
return COLORX_32_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
return COLORX_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return COLORX_32_32_32_32_FLOAT;
default:
return ~0;
}
}
static inline enum sq_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_X:
return SQ_TEX_X;
case PIPE_SWIZZLE_Y:
return SQ_TEX_Y;
case PIPE_SWIZZLE_Z:
return SQ_TEX_Z;
case PIPE_SWIZZLE_W:
return SQ_TEX_W;
case PIPE_SWIZZLE_0:
return SQ_TEX_ZERO;
case PIPE_SWIZZLE_1:
return SQ_TEX_ONE;
}
}
uint32_t
fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc = util_format_description(format);
unsigned char swiz[4] =
{
swizzle_r,
swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(rswiz[0])) |
A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(rswiz[1])) |
A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(rswiz[2])) |
A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(rswiz[3]));
}
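/* Editor's note: an illustrative case, my assumption rather than part of
 * the commit. For PIPE_FORMAT_R8_UNORM the format description swizzle is
 * (X, 0, 0, 1); composing an identity view swizzle (R, G, B, A) over it
 * gives rswiz = (X, ZERO, ZERO, ONE), so the single channel lands in SWIZ_X
 * and the missing green/blue/alpha are synthesized as 0, 0, 1.
 */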
uint32_t
fd2_vtx_swiz(enum pipe_format format, unsigned swizzle)
{
const struct util_format_description *desc = util_format_description(format);
unsigned char swiz[4], rswiz[4];
for (unsigned i = 0; i < 4; i++)
swiz[i] = (swizzle >> i * 3) & 7;
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return rswiz[0] | rswiz[1] << 3 | rswiz[2] << 6 | rswiz[3] << 9;
}


@@ -33,23 +33,25 @@
struct surface_format {
/* If enum is a signed type, 0x7f is out of range. Cast it to avoid warnings. */
#define FMT_INVALID ((enum a2xx_sq_surfaceformat)0x7f)
enum a2xx_sq_surfaceformat format : 7;
enum sq_tex_sign sign : 2;
enum sq_tex_num_format num_format : 1;
int exp_adjust : 6;
};
struct surface_format fd2_pipe2surface(enum pipe_format format);
enum a2xx_colorformatx fd2_pipe2color(enum pipe_format format);
uint32_t fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
uint32_t fd2_vtx_swiz(enum pipe_format format, unsigned swizzle);
/* convert x,y to dword */
static inline uint32_t
xy2d(uint16_t x, uint16_t y)
{
return ((y & 0x3fff) << 16) | (x & 0x3fff);
}
#endif /* FD2_UTIL_H_ */


@@ -24,72 +24,71 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_context.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
void *
fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd2_zsa_stateobj *so;
so = CALLOC_STRUCT(fd2_zsa_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled)
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_Z_ENABLE |
COND(!cso->alpha_enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
if (cso->depth_writemask)
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
}
}
if (cso->alpha_enabled) {
so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha_func) |
A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE;
so->rb_alpha_ref = fui(cso->alpha_ref_value);
}
return so;
}
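/* Editor's note: a hedged observation, not from the commit. EARLY_Z is only
 * set above when alpha test is disabled, presumably because an alpha-tested
 * fragment can still be killed after early depth would have updated the
 * buffer; the gallium depth/stencil compare-func enums map 1:1 onto the
 * RB_DEPTHCONTROL fields, so no translation table is needed.
 */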


@@ -27,28 +27,27 @@
#ifndef FD2_ZSA_H_
#define FD2_ZSA_H_
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd2_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t rb_depthcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */
uint32_t rb_alpha_ref;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
static inline struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd2_zsa_stateobj *)zsa;
}
void *fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD2_ZSA_H_ */


@@ -26,120 +26,124 @@
#include "ir2_private.h"
static bool
scalar_possible(struct ir2_instr *instr)
{
if (instr->alu.scalar_opc == SCALAR_NONE)
return false;
return src_ncomp(instr) == 1;
}
static bool
is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
{
if (!a)
return true;
/* don't use the same instruction twice */
if (a == b)
return false;
/* PRED_SET must be alone */
if (b->alu.scalar_opc >= PRED_SETEs &&
b->alu.scalar_opc <= PRED_SET_RESTOREs)
return false;
/* must write to same export (issues otherwise?) */
return a->alu.export == b->alu.export;
}
/* priority of vector instruction for scheduling (lower=higher prio) */
static unsigned
alu_vector_prio(struct ir2_instr *instr)
{
if (instr->alu.vector_opc == VECTOR_NONE)
return ~0u;
if (is_export(instr))
return 4;
/* TODO check src type and ncomps */
if (instr->src_count == 3)
return 0;
if (!scalar_possible(instr))
return 1;
return instr->src_count == 2 ? 2 : 3;
}
/* priority of scalar instruction for scheduling (lower=higher prio) */
static unsigned
alu_scalar_prio(struct ir2_instr *instr)
{
if (!scalar_possible(instr))
return ~0u;
/* this case is dealt with later */
if (instr->src_count > 1)
return ~0u;
if (is_export(instr))
return 4;
/* PRED to end of block */
if (instr->alu.scalar_opc >= PRED_SETEs &&
instr->alu.scalar_opc <= PRED_SET_RESTOREs)
return 5;
/* scalar-only instructions get the highest priority */
return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
}
/* this is a bit messy:
* we want to find a slot where we can insert a scalar MOV with
* a vector instruction that was already scheduled
*/
static struct ir2_sched_instr *
insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
struct ir2_src src1, unsigned *comp)
{
struct ir2_sched_instr *sched = NULL, *s;
unsigned i, mask = 0xf;
/* first, walk back to the earliest point where the mov can be inserted */
for (i = ctx->instr_sched_count - 1; i > 0; i--) {
s = &ctx->instr_sched[i - 1];
if (s->instr && s->instr->block_idx != block_idx)
break;
if (s->instr_s && s->instr_s->block_idx != block_idx)
break;
if (src1.type == IR2_SRC_SSA) {
if ((s->instr && s->instr->idx == src1.num) ||
(s->instr_s && s->instr_s->idx == src1.num))
break;
}
unsigned mr = ~(s->reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf);
if ((mask & mr) == 0)
break;
mask &= mr;
if (s->instr_s || s->instr->src_count == 3)
continue;
if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0)
continue;
sched = s;
}
*comp = ffs(mask) - 1;
if (sched) {
for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++)
s->reg_state[reg_idx / 8] |= 1 << (*comp + reg_idx % 8 * 4);
}
return sched;
}
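A note on the reg_state bookkeeping above: each GPR gets a 4-bit component mask, packed eight registers per uint32_t. A minimal standalone sketch of that packing (helper names are illustrative, not from the driver):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* same shift/mask arithmetic as insert(): word = reg_idx / 8,
 * nibble = reg_idx % 8, one bit per x/y/z/w component */
static unsigned
comp_mask(const uint32_t *reg_state, unsigned reg_idx)
{
   return reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf;
}

static void
mark_comp(uint32_t *reg_state, unsigned reg_idx, unsigned comp)
{
   assert(comp < 4);
   reg_state[reg_idx / 8] |= 1u << (comp + reg_idx % 8 * 4);
}

int
main(void)
{
   uint32_t reg_state[8] = {0};

   mark_comp(reg_state, 10, 2); /* R10.z now in use */
   printf("R10 mask: 0x%x\n", comp_mask(reg_state, 10)); /* prints 0x4 */
   return 0;
}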
/* case1:
@@ -152,313 +156,326 @@ insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
static bool
scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order)
{
struct ir2_src src0 = instr->src[order];
struct ir2_src src1 = instr->src[!order];
struct ir2_sched_instr *sched;
struct ir2_instr *ins;
struct ir2_reg *reg;
unsigned idx, comp;
switch (src0.type) {
case IR2_SRC_CONST:
case IR2_SRC_INPUT:
return false;
default:
break;
}
/* TODO, insert needs logic for this */
if (src1.type == IR2_SRC_REG)
return false;
/* we could do something if they match src1.. */
if (src0.negate || src0.abs)
return false;
reg = get_reg_src(ctx, &src0);
/* result not used more since we will overwrite */
for (int i = 0; i < 4; i++)
if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i))
return false;
/* find a place to insert the mov */
sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp);
if (!sched)
return false;
ins = &ctx->instr[idx = ctx->instr_count++];
ins->idx = idx;
ins->type = IR2_ALU;
ins->src[0] = src1;
ins->src_count = 1;
ins->is_ssa = true;
ins->ssa.idx = reg->idx;
ins->ssa.ncomp = 1;
ins->ssa.comp[0].c = comp;
ins->alu.scalar_opc = MAXs;
ins->alu.export = -1;
ins->alu.write_mask = 1;
ins->pred = instr->pred;
ins->block_idx = instr->block_idx;
instr->src[0] = src0;
instr->alu.src1_swizzle = comp;
sched->instr_s = ins;
return true;
}
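To make the case concrete, a hypothetical before/after (register numbers invented for illustration):

/* e.g. ADDs d = R0.y, R1.z has two sources, but the scalar lane reads
 * a single register.  insert() finds an earlier slot whose scalar lane
 * is free plus a spare component of src0's register (say R0.w); a MAXs
 * "mov" of R1.z into R0.w is co-issued there, and the ADDs then reads
 * both operands from R0, with alu.src1_swizzle picking .w.
 */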
/* fill sched with next fetch or (vector and/or scalar) alu instruction */
static int
sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
{
struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
unsigned avail_count = 0;
instr_alloc_type_t export = ~0u;
int block_idx = -1;
/* XXX merge this loop with the other one somehow? */
ir2_foreach_instr(instr, ctx)
{
if (!instr->need_emit)
continue;
if (is_export(instr))
export = MIN2(export, export_buf(instr->alu.export));
}
ir2_foreach_instr(instr, ctx)
{
if (!instr->need_emit)
continue;
/* don't mix exports */
if (is_export(instr) && export_buf(instr->alu.export) != export)
continue;
if (block_idx < 0)
block_idx = instr->block_idx;
else if (block_idx != instr->block_idx || /* must be same block */
instr->type == IR2_CF || /* CF/MEM must be alone */
(is_export(instr) && export == SQ_MEMORY))
break;
/* this works because IR2_CF is always at the end of a block;
 * much the same applies to MEM exports, which might not be alone
 * but will at least end up in order
 */
/* check if dependencies are satisfied */
bool is_ok = true;
ir2_foreach_src(src, instr)
{
if (src->type == IR2_SRC_REG) {
/* need to check if all previous instructions in the block
* which write the reg have been emitted
* slow..
* XXX: check components instead of whole register
*/
struct ir2_reg *reg = get_reg_src(ctx, src);
ir2_foreach_instr(p, ctx)
{
if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
is_ok &= !p->need_emit;
}
} else if (src->type == IR2_SRC_SSA) {
/* in this case it's easy, just check need_emit */
is_ok &= !ctx->instr[src->num].need_emit;
}
}
/* don't reorder non-ssa write before read */
if (!instr->is_ssa) {
ir2_foreach_instr(p, ctx)
{
if (!p->need_emit || p->idx >= instr->idx)
continue;
ir2_foreach_src(src, p)
{
if (get_reg_src(ctx, src) == instr->reg)
is_ok = false;
}
}
}
/* don't reorder across predicates */
if (avail_count && instr->pred != avail[0]->pred)
is_ok = false;
if (!is_ok)
continue;
avail[avail_count++] = instr;
}
if (!avail_count) {
assert(block_idx == -1);
return -1;
}
/* priority to FETCH instructions */
ir2_foreach_avail(instr)
{
if (instr->type == IR2_ALU)
continue;
ra_src_free(ctx, instr);
ra_reg(ctx, get_reg(instr), -1, false, 0);
instr->need_emit = false;
sched->instr = instr;
sched->instr_s = NULL;
return block_idx;
}
/* TODO precompute priorities */
unsigned prio_v = ~0u, prio_s = ~0u, prio;
ir2_foreach_avail(instr)
{
prio = alu_vector_prio(instr);
if (prio < prio_v) {
instr_v = instr;
prio_v = prio;
}
}
/* TODO can still insert scalar if src_count=3, if smart about it */
if (!instr_v || instr_v->src_count < 3) {
ir2_foreach_avail(instr)
{
bool compat = is_alu_compatible(instr_v, instr);
prio = alu_scalar_prio(instr);
if (prio >= prio_v && !compat)
continue;
if (prio < prio_s) {
instr_s = instr;
prio_s = prio;
if (!compat)
instr_v = NULL;
}
}
}
assert(instr_v || instr_s);
/* now, we try more complex insertion of vector instruction as scalar
* TODO: if we are smart we can still insert if instr_v->src_count==3
*/
if (!instr_s && instr_v->src_count < 3) {
ir2_foreach_avail(instr)
{
if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr))
continue;
/* at this point, src_count should always be 2 */
assert(instr->src_count == 2);
if (scalarize_case1(ctx, instr, 0)) {
instr_s = instr;
break;
}
if (scalarize_case1(ctx, instr, 1)) {
instr_s = instr;
break;
}
}
}
/* free src registers */
if (instr_v) {
instr_v->need_emit = false;
ra_src_free(ctx, instr_v);
}
if (instr_s) {
instr_s->need_emit = false;
ra_src_free(ctx, instr_s);
}
/* allocate dst registers */
if (instr_v)
ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v),
instr_v->alu.write_mask);
if (instr_s)
ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s),
instr_s->alu.write_mask);
sched->instr = instr_v;
sched->instr_s = instr_s;
return block_idx;
}
/* scheduling: determine order of instructions */
static void
schedule_instrs(struct ir2_context *ctx)
{
struct ir2_sched_instr *sched;
int block_idx;
/* allocate input registers */
for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
if (ctx->input[idx].initialized)
ra_reg(ctx, &ctx->input[idx], idx, false, 0);
for (;;) {
sched = &ctx->instr_sched[ctx->instr_sched_count++];
block_idx = sched_next(ctx, sched);
if (block_idx < 0)
break;
memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));
/* catch texture fetch after scheduling and insert the
* SET_TEX_LOD right before it if necessary
* TODO clean this up
*/
struct ir2_instr *instr = sched->instr, *tex_lod;
if (instr && instr->type == IR2_FETCH && instr->fetch.opc == TEX_FETCH &&
instr->src_count == 2) {
/* generate the SET_LOD instruction */
tex_lod = &ctx->instr[ctx->instr_count++];
tex_lod->type = IR2_FETCH;
tex_lod->block_idx = instr->block_idx;
tex_lod->pred = instr->pred;
tex_lod->fetch.opc = TEX_SET_TEX_LOD;
tex_lod->src[0] = instr->src[1];
tex_lod->src_count = 1;
sched[1] = sched[0];
sched->instr = tex_lod;
ctx->instr_sched_count++;
}
bool free_block = true;
ir2_foreach_instr(instr, ctx) free_block &= instr->block_idx != block_idx;
if (free_block)
ra_block_free(ctx, block_idx);
}
ctx->instr_sched_count--;
}
void
ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
struct fd2_shader_stateobj *fp)
{
struct ir2_context ctx = {};
bool binning = !fp && so->type == MESA_SHADER_VERTEX;
if (fp)
so->variant[variant].f = fp->variant[0].f;
ctx.so = so;
ctx.info = &so->variant[variant].info;
ctx.f = &so->variant[variant].f;
ctx.info->max_reg = -1;
/* convert nir to internal representation */
ir2_nir_compile(&ctx, binning);
/* copy propagate srcs */
cp_src(&ctx);
/* get ref_counts and kill non-needed instructions */
ra_count_refs(&ctx);
/* remove movs used to write outputs */
cp_export(&ctx);
/* instruction order.. and vector->scalar conversions */
schedule_instrs(&ctx);
/* finally, assemble to bitcode */
assemble(&ctx, binning);
}

@@ -31,66 +31,66 @@
#include "pipe/p_context.h"
struct ir2_fetch_info {
/* dword offset of the fetch instruction */
uint16_t offset;
union {
/* swizzle to merge with tgsi swizzle */
struct {
uint16_t dst_swiz;
} vtx;
/* sampler id to patch const_idx */
struct {
uint16_t samp_id;
uint8_t src_swiz;
} tex;
};
};
struct ir2_shader_info {
/* compiler shader */
uint32_t *dwords;
/* size of the compiled shader in dwords */
uint16_t sizedwords;
/* highest GPR # used by shader */
int8_t max_reg;
/* offset in dwords of first MEMORY export CF (for a20x hw binning) */
int16_t mem_export_ptr;
/* fetch instruction info for patching */
uint16_t num_fetch_instrs;
struct ir2_fetch_info fetch_info[64];
};
struct ir2_frag_linkage {
unsigned inputs_count;
struct {
uint8_t slot;
uint8_t ncomp;
} inputs[16];
/* driver_location of fragcoord.zw, -1 if not used */
int fragcoord;
};
struct ir2_shader_variant {
struct ir2_shader_info info;
struct ir2_frag_linkage f;
};
struct fd2_shader_stateobj;
struct tgsi_token;
void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
struct fd2_shader_stateobj *fp);
struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens,
struct pipe_screen *screen);
const nir_shader_compiler_options *ir2_get_compiler_options(void);
int ir2_optimize_nir(nir_shader *s, bool lower);
#endif /* IR2_H_ */

@@ -29,22 +29,22 @@
static unsigned
src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
struct ir2_reg_component *comps;
unsigned swiz = 0;
switch (src->type) {
case IR2_SRC_SSA:
case IR2_SRC_REG:
break;
default:
return src->swizzle;
}
/* we need to take into account where the components were allocated */
comps = get_reg_src(ctx, src)->comp;
for (int i = 0; i < ncomp; i++) {
swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i);
}
return swiz;
}
/* alu instrs need to take into account how the output components are allocated */
@@ -54,46 +54,47 @@ src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
static unsigned
alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg)
{
/* hardware seems to take from W, but swizzle everywhere just in case */
return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX);
}
static unsigned
alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr,
struct ir2_src *src)
{
struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr));
unsigned swiz = 0;
/* non per component special cases */
switch (instr->alu.vector_opc) {
case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
return alu_swizzle_scalar(ctx, src);
case DOT2ADDv:
case DOT3v:
case DOT4v:
case CUBEv:
return swiz0;
default:
break;
}
for (int i = 0, j = 0; i < dst_ncomp(instr); j++) {
if (instr->alu.write_mask & 1 << j) {
if (comp[j].c != 7)
swiz |= swiz_set(i, comp[j].c);
i++;
}
}
return swiz_merge(swiz0, swiz);
}
static unsigned
alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
{
/* hardware seems to take from ZW, but swizzle everywhere (ABAB) */
unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0);
return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY);
}
/* write_mask needs to be transformed by allocation information */
@@ -101,15 +102,15 @@ alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
static unsigned
alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
{
struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned write_mask = 0;
for (int i = 0; i < 4; i++) {
if (instr->alu.write_mask & 1 << i)
write_mask |= 1 << comp[i].c;
}
return write_mask;
}
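A standalone sketch of that remapping, assuming an illustrative allocation where the value's x/y live in hardware .z/.w:

#include <stdio.h>

int
main(void)
{
   unsigned write_mask = 0x3;         /* logical .xy */
   unsigned comp_c[4] = {2, 3, 0, 0}; /* x -> hw .z, y -> hw .w */

   /* the same loop alu_write_mask() runs over comp[i].c */
   unsigned hw_mask = 0;
   for (unsigned i = 0; i < 4; i++)
      if (write_mask & 1 << i)
         hw_mask |= 1 << comp_c[i];
   printf("hw write mask: 0x%x\n", hw_mask); /* prints 0xc */
   return 0;
}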
/* fetch instructions can swizzle dest, but src swizzle needs conversion */
@@ -117,432 +118,436 @@ alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
static unsigned
fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
unsigned alu_swiz = src_swizzle(ctx, src, ncomp);
unsigned swiz = 0;
for (int i = 0; i < ncomp; i++)
swiz |= swiz_get(alu_swiz, i) << i * 2;
return swiz;
}
static unsigned
fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr)
{
struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned dst_swiz = 0xfff;
for (int i = 0; i < dst_ncomp(instr); i++) {
dst_swiz &= ~(7 << comp[i].c * 3);
dst_swiz |= i << comp[i].c * 3;
}
return dst_swiz;
}
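fetch_dst_swiz() packs one 3-bit selector per destination channel, with 7 meaning "don't write" (hence the 0xfff default). A small sketch, using a made-up two-component allocation in .xy:

#include <stdio.h>

int
main(void)
{
   unsigned comp_c[4] = {0, 1, 0, 0}; /* value's comps allocated to .xy */
   unsigned ncomp = 2;

   unsigned dst_swiz = 0xfff; /* 7 in every 3-bit slot: write nothing */
   for (unsigned i = 0; i < ncomp; i++) {
      dst_swiz &= ~(7 << comp_c[i] * 3);
      dst_swiz |= i << comp_c[i] * 3;
   }
   printf("dst_swiz = 0x%03x\n", dst_swiz); /* prints 0xfc8 */
   return 0;
}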
/* register / export # for instr */
static unsigned
dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr)
{
if (is_export(instr))
return instr->alu.export;
return get_reg(instr)->idx;
}
/* register # for src */
static unsigned
src_to_reg(struct ir2_context *ctx, struct ir2_src *src)
{
return get_reg_src(ctx, src)->idx;
}
static unsigned
src_reg_byte(struct ir2_context *ctx, struct ir2_src *src)
{
if (src->type == IR2_SRC_CONST) {
assert(!src->abs); /* no abs bit for const */
return src->num;
}
return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0);
}
/* produce the 12 byte binary instruction for a given sched_instr */
static void
fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, instr_t *bc,
bool *is_fetch)
{
struct ir2_instr *instr = sched->instr, *instr_s, *instr_v;
*bc = (instr_t){};
if (instr && instr->type == IR2_FETCH) {
*is_fetch = true;
bc->fetch.opc = instr->fetch.opc;
bc->fetch.pred_select = !!instr->pred;
bc->fetch.pred_condition = instr->pred & 1;
struct ir2_src *src = instr->src;
if (instr->fetch.opc == VTX_FETCH) {
instr_fetch_vtx_t *vtx = &bc->fetch.vtx;
assert(instr->fetch.vtx.const_idx <= 0x1f);
assert(instr->fetch.vtx.const_idx_sel <= 0x3);
vtx->src_reg = src_to_reg(ctx, src);
vtx->src_swiz = fetch_swizzle(ctx, src, 1);
vtx->dst_reg = dst_to_reg(ctx, instr);
vtx->dst_swiz = fetch_dst_swiz(ctx, instr);
vtx->must_be_one = 1;
vtx->const_index = instr->fetch.vtx.const_idx;
vtx->const_index_sel = instr->fetch.vtx.const_idx_sel;
/* other fields will be patched */
/* XXX seems like every FETCH but the first has
* this bit set:
*/
vtx->reserved3 = instr->idx ? 0x1 : 0x0;
vtx->reserved0 = instr->idx ? 0x2 : 0x3;
} else if (instr->fetch.opc == TEX_FETCH) {
instr_fetch_tex_t *tex = &bc->fetch.tex;
tex->src_reg = src_to_reg(ctx, src);
tex->src_swiz = fetch_swizzle(ctx, src, 3);
tex->dst_reg = dst_to_reg(ctx, instr);
tex->dst_swiz = fetch_dst_swiz(ctx, instr);
/* tex->const_idx = patch_fetches */
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT;
tex->use_reg_lod = instr->src_count == 2;
tex->sample_location = SAMPLE_CENTER;
tex->tx_coord_denorm = instr->fetch.tex.is_rect;
} else if (instr->fetch.opc == TEX_SET_TEX_LOD) {
instr_fetch_tex_t *tex = &bc->fetch.tex;
tex->src_reg = src_to_reg(ctx, src);
tex->src_swiz = fetch_swizzle(ctx, src, 1);
tex->dst_reg = 0;
tex->dst_swiz = 0xfff;
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->use_comp_lod = 1;
tex->use_reg_lod = 0;
tex->sample_location = SAMPLE_CENTER;
} else {
assert(0);
}
return;
}
instr_v = sched->instr;
instr_s = sched->instr_s;
if (instr_v) {
struct ir2_src src1, src2, *src3;
src1 = instr_v->src[0];
src2 = instr_v->src[instr_v->src_count > 1];
src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL;
bc->alu.vector_opc = instr_v->alu.vector_opc;
bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v);
bc->alu.vector_dest = dst_to_reg(ctx, instr_v);
bc->alu.vector_clamp = instr_v->alu.saturate;
bc->alu.export_data = instr_v->alu.export >= 0;
/* single operand SETEv, use 0.0f as src2 */
if (instr_v->src_count == 1 &&
(bc->alu.vector_opc == SETEv || bc->alu.vector_opc == SETNEv ||
bc->alu.vector_opc == SETGTv || bc->alu.vector_opc == SETGTEv))
src2 = ir2_zero(ctx);
/* export32 instr for a20x hw binning has this bit set..
* it seems to do more than change the base address of constants
* XXX this is a hack
*/
bc->alu.relative_addr =
(bc->alu.export_data && bc->alu.vector_dest == 32);
bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1);
bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1);
bc->alu.src1_reg_negate = src1.negate;
bc->alu.src1_sel = src1.type != IR2_SRC_CONST;
bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2);
bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2);
bc->alu.src2_reg_negate = src2.negate;
bc->alu.src2_sel = src2.type != IR2_SRC_CONST;
if (src3) {
bc->alu.src3_reg_byte = src_reg_byte(ctx, src3);
bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3);
bc->alu.src3_reg_negate = src3->negate;
bc->alu.src3_sel = src3->type != IR2_SRC_CONST;
}
bc->alu.pred_select = instr_v->pred;
}
if (instr_s) {
struct ir2_src *src = instr_s->src;
bc->alu.scalar_opc = instr_s->alu.scalar_opc;
bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s);
bc->alu.scalar_dest = dst_to_reg(ctx, instr_s);
bc->alu.scalar_clamp = instr_s->alu.saturate;
bc->alu.export_data = instr_s->alu.export >= 0;
if (instr_s->src_count == 1) {
bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src);
bc->alu.src3_reg_negate = src->negate;
bc->alu.src3_sel = src->type != IR2_SRC_CONST;
} else {
assert(instr_s->src_count == 2);
bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
bc->alu.src3_swiz =
alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle);
bc->alu.src3_reg_negate = src->negate;
bc->alu.src3_sel = src->type != IR2_SRC_CONST;
}
if (instr_v)
assert(instr_s->pred == instr_v->pred);
bc->alu.pred_select = instr_s->pred;
}
*is_fetch = false;
return;
}
static unsigned
write_cfs(struct ir2_context *ctx, instr_cf_t *cfs, unsigned cf_idx,
instr_cf_alloc_t *alloc, instr_cf_exec_t *exec)
{
assert(exec->count);
if (alloc)
cfs[cf_idx++].alloc = *alloc;
/* for memory alloc offset for patching */
if (alloc && alloc->buffer_select == SQ_MEMORY &&
ctx->info->mem_export_ptr == -1)
ctx->info->mem_export_ptr = cf_idx / 2 * 3;
cfs[cf_idx++].exec = *exec;
exec->address += exec->count;
exec->serialize = 0;
exec->count = 0;
return cf_idx;
}
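The cf_idx / 2 * 3 in write_cfs() falls out of the CF encoding: a2xx CF instructions are 48 bits wide, so a pair of them occupies three 32-bit dwords. A trivial sketch of that index-to-dword-offset conversion:

#include <stdio.h>

int
main(void)
{
   /* two 48-bit CF instructions pack into three dwords */
   for (unsigned cf_idx = 0; cf_idx <= 6; cf_idx += 2)
      printf("CF pair %u starts at dword %u\n", cf_idx / 2, cf_idx / 2 * 3);
   return 0;
}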
/* assemble the final shader */
void
assemble(struct ir2_context *ctx, bool binning)
{
/* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384)
* address is 9 bits so could it be 512 ?
*/
instr_cf_t cfs[384];
instr_t bytecode[384], bc;
unsigned block_addr[128];
unsigned num_cf = 0;
/* CF instr state */
instr_cf_exec_t exec = {.opc = EXEC};
instr_cf_alloc_t alloc = {.opc = ALLOC};
int sync_id, sync_id_prev = -1;
bool is_fetch = false;
bool need_sync = true;
bool need_alloc = false;
unsigned block_idx = 0;
ctx->info->mem_export_ptr = -1;
ctx->info->num_fetch_instrs = 0;
/* a vertex shader always needs to allocate at least one parameter;
 * emit the alloc up front here if no export would otherwise do it
 */
if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) {
alloc.buffer_select = SQ_PARAMETER_PIXEL;
cfs[num_cf++].alloc = alloc;
}
block_addr[0] = 0;
for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) {
struct ir2_instr *instr = ctx->instr_sched[j].instr;
/* catch IR2_CF since it isn't a regular instruction */
if (instr && instr->type == IR2_CF) {
assert(!need_alloc); /* XXX */
/* flush any exec cf before inserting jmp */
if (exec.count)
num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec);
cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t){
.opc = COND_JMP,
.address = instr->cf.block_idx, /* will be fixed later */
.force_call = !instr->pred,
.predicated_jmp = 1,
.direction = instr->cf.block_idx > instr->block_idx,
.condition = instr->pred & 1,
};
continue;
}
/* fill the 3 dwords for the instruction */
fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch);
/* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */
sync_id = 0;
if (is_fetch)
sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2;
need_sync = sync_id != sync_id_prev;
sync_id_prev = sync_id;
unsigned block;
{
if (ctx->instr_sched[j].instr)
block = ctx->instr_sched[j].instr->block_idx;
else
block = ctx->instr_sched[j].instr_s->block_idx;
assert(block_idx <= block);
}
/* info for patching */
if (is_fetch) {
struct ir2_fetch_info *info =
&ctx->info->fetch_info[ctx->info->num_fetch_instrs++];
info->offset = i * 3; /* add cf offset later */
if (bc.fetch.opc == VTX_FETCH) {
info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz;
} else if (bc.fetch.opc == TEX_FETCH) {
info->tex.samp_id = instr->fetch.tex.samp_id;
info->tex.src_swiz = bc.fetch.tex.src_swiz;
} else {
ctx->info->num_fetch_instrs--;
}
}
/* exec cf after 6 instr or when switching between fetch / alu */
if (exec.count == 6 ||
(exec.count && (need_sync || block != block_idx))) {
num_cf =
write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
need_alloc = false;
}
/* update block_addrs for jmp patching */
while (block_idx < block)
block_addr[++block_idx] = num_cf;
/* export - fill alloc cf */
if (!is_fetch && bc.alu.export_data) {
/* get the export buffer from either vector/scalar dest */
instr_alloc_type_t buffer = export_buf(bc.alu.vector_dest);
if (bc.alu.scalar_write_mask) {
if (bc.alu.vector_write_mask)
assert(buffer == export_buf(bc.alu.scalar_dest));
buffer = export_buf(bc.alu.scalar_dest);
}
/* flush previous alloc if the buffer changes */
bool need_new_alloc = buffer != alloc.buffer_select;
/* memory export always in 32/33 pair, new alloc on 32 */
if (bc.alu.vector_dest == 32)
need_new_alloc = true;
if (need_new_alloc && exec.count) {
num_cf =
write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
need_alloc = false;
}
need_alloc |= need_new_alloc;
alloc.size = 0;
alloc.buffer_select = buffer;
if (buffer == SQ_PARAMETER_PIXEL &&
ctx->so->type == MESA_SHADER_VERTEX)
alloc.size = ctx->f->inputs_count - 1;
if (buffer == SQ_POSITION)
alloc.size = ctx->so->writes_psize;
}
if (is_fetch)
exec.serialize |= 0x1 << exec.count * 2;
if (need_sync)
exec.serialize |= 0x2 << exec.count * 2;
need_sync = false;
exec.count += 1;
bytecode[i++] = bc;
}
/* final exec cf */
exec.opc = EXEC_END;
num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
/* insert nop to get an even # of CFs */
if (num_cf % 2)
cfs[num_cf++] = (instr_cf_t){.opc = NOP};
/* patch cf addrs */
for (int idx = 0; idx < num_cf; idx++) {
switch (cfs[idx].opc) {
case NOP:
case ALLOC:
break;
case EXEC:
case EXEC_END:
cfs[idx].exec.address += num_cf / 2;
break;
case COND_JMP:
cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address];
break;
default:
assert(0);
}
}
/* concatenate cfs and alu/fetch */
uint32_t cfdwords = num_cf / 2 * 3;
uint32_t alufetchdwords = exec.address * 3;
uint32_t sizedwords = cfdwords + alufetchdwords;
uint32_t *dwords = malloc(sizedwords * 4);
assert(dwords);
memcpy(dwords, cfs, cfdwords * 4);
memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4);
/* finalize ir2_shader_info */
ctx->info->dwords = dwords;
ctx->info->sizedwords = sizedwords;
for (int i = 0; i < ctx->info->num_fetch_instrs; i++)
ctx->info->fetch_info[i].offset += cfdwords;
if (FD_DBG(DISASM)) {
DBG("disassemble: type=%d", ctx->so->type);
disasm_a2xx(dwords, sizedwords, 0, ctx->so->type);
}
}
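Putting the layout math together: CFs come first (three dwords per pair), then the ALU/fetch instructions at three dwords each; this is the sizedwords computation near the end of assemble(). A sketch with invented counts:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   unsigned num_cf = 10;    /* already padded to an even count */
   unsigned num_instr = 23; /* what exec.address ends up as */

   uint32_t cfdwords = num_cf / 2 * 3;      /* 2 CFs per 3 dwords */
   uint32_t alufetchdwords = num_instr * 3; /* 3 dwords each */
   printf("shader size: %u dwords\n", cfdwords + alufetchdwords); /* 84 */
   return 0;
}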

@@ -26,20 +26,22 @@
#include "ir2_private.h"
static bool
is_mov(struct ir2_instr *instr)
{
return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
instr->src_count == 1;
}
static void
src_combine(struct ir2_src *src, struct ir2_src b)
{
src->num = b.num;
src->type = b.type;
src->swizzle = swiz_merge(b.swizzle, src->swizzle);
if (!src->abs) /* if we have abs we don't care about previous negate */
src->negate ^= b.negate;
src->abs |= b.abs;
}
/* cp_src: replace src regs when they refer to a mov instruction
@@ -49,37 +51,40 @@ static void src_combine(struct ir2_src *src, struct ir2_src b)
* becomes:
* ALU: MULADDv R7 = C7, R10, R0.xxxx
*/
void
cp_src(struct ir2_context *ctx)
{
struct ir2_instr *p;
ir2_foreach_instr(instr, ctx)
{
ir2_foreach_src(src, instr)
{
/* loop to replace recursively */
do {
if (src->type != IR2_SRC_SSA)
break;
p = &ctx->instr[src->num];
/* don't work across blocks to avoid possible issues */
if (p->block_idx != instr->block_idx)
break;
if (!is_mov(p))
break;
if (p->alu.saturate)
break;
/* can't apply abs to a const src; const srcs are only allowed for alu */
if (p->src[0].type == IR2_SRC_CONST &&
(src->abs || instr->type != IR2_ALU))
break;
src_combine(src, p->src[0]);
} while (1);
}
}
}
/* cp_export: replace mov to export when possible
@@ -94,136 +99,138 @@ void cp_src(struct ir2_context *ctx)
* ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
*
*/
void
cp_export(struct ir2_context *ctx)
{
struct ir2_instr *c[4], *ins[4];
struct ir2_src *src;
struct ir2_reg *reg;
unsigned ncomp;
ir2_foreach_instr(instr, ctx)
{
if (!is_export(instr)) /* TODO */
continue;
if (!is_mov(instr))
continue;
src = &instr->src[0];
if (src->negate || src->abs) /* TODO handle these cases */
continue;
if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
continue;
reg = get_reg_src(ctx, src);
ncomp = dst_ncomp(instr);
unsigned reswiz[4] = {};
unsigned num_instr = 0;
/* fill array c with pointers to instrs that write each component */
if (src->type == IR2_SRC_SSA) {
struct ir2_instr *instr = &ctx->instr[src->num];
if (instr->type != IR2_ALU)
continue;
for (int i = 0; i < ncomp; i++)
c[i] = instr;
ins[num_instr++] = instr;
reswiz[0] = src->swizzle;
} else {
bool ok = true;
unsigned write_mask = 0;
ir2_foreach_instr(instr, ctx)
{
if (instr->is_ssa || instr->reg != reg)
continue;
/* set by non-ALU */
if (instr->type != IR2_ALU) {
ok = false;
break;
}
/* component written more than once */
if (write_mask & instr->alu.write_mask) {
ok = false;
break;
}
write_mask |= instr->alu.write_mask;
/* src pointers for components */
for (int i = 0, j = 0; i < 4; i++) {
unsigned k = swiz_get(src->swizzle, i);
if (instr->alu.write_mask & 1 << k) {
c[i] = instr;
/* reswiz = compressed src->swizzle */
unsigned x = 0;
for (int i = 0; i < k; i++)
x += !!(instr->alu.write_mask & 1 << i);
assert(src->swizzle || x == j);
reswiz[num_instr] |= swiz_set(x, j++);
}
}
ins[num_instr++] = instr;
}
if (!ok)
continue;
}
bool redirect = true;
/* must all be in same block */
for (int i = 0; i < ncomp; i++)
redirect &= (c[i]->block_idx == instr->block_idx);
/* no other instr using the value */
ir2_foreach_instr(p, ctx)
{
if (p == instr)
continue;
ir2_foreach_src(src, p) redirect &= reg != get_reg_src(ctx, src);
}
if (!redirect)
continue;
/* redirect the instructions writing to the register */
for (int i = 0; i < num_instr; i++) {
struct ir2_instr *p = ins[i];
p->alu.export = instr->alu.export;
p->alu.write_mask = 0;
p->is_ssa = true;
p->ssa.ncomp = 0;
memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
p->alu.saturate |= instr->alu.saturate;
switch (p->alu.vector_opc) {
case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
case DOT2ADDv:
case DOT3v:
case DOT4v:
case CUBEv:
continue;
default:
break;
}
ir2_foreach_src(s, p) swiz_merge_p(&s->swizzle, reswiz[i]);
}
for (int i = 0; i < ncomp; i++) {
c[i]->alu.write_mask |= (1 << i);
c[i]->ssa.ncomp++;
}
instr->type = IR2_NONE;
instr->need_emit = false;
}
}
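The "compressed swizzle" loop in cp_export() above is just counting written components below k; a standalone restatement:

#include <stdio.h>

/* component k of a partially-written register lands in output slot
 * popcount(write_mask & ((1 << k) - 1)); this mirrors the x += ...
 * loop in cp_export() */
static unsigned
compressed_slot(unsigned write_mask, unsigned k)
{
   unsigned x = 0;
   for (unsigned i = 0; i < k; i++)
      x += !!(write_mask & 1 << i);
   return x;
}

int
main(void)
{
   /* write_mask .xz (0b0101): z (k = 2) is the second written comp */
   printf("slot of z: %u\n", compressed_slot(0x5, 2)); /* prints 1 */
   return 0;
}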

File diff suppressed because it is too large

@@ -24,175 +24,175 @@
* Jonathan Marek <jonathan@marek.ca>
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ir2.h"
#include "fd2_program.h"
#include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "ir2.h"
enum ir2_src_type {
IR2_SRC_SSA,
IR2_SRC_REG,
IR2_SRC_INPUT,
IR2_SRC_CONST,
};
struct ir2_src {
/* num can mean different things
* ssa: index of instruction
* reg: index in ctx->reg array
* input: index in ctx->input array
* const: constant index (C0, C1, etc)
*/
uint16_t num;
uint8_t swizzle;
enum ir2_src_type type : 2;
uint8_t abs : 1;
uint8_t negate : 1;
uint8_t : 4;
};
struct ir2_reg_component {
uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */
bool alloc : 1; /* is it currently allocated */
uint8_t ref_count; /* for ra */
};
struct ir2_reg {
uint8_t idx; /* assigned hardware register */
uint8_t ncomp;
uint8_t loop_depth;
bool initialized;
/* block_idx to free on (-1 = free on ref_count==0) */
int block_idx_free;
struct ir2_reg_component comp[4];
};
struct ir2_instr {
unsigned idx;
unsigned block_idx;
enum {
IR2_NONE,
IR2_FETCH,
IR2_ALU,
IR2_CF,
} type : 2;
/* instruction needs to be emitted (for scheduling) */
bool need_emit : 1;
/* predicate value - (usually) same for entire block */
uint8_t pred : 2;
/* src */
uint8_t src_count;
struct ir2_src src[4];
/* dst */
bool is_ssa;
union {
struct ir2_reg ssa;
struct ir2_reg *reg;
};
/* type-specific */
union {
struct {
instr_fetch_opc_t opc : 5;
union {
struct {
uint8_t const_idx;
uint8_t const_idx_sel;
} vtx;
struct {
bool is_cube : 1;
bool is_rect : 1;
uint8_t samp_id;
} tex;
};
} fetch;
struct {
/* store possible opcs, then we can choose vector/scalar instr */
instr_scalar_opc_t scalar_opc : 6;
instr_vector_opc_t vector_opc : 5;
/* same as nir */
uint8_t write_mask : 4;
bool saturate : 1;
/* export idx (-1 no export) */
int8_t export;
/* export idx (-1 no export) */
int8_t export;
/* for scalarized 2 src instruction */
uint8_t src1_swizzle;
} alu;
struct {
/* jmp dst block_idx */
uint8_t block_idx;
} cf;
};
/* for scalarized 2 src instruction */
uint8_t src1_swizzle;
} alu;
struct {
/* jmp dst block_idx */
uint8_t block_idx;
} cf;
};
};
struct ir2_sched_instr {
uint32_t reg_state[8];
struct ir2_instr *instr, *instr_s;
uint32_t reg_state[8];
struct ir2_instr *instr, *instr_s;
};
struct ir2_context {
struct fd2_shader_stateobj *so;
struct fd2_shader_stateobj *so;
unsigned block_idx, pred_idx;
uint8_t pred;
bool block_has_jump[64];
unsigned block_idx, pred_idx;
uint8_t pred;
bool block_has_jump[64];
unsigned loop_last_block[64];
unsigned loop_depth;
unsigned loop_last_block[64];
unsigned loop_depth;
nir_shader *nir;
nir_shader *nir;
/* ssa index of position output */
struct ir2_src position;
/* ssa index of position output */
struct ir2_src position;
/* to translate SSA ids to instruction ids */
int16_t ssa_map[1024];
/* to translate SSA ids to instruction ids */
int16_t ssa_map[1024];
struct ir2_shader_info *info;
struct ir2_frag_linkage *f;
struct ir2_shader_info *info;
struct ir2_frag_linkage *f;
int prev_export;
int prev_export;
/* RA state */
struct ir2_reg* live_regs[64];
uint32_t reg_state[256/32]; /* 64*4 bits */
/* RA state */
struct ir2_reg *live_regs[64];
uint32_t reg_state[256 / 32]; /* 64*4 bits */
/* inputs */
struct ir2_reg input[16 + 1]; /* 16 + param */
/* inputs */
struct ir2_reg input[16 + 1]; /* 16 + param */
/* non-ssa regs */
struct ir2_reg reg[64];
unsigned reg_count;
/* non-ssa regs */
struct ir2_reg reg[64];
unsigned reg_count;
struct ir2_instr instr[0x300];
unsigned instr_count;
struct ir2_instr instr[0x300];
unsigned instr_count;
struct ir2_sched_instr instr_sched[0x180];
unsigned instr_sched_count;
struct ir2_sched_instr instr_sched[0x180];
unsigned instr_sched_count;
};
void assemble(struct ir2_context *ctx, bool binning);
void ir2_nir_compile(struct ir2_context *ctx, bool binning);
bool ir2_nir_lower_scalar(nir_shader * shader);
bool ir2_nir_lower_scalar(nir_shader *shader);
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
bool export, uint8_t export_writemask);
bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);
@@ -201,196 +201,212 @@ void cp_export(struct ir2_context *ctx);
/* utils */
enum {
IR2_SWIZZLE_Y = 1 << 0,
IR2_SWIZZLE_Z = 2 << 0,
IR2_SWIZZLE_W = 3 << 0,
IR2_SWIZZLE_Y = 1 << 0,
IR2_SWIZZLE_Z = 2 << 0,
IR2_SWIZZLE_W = 3 << 0,
IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,
IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,
IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,
IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,
IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};
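The encodings above are relative: each 2-bit field stores (component - channel) & 3, which is why the broadcast swizzles are not a repeated 0/1/2/3 pattern. A minimal standalone sketch (mirroring the swiz_get() helper defined later in this header) checking that IR2_SWIZZLE_XXXX reads X in every channel:

#include <assert.h>

/* same math as swiz_get() below: recover the absolute component for channel i */
static inline unsigned sketch_swiz_get(unsigned swiz, unsigned i)
{
   return ((swiz >> i * 2) + i) & 3;
}

static void sketch_check_xxxx(void)
{
   unsigned xxxx = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6; /* IR2_SWIZZLE_XXXX */
   for (unsigned i = 0; i < 4; i++)
      assert(sketch_swiz_get(xxxx, i) == 0); /* 0 == X for all channels */
}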
#define compile_error(ctx, args...) ({ \
printf(args); \
assert(0); \
})
#define compile_error(ctx, args...) \
({ \
printf(args); \
assert(0); \
})
static inline struct ir2_src
ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
{
return (struct ir2_src) {
.num = num,
.swizzle = swizzle,
.type = type
};
return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
}
/* ir2_assemble uses it .. */
struct ir2_src ir2_zero(struct ir2_context *ctx);
#define ir2_foreach_instr(it, ctx) \
for (struct ir2_instr *it = (ctx)->instr; ({ \
while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
#define ir2_foreach_instr(it, ctx) \
for (struct ir2_instr *it = (ctx)->instr; ({ \
while (it != &(ctx)->instr[(ctx)->instr_count] && \
it->type == IR2_NONE) \
it++; \
it != &(ctx)->instr[(ctx)->instr_count]; \
}); \
it++)
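The loop condition is a GCC statement expression: before every comparison it advances `it` past slots whose type was reset to IR2_NONE, so killed instructions never reach the loop body. A hypothetical usage sketch (names are illustrative):

/* sketch: count the live ALU instructions in a populated ir2_context */
static unsigned sketch_count_alu(struct ir2_context *ctx)
{
   unsigned n = 0;
   ir2_foreach_instr(instr, ctx) {
      if (instr->type == IR2_ALU) /* IR2_NONE entries were skipped */
         n++;
   }
   return n;
}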
#define ir2_foreach_live_reg(it, ctx) \
for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
__ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); it++)
#define ir2_foreach_live_reg(it, ctx) \
for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) \
__ptr++; \
__ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL; \
}); \
it++)
#define ir2_foreach_avail(it) \
for (struct ir2_instr **__instrp = avail, *it; \
it = *__instrp, __instrp != &avail[avail_count]; __instrp++)
#define ir2_foreach_avail(it) \
for (struct ir2_instr **__instrp = avail, *it; \
it = *__instrp, __instrp != &avail[avail_count]; __instrp++)
#define ir2_foreach_src(it, instr) \
for (struct ir2_src *it = instr->src; \
it != &instr->src[instr->src_count]; it++)
#define ir2_foreach_src(it, instr) \
for (struct ir2_src *it = instr->src; it != &instr->src[instr->src_count]; \
it++)
/* mask for register allocation
* 64 registers with 4 components each = 256 bits
*/
/* typedef struct {
uint64_t data[4];
uint64_t data[4];
} regmask_t; */
static inline bool mask_isset(uint32_t * mask, unsigned num)
static inline bool
mask_isset(uint32_t *mask, unsigned num)
{
return ! !(mask[num / 32] & 1 << num % 32);
return !!(mask[num / 32] & 1 << num % 32);
}
static inline void mask_set(uint32_t * mask, unsigned num)
static inline void
mask_set(uint32_t *mask, unsigned num)
{
mask[num / 32] |= 1 << num % 32;
mask[num / 32] |= 1 << num % 32;
}
static inline void mask_unset(uint32_t * mask, unsigned num)
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
mask[num / 32] &= ~(1 << num % 32);
mask[num / 32] &= ~(1 << num % 32);
}
static inline unsigned mask_reg(uint32_t * mask, unsigned num)
static inline unsigned
mask_reg(uint32_t *mask, unsigned num)
{
return mask[num / 8] >> num % 8 * 4 & 0xf;
return mask[num / 8] >> num % 8 * 4 & 0xf;
}
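mask_set()/mask_isset()/mask_unset() address the 256-bit pool one component at a time (bit idx*4 + c), while mask_reg() reads back all four component bits of one register at once, indexing by 8 registers per uint32_t instead of 32 bits. A small sketch (assuming <assert.h> and a zeroed mask) of how the two views line up:

static void sketch_mask_demo(void)
{
   uint32_t mask[256 / 32] = {0};    /* 64 regs x 4 comps = 256 bits */
   mask_set(mask, 5 * 4 + 2);        /* allocate r5.z: bit 22 */
   assert(mask_isset(mask, 5 * 4 + 2));
   assert(mask_reg(mask, 5) == 0x4); /* per-register view: only z set */
}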
static inline bool is_export(struct ir2_instr *instr)
static inline bool
is_export(struct ir2_instr *instr)
{
return instr->type == IR2_ALU && instr->alu.export >= 0;
return instr->type == IR2_ALU && instr->alu.export >= 0;
}
static inline instr_alloc_type_t export_buf(unsigned num)
static inline instr_alloc_type_t
export_buf(unsigned num)
{
return num < 32 ? SQ_PARAMETER_PIXEL :
num >= 62 ? SQ_POSITION : SQ_MEMORY;
return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
}
/* component c for channel i */
static inline unsigned swiz_set(unsigned c, unsigned i)
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
return ((c - i) & 3) << i * 2;
return ((c - i) & 3) << i * 2;
}
/* get swizzle in channel i */
static inline unsigned swiz_get(unsigned swiz, unsigned i)
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
return ((swiz >> i * 2) + i) & 3;
return ((swiz >> i * 2) + i) & 3;
}
static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
unsigned swiz = 0;
for (int i = 0; i < 4; i++)
swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);
return swiz;
unsigned swiz = 0;
for (int i = 0; i < 4; i++)
swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);
return swiz;
}
static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
unsigned swiz = 0;
for (int i = 0; i < 4; i++)
swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);
*swiz0 = swiz;
unsigned swiz = 0;
for (int i = 0; i < 4; i++)
swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);
*swiz0 = swiz;
}
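swiz_merge(swiz0, swiz1) composes the two: channel i of the result is swiz0's selection at channel swiz1[i], i.e. it answers what (value.swiz0).swiz1 reads from the original value. A quick hypothetical check:

/* sketch: (v.yyyy).zzzz still reads v.y in every channel */
static void sketch_swiz_merge_demo(void)
{
   assert(swiz_merge(IR2_SWIZZLE_YYYY, IR2_SWIZZLE_ZZZZ) == IR2_SWIZZLE_YYYY);
}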
static inline struct ir2_reg * get_reg(struct ir2_instr *instr)
static inline struct ir2_reg *
get_reg(struct ir2_instr *instr)
{
return instr->is_ssa ? &instr->ssa : instr->reg;
return instr->is_ssa ? &instr->ssa : instr->reg;
}
static inline struct ir2_reg *
get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
{
switch (src->type) {
case IR2_SRC_INPUT:
return &ctx->input[src->num];
case IR2_SRC_SSA:
return &ctx->instr[src->num].ssa;
case IR2_SRC_REG:
return &ctx->reg[src->num];
default:
return NULL;
}
switch (src->type) {
case IR2_SRC_INPUT:
return &ctx->input[src->num];
case IR2_SRC_SSA:
return &ctx->instr[src->num].ssa;
case IR2_SRC_REG:
return &ctx->reg[src->num];
default:
return NULL;
}
}
/* gets a ncomp value for the dst */
static inline unsigned dst_ncomp(struct ir2_instr *instr)
static inline unsigned
dst_ncomp(struct ir2_instr *instr)
{
if (instr->is_ssa)
return instr->ssa.ncomp;
if (instr->is_ssa)
return instr->ssa.ncomp;
if (instr->type == IR2_FETCH)
return instr->reg->ncomp;
if (instr->type == IR2_FETCH)
return instr->reg->ncomp;
assert(instr->type == IR2_ALU);
assert(instr->type == IR2_ALU);
unsigned ncomp = 0;
for (int i = 0; i < instr->reg->ncomp; i++)
ncomp += !!(instr->alu.write_mask & 1 << i);
return ncomp;
unsigned ncomp = 0;
for (int i = 0; i < instr->reg->ncomp; i++)
ncomp += !!(instr->alu.write_mask & 1 << i);
return ncomp;
}
/* gets a ncomp value for the src registers */
static inline unsigned src_ncomp(struct ir2_instr *instr)
static inline unsigned
src_ncomp(struct ir2_instr *instr)
{
if (instr->type == IR2_FETCH) {
switch (instr->fetch.opc) {
case VTX_FETCH:
return 1;
case TEX_FETCH:
return instr->fetch.tex.is_cube ? 3 : 2;
case TEX_SET_TEX_LOD:
return 1;
default:
assert(0);
}
}
if (instr->type == IR2_FETCH) {
switch (instr->fetch.opc) {
case VTX_FETCH:
return 1;
case TEX_FETCH:
return instr->fetch.tex.is_cube ? 3 : 2;
case TEX_SET_TEX_LOD:
return 1;
default:
assert(0);
}
}
switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs:
return 1;
default:
break;
}
switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs:
return 1;
default:
break;
}
switch (instr->alu.vector_opc) {
case DOT2ADDv:
return 2;
case DOT3v:
return 3;
case DOT4v:
case CUBEv:
case PRED_SETE_PUSHv:
return 4;
default:
return dst_ncomp(instr);
}
switch (instr->alu.vector_opc) {
case DOT2ADDv:
return 2;
case DOT3v:
return 3;
case DOT4v:
case CUBEv:
case PRED_SETE_PUSHv:
return 4;
default:
return dst_ncomp(instr);
}
}
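Note the asymmetry with dst_ncomp(): a dot product reads more components than it writes. A sketch with a hand-built instruction (this assumes a zero-initialized scalar_opc does not land in the PRED_SETEs..KILLONEs range):

/* sketch: DOT3v reads 3 components per source but writes only 1 */
struct ir2_instr tmp = {
   .type = IR2_ALU,
   .is_ssa = true,
   .ssa = {.ncomp = 1},
   .alu = {.vector_opc = DOT3v, .write_mask = 0x1},
};
assert(src_ncomp(&tmp) == 3);
assert(dst_ncomp(&tmp) == 1);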


@@ -27,201 +27,217 @@
#include "ir2_private.h"
/* if an instruction has side effects, we should never kill it */
static bool has_side_effects(struct ir2_instr *instr)
static bool
has_side_effects(struct ir2_instr *instr)
{
if (instr->type == IR2_CF)
return true;
else if (instr->type == IR2_FETCH)
return false;
if (instr->type == IR2_CF)
return true;
else if (instr->type == IR2_FETCH)
return false;
switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs:
return true;
default:
break;
}
switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs:
return true;
default:
break;
}
switch (instr->alu.vector_opc) {
case PRED_SETE_PUSHv ... KILLNEv:
return true;
default:
break;
}
switch (instr->alu.vector_opc) {
case PRED_SETE_PUSHv ... KILLNEv:
return true;
default:
break;
}
return instr->alu.export >= 0;
return instr->alu.export >= 0;
}
/* mark an instruction as required, and all its sources recursively */
static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
static void
set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
{
struct ir2_reg *reg;
struct ir2_reg *reg;
/* don't repeat work already done */
if (instr->need_emit)
return;
/* don't repeat work already done */
if (instr->need_emit)
return;
instr->need_emit = true;
instr->need_emit = true;
ir2_foreach_src(src, instr) {
switch (src->type) {
case IR2_SRC_SSA:
set_need_emit(ctx, &ctx->instr[src->num]);
break;
case IR2_SRC_REG:
/* slow .. */
reg = get_reg_src(ctx, src);
ir2_foreach_instr(instr, ctx) {
if (!instr->is_ssa && instr->reg == reg)
set_need_emit(ctx, instr);
}
break;
default:
break;
}
}
ir2_foreach_src(src, instr)
{
switch (src->type) {
case IR2_SRC_SSA:
set_need_emit(ctx, &ctx->instr[src->num]);
break;
case IR2_SRC_REG:
/* slow .. */
reg = get_reg_src(ctx, src);
ir2_foreach_instr(instr, ctx)
{
if (!instr->is_ssa && instr->reg == reg)
set_need_emit(ctx, instr);
}
break;
default:
break;
}
}
}
/* get current bit mask of allocated components for a register */
static unsigned reg_mask(struct ir2_context *ctx, unsigned idx)
static unsigned
reg_mask(struct ir2_context *ctx, unsigned idx)
{
return ctx->reg_state[idx/8] >> idx%8*4 & 0xf;
return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
}
static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
static void
reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
idx = idx * 4 + c;
ctx->reg_state[idx/32] |= 1 << idx%32;
idx = idx * 4 + c;
ctx->reg_state[idx / 32] |= 1 << idx % 32;
}
static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
static void
reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
idx = idx * 4 + c;
ctx->reg_state[idx/32] &= ~(1 << idx%32);
idx = idx * 4 + c;
ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
}
void ra_count_refs(struct ir2_context *ctx)
void
ra_count_refs(struct ir2_context *ctx)
{
struct ir2_reg *reg;
struct ir2_reg *reg;
/* mark instructions as needed
* need to do this because "substitutions" pass makes many movs not needed
*/
ir2_foreach_instr(instr, ctx) {
if (has_side_effects(instr))
set_need_emit(ctx, instr);
}
/* mark instructions as needed
* need to do this because "substitutions" pass makes many movs not needed
*/
ir2_foreach_instr(instr, ctx)
{
if (has_side_effects(instr))
set_need_emit(ctx, instr);
}
/* compute ref_counts */
ir2_foreach_instr(instr, ctx) {
/* kill non-needed so they can be skipped */
if (!instr->need_emit) {
instr->type = IR2_NONE;
continue;
}
/* compute ref_counts */
ir2_foreach_instr(instr, ctx)
{
/* kill non-needed so they can be skipped */
if (!instr->need_emit) {
instr->type = IR2_NONE;
continue;
}
ir2_foreach_src(src, instr) {
if (src->type == IR2_SRC_CONST)
continue;
ir2_foreach_src(src, instr)
{
if (src->type == IR2_SRC_CONST)
continue;
reg = get_reg_src(ctx, src);
for (int i = 0; i < src_ncomp(instr); i++)
reg->comp[swiz_get(src->swizzle, i)].ref_count++;
}
}
reg = get_reg_src(ctx, src);
for (int i = 0; i < src_ncomp(instr); i++)
reg->comp[swiz_get(src->swizzle, i)].ref_count++;
}
}
}
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
bool export, uint8_t export_writemask)
void
ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
uint8_t export_writemask)
{
/* for export, don't allocate anything but set component layout */
if (export) {
for (int i = 0; i < 4; i++)
reg->comp[i].c = i;
return;
}
/* for export, don't allocate anything but set component layout */
if (export) {
for (int i = 0; i < 4; i++)
reg->comp[i].c = i;
return;
}
unsigned idx = force_idx;
unsigned idx = force_idx;
/* TODO: allocate into the same register if theres room
* note: the blob doesn't do it, so verify that it is indeed better
* also, doing it would conflict with scalar mov insertion
*/
/* TODO: allocate into the same register if theres room
* note: the blob doesn't do it, so verify that it is indeed better
* also, doing it would conflict with scalar mov insertion
*/
/* check if already allocated */
for (int i = 0; i < reg->ncomp; i++) {
if (reg->comp[i].alloc)
return;
}
/* check if already allocated */
for (int i = 0; i < reg->ncomp; i++) {
if (reg->comp[i].alloc)
return;
}
if (force_idx < 0) {
for (idx = 0; idx < 64; idx++) {
if (reg_mask(ctx, idx) == 0)
break;
}
}
assert(idx != 64); /* TODO ran out of register space.. */
if (force_idx < 0) {
for (idx = 0; idx < 64; idx++) {
if (reg_mask(ctx, idx) == 0)
break;
}
}
assert(idx != 64); /* TODO ran out of register space.. */
/* update max_reg value */
ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx);
/* update max_reg value */
ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
unsigned mask = reg_mask(ctx, idx);
unsigned mask = reg_mask(ctx, idx);
for (int i = 0; i < reg->ncomp; i++) {
/* don't allocate never used values */
if (reg->comp[i].ref_count == 0) {
reg->comp[i].c = 7;
continue;
}
for (int i = 0; i < reg->ncomp; i++) {
/* don't allocate never used values */
if (reg->comp[i].ref_count == 0) {
reg->comp[i].c = 7;
continue;
}
/* TODO */
unsigned c = 1 ? i : (ffs(~mask) - 1);
mask |= 1 << c;
reg->comp[i].c = c;
reg_setmask(ctx, idx, c);
reg->comp[i].alloc = true;
}
/* TODO */
unsigned c = 1 ? i : (ffs(~mask) - 1);
mask |= 1 << c;
reg->comp[i].c = c;
reg_setmask(ctx, idx, c);
reg->comp[i].alloc = true;
}
reg->idx = idx;
ctx->live_regs[reg->idx] = reg;
reg->idx = idx;
ctx->live_regs[reg->idx] = reg;
}
/* reduce srcs ref_count and free if needed */
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
void
ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
{
struct ir2_reg *reg;
struct ir2_reg_component *comp;
struct ir2_reg *reg;
struct ir2_reg_component *comp;
ir2_foreach_src(src, instr) {
if (src->type == IR2_SRC_CONST)
continue;
ir2_foreach_src(src, instr)
{
if (src->type == IR2_SRC_CONST)
continue;
reg = get_reg_src(ctx, src);
/* XXX use before write case */
reg = get_reg_src(ctx, src);
/* XXX use before write case */
for (int i = 0; i < src_ncomp(instr); i++) {
comp = &reg->comp[swiz_get(src->swizzle, i)];
if (!--comp->ref_count && reg->block_idx_free < 0) {
reg_freemask(ctx, reg->idx, comp->c);
comp->alloc = false;
}
}
}
for (int i = 0; i < src_ncomp(instr); i++) {
comp = &reg->comp[swiz_get(src->swizzle, i)];
if (!--comp->ref_count && reg->block_idx_free < 0) {
reg_freemask(ctx, reg->idx, comp->c);
comp->alloc = false;
}
}
}
}
/* free any regs left for a block */
void ra_block_free(struct ir2_context *ctx, unsigned block)
void
ra_block_free(struct ir2_context *ctx, unsigned block)
{
ir2_foreach_live_reg(reg, ctx) {
if (reg->block_idx_free != block)
continue;
ir2_foreach_live_reg(reg, ctx)
{
if (reg->block_idx_free != block)
continue;
for (int i = 0; i < reg->ncomp; i++) {
if (!reg->comp[i].alloc) /* XXX should never be true? */
continue;
for (int i = 0; i < reg->ncomp; i++) {
if (!reg->comp[i].alloc) /* XXX should never be true? */
continue;
reg_freemask(ctx, reg->idx, reg->comp[i].c);
reg->comp[i].alloc = false;
}
ctx->live_regs[reg->idx] = NULL;
}
reg_freemask(ctx, reg->idx, reg->comp[i].c);
reg->comp[i].alloc = false;
}
ctx->live_regs[reg->idx] = NULL;
}
}


@@ -27,88 +27,92 @@
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_dual_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_format.h"
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
void *
fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
const struct pipe_blend_state *cso)
{
struct fd3_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
int i;
struct fd3_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
int i;
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func);
}
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func);
}
so = CALLOC_STRUCT(fd3_blend_stateobj);
if (!so)
return NULL;
so = CALLOC_STRUCT(fd3_blend_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
so->rb_mrt[i].blend_control =
A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].blend_control =
A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
fd_blend_factor(rt->rgb_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
fd_blend_factor(rt->rgb_dst_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control =
A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
so->rb_mrt[i].control =
A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable)
so->rb_mrt[i].control |=
A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A3XX_RB_MRT_CONTROL_BLEND |
A3XX_RB_MRT_CONTROL_BLEND2;
if (rt->blend_enable)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A3XX_RB_MRT_CONTROL_BLEND |
A3XX_RB_MRT_CONTROL_BLEND2;
if (reads_dest)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
if (reads_dest)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
if (cso->dither)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
}
if (cso->dither)
so->rb_mrt[i].control |=
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
}
if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
return so;
return so;
}


@@ -27,27 +27,27 @@
#ifndef FD3_BLEND_H_
#define FD3_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd3_blend_stateobj {
struct pipe_blend_state base;
uint32_t rb_render_control;
struct {
uint32_t blend_control;
uint32_t control;
} rb_mrt[A3XX_MAX_RENDER_TARGETS];
struct pipe_blend_state base;
uint32_t rb_render_control;
struct {
uint32_t blend_control;
uint32_t control;
} rb_mrt[A3XX_MAX_RENDER_TARGETS];
};
static inline struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd3_blend_stateobj *)blend;
return (struct fd3_blend_stateobj *)blend;
}
void * fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
void *fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
#endif /* FD3_BLEND_H_ */


@@ -26,8 +26,8 @@
#include "freedreno_query_hw.h"
#include "fd3_context.h"
#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_draw.h"
#include "fd3_emit.h"
#include "fd3_gmem.h"
@@ -38,25 +38,24 @@
#include "fd3_zsa.h"
static void
fd3_context_destroy(struct pipe_context *pctx)
in_dt
fd3_context_destroy(struct pipe_context *pctx) in_dt
{
struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
u_upload_destroy(fd3_ctx->border_color_uploader);
pipe_resource_reference(&fd3_ctx->border_color_buf, NULL);
u_upload_destroy(fd3_ctx->border_color_uploader);
pipe_resource_reference(&fd3_ctx->border_color_buf, NULL);
fd_context_destroy(pctx);
fd_context_destroy(pctx);
fd_bo_del(fd3_ctx->vs_pvt_mem);
fd_bo_del(fd3_ctx->fs_pvt_mem);
fd_bo_del(fd3_ctx->vsc_size_mem);
fd_bo_del(fd3_ctx->vs_pvt_mem);
fd_bo_del(fd3_ctx->fs_pvt_mem);
fd_bo_del(fd3_ctx->vsc_size_mem);
fd_context_cleanup_common_vbos(&fd3_ctx->base);
fd_context_cleanup_common_vbos(&fd3_ctx->base);
fd_hw_query_fini(pctx);
fd_hw_query_fini(pctx);
free(fd3_ctx);
free(fd3_ctx);
}
/* clang-format off */
@@ -73,55 +72,55 @@ static const uint8_t primtypes[] = {
/* clang-format on */
struct pipe_context *
fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
in_dt
fd3_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags) in_dt
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
struct pipe_context *pctx;
struct fd_screen *screen = fd_screen(pscreen);
struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
struct pipe_context *pctx;
if (!fd3_ctx)
return NULL;
if (!fd3_ctx)
return NULL;
pctx = &fd3_ctx->base.base;
pctx->screen = pscreen;
pctx = &fd3_ctx->base.base;
pctx->screen = pscreen;
fd3_ctx->base.dev = fd_device_ref(screen->dev);
fd3_ctx->base.screen = fd_screen(pscreen);
fd3_ctx->base.last.key = &fd3_ctx->last_key;
fd3_ctx->base.dev = fd_device_ref(screen->dev);
fd3_ctx->base.screen = fd_screen(pscreen);
fd3_ctx->base.last.key = &fd3_ctx->last_key;
pctx->destroy = fd3_context_destroy;
pctx->create_blend_state = fd3_blend_state_create;
pctx->create_rasterizer_state = fd3_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
pctx->destroy = fd3_context_destroy;
pctx->create_blend_state = fd3_blend_state_create;
pctx->create_rasterizer_state = fd3_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
fd3_draw_init(pctx);
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
fd3_emit_init(pctx);
fd3_draw_init(pctx);
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
fd3_emit_init(pctx);
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;
fd_hw_query_init(pctx);
fd_hw_query_init(pctx);
fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd3_ctx->vs_pvt_mem =
fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd3_ctx->fs_pvt_mem =
fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd3_ctx->vsc_size_mem =
fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd_context_setup_common_vbos(&fd3_ctx->base);
fd_context_setup_common_vbos(&fd3_ctx->base);
fd3_query_context_init(pctx);
fd3_query_context_init(pctx);
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
PIPE_USAGE_STREAM, 0);
fd3_ctx->border_color_uploader =
u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx;
return pctx;
}


@@ -33,31 +33,30 @@
#include "ir3/ir3_shader.h"
struct fd3_context {
struct fd_context base;
struct fd_context base;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*/
struct fd_bo *vsc_size_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*/
struct fd_bo *vsc_size_mem;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
};
static inline struct fd3_context *
fd3_context(struct fd_context *ctx)
{
return (struct fd3_context *)ctx;
return (struct fd3_context *)ctx;
}
struct pipe_context *
fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
struct pipe_context *fd3_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags);
#endif /* FD3_CONTEXT_H_ */


@@ -25,142 +25,146 @@
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/format/u_format.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/format/u_format.h"
#include "util/u_string.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd3_draw.h"
#include "fd3_context.h"
#include "fd3_draw.h"
#include "fd3_emit.h"
#include "fd3_program.h"
#include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_zsa.h"
static inline uint32_t
add_sat(uint32_t a, int32_t b)
{
int64_t ret = (uint64_t)a + (int64_t)b;
if (ret > ~0U)
return ~0U;
if (ret < 0)
return 0;
return (uint32_t)ret;
int64_t ret = (uint64_t)a + (int64_t)b;
if (ret > ~0U)
return ~0U;
if (ret < 0)
return 0;
return (uint32_t)ret;
}
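add_sat() widens to 64 bits so the biased index clamps to the u32 range instead of wrapping; it is used below to fold index_bias into VFD_INDEX_MIN/MAX. Its edge cases, with hypothetical values:

/* sketch: clamping behaviour of add_sat() */
assert(add_sat(0xffffffffu, 5) == 0xffffffffu); /* saturates high */
assert(add_sat(3, -10) == 0);                   /* saturates low */
assert(add_sat(100, -10) == 90);                /* in-range case */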
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit, unsigned index_offset)
assert_dt
struct fd3_emit *emit, unsigned index_offset) assert_dt
{
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd3_emit_state(ctx, ring, emit);
fd3_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd3_emit_vertex_bufs(ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd3_emit_vertex_bufs(ring, emit);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, info->index_bounds_valid ? add_sat(info->min_index, info->index_size ? info->index_bias : 0) : 0); /* VFD_INDEX_MIN */
OUT_RING(ring, info->index_bounds_valid ? add_sat(info->max_index, info->index_size ? info->index_bias : 0) : ~0); /* VFD_INDEX_MAX */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, info->index_bounds_valid
? add_sat(info->min_index,
info->index_size ? info->index_bias : 0)
: 0); /* VFD_INDEX_MIN */
OUT_RING(ring, info->index_bounds_valid
? add_sat(info->max_index,
info->index_size ? info->index_bias : 0)
: ~0); /* VFD_INDEX_MAX */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, info->index_size ? info->index_bias
: emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index
: 0xffffffff);
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
fd3_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
fd_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info, emit->draw, index_offset);
fd_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
emit->draw, index_offset);
}
static bool
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
unsigned index_offset)
in_dt
unsigned index_offset) in_dt
{
struct fd3_emit emit = {
.debug = &ctx->debug,
.vtx = &ctx->vtx,
.info = info,
.indirect = indirect,
.draw = draw,
.key = {
.vs = ctx->prog.vs,
.fs = ctx->prog.fs,
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
struct fd3_emit emit = {
.debug = &ctx->debug,
.vtx = &ctx->vtx,
.info = info,
.indirect = indirect,
.draw = draw,
.key =
{
.vs = ctx->prog.vs,
.fs = ctx->prog.fs,
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
if (info->mode != PIPE_PRIM_MAX &&
!indirect &&
!info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
return false;
if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer))
emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer))
emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
unsigned dirty = ctx->dirty;
unsigned dirty = ctx->dirty;
emit.prog = fd3_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
emit.prog = fd3_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */
if (!emit.prog)
return false;
/* bail if compile failed: */
if (!emit.prog)
return false;
const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp);
ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */
/* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
}
if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
}
emit.binning_pass = false;
emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
emit.binning_pass = false;
emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
/* and now binning pass: */
emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
/* and now binning pass: */
emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
fd_context_all_clean(ctx);
fd_context_all_clean(ctx);
return true;
return true;
}
void
fd3_draw_init(struct pipe_context *pctx)
disable_thread_safety_analysis
fd3_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd3_draw_vbo;
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd3_draw_vbo;
}

File diff suppressed because it is too large


@@ -29,69 +29,71 @@
#include "pipe/p_context.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "fd3_format.h"
#include "fd3_program.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_cache.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface **psurf, int bufs);
struct pipe_surface **psurf, int bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx;
const struct fd3_program_state *prog;
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw;
bool binning_pass;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx;
const struct fd3_program_state *prog;
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw;
bool binning_pass;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
uint32_t sprite_coord_enable;
bool sprite_coord_mode;
bool rasterflat;
bool skip_consts;
uint32_t sprite_coord_enable;
bool sprite_coord_mode;
bool rasterflat;
bool skip_consts;
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs;
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs;
};
static inline const struct ir3_shader_variant *
fd3_emit_get_vp(struct fd3_emit *emit)
{
if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
}
return emit->vs;
if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
}
return emit->vs;
}
static inline const struct ir3_shader_variant *
fd3_emit_get_fp(struct fd3_emit *emit)
{
if (!emit->fs) {
if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs;
} else {
emit->fs = emit->prog->fs;
}
}
return emit->fs;
if (!emit->fs) {
if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs;
} else {
emit->fs = emit->prog->fs;
}
}
return emit->fs;
}
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) assert_dt;
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd3_emit *emit) assert_dt;
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit) assert_dt;
struct fd3_emit *emit) assert_dt;
void fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
void fd3_emit_restore(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt;
void fd3_emit_init_screen(struct pipe_screen *pscreen);
void fd3_emit_init(struct pipe_context *pctx);
@@ -99,19 +101,19 @@ void fd3_emit_init(struct pipe_context *pctx);
static inline void
fd3_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
__OUT_IB(ring, true, target);
__OUT_IB(ring, true, target);
}
static inline void
fd3_emit_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
assert_dt
fd3_emit_cache_flush(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt
{
fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
}
#endif /* FD3_EMIT_H */


@@ -32,42 +32,36 @@
*/
struct fd3_format {
enum a3xx_vtx_fmt vtx;
enum a3xx_tex_fmt tex;
enum a3xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
enum a3xx_vtx_fmt vtx;
enum a3xx_tex_fmt tex;
enum a3xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
};
/* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT_ ## fmt, \
.tex = TFMT_ ## fmt, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT_##fmt, \
.tex = TFMT_##fmt, \
.rb = RB_##rbfmt, \
.swap = swapfmt}
/* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT_NONE, \
.tex = TFMT_ ## fmt, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT_NONE, \
.tex = TFMT_##fmt, \
.rb = RB_##rbfmt, \
.swap = swapfmt}
/* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT_ ## fmt, \
.tex = TFMT_NONE, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT_##fmt, \
.tex = TFMT_NONE, \
.rb = RB_##rbfmt, \
.swap = swapfmt}
/* clang-format off */
static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
@@ -294,80 +288,90 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
enum a3xx_vtx_fmt
fd3_pipe2vtx(enum pipe_format format)
{
if (!formats[format].present)
return VFMT_NONE;
return formats[format].vtx;
if (!formats[format].present)
return VFMT_NONE;
return formats[format].vtx;
}
enum a3xx_tex_fmt
fd3_pipe2tex(enum pipe_format format)
{
if (!formats[format].present)
return TFMT_NONE;
return formats[format].tex;
if (!formats[format].present)
return TFMT_NONE;
return formats[format].tex;
}
enum a3xx_color_fmt
fd3_pipe2color(enum pipe_format format)
{
if (!formats[format].present)
return RB_NONE;
return formats[format].rb;
if (!formats[format].present)
return RB_NONE;
return formats[format].rb;
}
enum a3xx_color_swap
fd3_pipe2swap(enum pipe_format format)
{
if (!formats[format].present)
return WZYX;
return formats[format].swap;
if (!formats[format].present)
return WZYX;
return formats[format].swap;
}
enum a3xx_color_fmt
fd3_fs_output_format(enum pipe_format format)
{
if (util_format_is_srgb(format))
return RB_R16G16B16A16_FLOAT;
switch (format) {
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R11G11B10_FLOAT:
return RB_R16G16B16A16_FLOAT;
case PIPE_FORMAT_L8_UNORM:
return RB_R8G8B8A8_UNORM;
default:
return fd3_pipe2color(format);
}
if (util_format_is_srgb(format))
return RB_R16G16B16A16_FLOAT;
switch (format) {
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R11G11B10_FLOAT:
return RB_R16G16B16A16_FLOAT;
case PIPE_FORMAT_L8_UNORM:
return RB_R8G8B8A8_UNORM;
default:
return fd3_pipe2color(format);
}
}
static inline enum a3xx_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_X: return A3XX_TEX_X;
case PIPE_SWIZZLE_Y: return A3XX_TEX_Y;
case PIPE_SWIZZLE_Z: return A3XX_TEX_Z;
case PIPE_SWIZZLE_W: return A3XX_TEX_W;
case PIPE_SWIZZLE_0: return A3XX_TEX_ZERO;
case PIPE_SWIZZLE_1: return A3XX_TEX_ONE;
}
switch (swiz) {
default:
case PIPE_SWIZZLE_X:
return A3XX_TEX_X;
case PIPE_SWIZZLE_Y:
return A3XX_TEX_Y;
case PIPE_SWIZZLE_Z:
return A3XX_TEX_Z;
case PIPE_SWIZZLE_W:
return A3XX_TEX_W;
case PIPE_SWIZZLE_0:
return A3XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A3XX_TEX_ONE;
}
}
uint32_t
fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc =
util_format_description(format);
unsigned char swiz[4] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
}, rswiz[4];
const struct util_format_description *desc = util_format_description(format);
unsigned char swiz[4] =
{
swizzle_r,
swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}


@@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD3_FORMAT_H_ */

File diff suppressed because it is too large


@@ -25,465 +25,467 @@
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_string.h"
#include "freedreno_program.h"
#include "fd3_program.h"
#include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_texture.h"
bool
fd3_needs_manual_clipping(const struct ir3_shader *shader,
const struct pipe_rasterizer_state *rast)
const struct pipe_rasterizer_state *rast)
{
uint64_t outputs = ir3_shader_outputs(shader);
uint64_t outputs = ir3_shader_outputs(shader);
return (!rast->depth_clip_near ||
util_bitcount(rast->clip_plane_enable) > 6 ||
outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
(1ULL << VARYING_SLOT_CLIP_DIST0) |
(1ULL << VARYING_SLOT_CLIP_DIST1)));
return (!rast->depth_clip_near ||
util_bitcount(rast->clip_plane_enable) > 6 ||
outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
(1ULL << VARYING_SLOT_CLIP_DIST0) |
(1ULL << VARYING_SLOT_CLIP_DIST1)));
}
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
const struct ir3_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
const struct ir3_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
if (so->type == MESA_SHADER_VERTEX) {
sb = SB_VERT_SHADER;
} else {
sb = SB_FRAG_SHADER;
}
if (so->type == MESA_SHADER_VERTEX) {
sb = SB_VERT_SHADER;
} else {
sb = SB_FRAG_SHADER;
}
if (FD_DBG(DIRECT)) {
sz = si->sizedwords;
src = SS_DIRECT;
bin = fd_bo_map(so->bo);
} else {
sz = 0;
src = SS_INDIRECT;
bin = NULL;
}
if (FD_DBG(DIRECT)) {
sz = si->sizedwords;
src = SS_DIRECT;
bin = fd_bo_map(so->bo);
} else {
sz = 0;
src = SS_INDIRECT;
bin = NULL;
}
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
if (bin) {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
} else {
OUT_RELOC(ring, so->bo, 0,
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
}
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
if (bin) {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
} else {
OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
}
}
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int nr, struct pipe_surface **bufs)
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
struct pipe_surface **bufs)
{
const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid;
uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
uint32_t color_regid[4] = {0};
int constmode;
int i, j;
const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid;
uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
uint32_t color_regid[4] = {0};
int constmode;
int i, j;
debug_assert(nr <= ARRAY_SIZE(color_regid));
debug_assert(nr <= ARRAY_SIZE(color_regid));
vp = fd3_emit_get_vp(emit);
fp = fd3_emit_get_fp(emit);
vp = fd3_emit_get_vp(emit);
fp = fd3_emit_get_fp(emit);
vsi = &vp->info;
fsi = &fp->info;
vsi = &vp->info;
fsi = &fp->info;
fpbuffer = BUFFER;
vpbuffer = BUFFER;
fpbuffersz = fp->instrlen;
vpbuffersz = vp->instrlen;
fpbuffer = BUFFER;
vpbuffer = BUFFER;
fpbuffersz = fp->instrlen;
vpbuffersz = vp->instrlen;
/*
* Decide whether to use BUFFER or CACHE mode for VS and FS. It
* appears like 256 is the hard limit, but when the combined size
* exceeds 128 then blob will try to keep FS in BUFFER mode and
* switch to CACHE for VS until VS is too large. The blob seems
* to switch FS out of BUFFER mode at slightly under 128. But
* a bit fuzzy on the decision tree, so use slightly conservative
* limits.
*
* TODO check if these thresholds for BUFFER vs CACHE mode are the
* same for all a3xx or whether we need to consider the gpuid
*/
/*
* Decide whether to use BUFFER or CACHE mode for VS and FS. It
* appears like 256 is the hard limit, but when the combined size
* exceeds 128 then blob will try to keep FS in BUFFER mode and
* switch to CACHE for VS until VS is too large. The blob seems
* to switch FS out of BUFFER mode at slightly under 128. But
* a bit fuzzy on the decision tree, so use slightly conservative
* limits.
*
* TODO check if these thresholds for BUFFER vs CACHE mode are the
* same for all a3xx or whether we need to consider the gpuid
*/
if ((fpbuffersz + vpbuffersz) > 128) {
if (fpbuffersz < 112) {
/* FP:BUFFER VP:CACHE */
vpbuffer = CACHE;
vpbuffersz = 256 - fpbuffersz;
} else if (vpbuffersz < 112) {
/* FP:CACHE VP:BUFFER */
fpbuffer = CACHE;
fpbuffersz = 256 - vpbuffersz;
} else {
/* FP:CACHE VP:CACHE */
vpbuffer = fpbuffer = CACHE;
vpbuffersz = fpbuffersz = 192;
}
}
if ((fpbuffersz + vpbuffersz) > 128) {
if (fpbuffersz < 112) {
/* FP:BUFFER VP:CACHE */
vpbuffer = CACHE;
vpbuffersz = 256 - fpbuffersz;
} else if (vpbuffersz < 112) {
/* FP:CACHE VP:BUFFER */
fpbuffer = CACHE;
fpbuffersz = 256 - vpbuffersz;
} else {
/* FP:CACHE VP:CACHE */
vpbuffer = fpbuffer = CACHE;
vpbuffersz = fpbuffersz = 192;
}
}
if (fpbuffer == BUFFER) {
fsoff = 128 - fpbuffersz;
} else {
fsoff = 256 - fpbuffersz;
}
if (fpbuffer == BUFFER) {
fsoff = 128 - fpbuffersz;
} else {
fsoff = 256 - fpbuffersz;
}
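Worked through with hypothetical sizes: vpbuffersz = 60 and fpbuffersz = 100 sum to 160 (> 128); fpbuffersz < 112 keeps the fragment shader in BUFFER mode, the vertex shader falls back to CACHE with vpbuffersz = 256 - 100 = 156, and since fpbuffer == BUFFER, fsoff = 128 - 100 = 28.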
/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
} else {
color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
}
pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
} else {
color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
}
face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
ij_regid[0] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
ij_regid[1] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
ij_regid[2] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
ij_regid[3] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
zwcoord_regid =
(coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2);
ij_regid[0] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
ij_regid[1] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
ij_regid[2] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
ij_regid[3] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
for (i = 0; i < nr; i++)
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3;
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
for (i = 0; i < nr; i++)
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3;
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those
* bits if we have updated const or shader..
*/
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
OUT_RING(ring,
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those
* bits if we have updated const or shader..
*/
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
OUT_RING(ring,
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
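/* VS constants start at offset 0 and FS constants at offset 128 of the
 * shared constant space (cf. the SP_FS_OBJ_OFFSET_REG CONSTOBJECTOFFSET
 * values programmed below) */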
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring,
A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
OUT_RING(ring,
A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0)));
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
struct ir3_shader_linkage l = {0};
ir3_link_shaders(&l, vp, fp, false);
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
j++;
reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
j++;
OUT_RING(ring, reg);
}
for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
OUT_RING(ring, reg);
}
OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
if (emit->binning_pass) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
} else {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring,
A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) |
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(
MAX2(fp->constlen - 1, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
MAX2(128, vp->constlen)) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
}
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
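/* the MRT field is a render-target count minus one; the MAX2 keeps it
 * valid even when nr == 0 */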
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
for (i = 0; i < 4; i++) {
uint32_t mrt_reg =
A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
if (i < nr) {
enum pipe_format fmt = pipe_surface_format(bufs[i]);
mrt_reg |=
COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
}
OUT_RING(ring, mrt_reg);
}
if (emit->binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
uint32_t vinterp[4], flatshade[2], vpsrepl[4];
memset(vinterp, 0, sizeof(vinterp));
memset(flatshade, 0, sizeof(flatshade));
memset(vpsrepl, 0, sizeof(vpsrepl));
/* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count;) {
/* NOTE: varyings are packed, so if compmask is 0xb
* then first, third, and fourth component occupy
* three consecutive varying slots:
*/
unsigned compmask = fp->inputs[j].compmask;
uint32_t inloc = fp->inputs[j].inloc;
if (fp->inputs[j].flat ||
(fp->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc;
for (i = 0; i < 4; i++) {
if (compmask & (1 << i)) {
vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
flatshade[loc / 32] |= 1 << (loc % 32);
loc++;
}
}
}
bool coord_mode = emit->sprite_coord_mode;
if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) {
/* mask is two 2-bit fields, where:
* '01' -> S
* '10' -> T
* '11' -> 1 - T (flip mode)
*/
unsigned mask = coord_mode ? 0b1101 : 0b1001;
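/* i.e. the S slot always gets '01'; the T slot gets '11' (1 - T)
 * when coord_mode is set, else '10' (T) */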
uint32_t loc = inloc;
if (compmask & 0x1) {
vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
loc++;
}
if (compmask & 0x2) {
vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
loc++;
}
if (compmask & 0x4) {
/* .z <- 0.0f */
vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
loc++;
}
if (compmask & 0x8) {
/* .w <- 1.0f */
vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
loc++;
}
}
}
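/* worked example: a point-sprite texcoord at inloc 4 with compmask 0xf
 * and coord_mode set yields
 *   vpsrepl[0] |= 0x1 << 8    (.x <- S)
 *   vpsrepl[0] |= 0x3 << 10   (.y <- 1 - T)
 *   vinterp[0] |= 0x2 << 12   (.z <- 0.0)
 *   vinterp[0] |= 0x3 << 14   (.w <- 1.0)
 */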
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
}
if (vpbuffer == BUFFER)
emit_shader(ring, vp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
if (!emit->binning_pass) {
if (fpbuffer == BUFFER)
emit_shader(ring, fp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
}
}
static struct ir3_program_state *
fd3_program_create(void *data, struct ir3_shader_variant *bs,
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *fs,
const struct ir3_shader_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
tc_assert_driver_thread(ctx->tc);
state->bs = bs;
state->vs = vs;
state->fs = fs;
return &state->base;
}
static void
fd3_program_destroy(void *data, struct ir3_program_state *state)
{
struct fd3_program_state *so = fd3_program_state(state);
free(so);
}
static const struct ir3_cache_funcs cache_funcs = {
.create_state = fd3_program_create,
.destroy_state = fd3_program_destroy,
};
void
fd3_prog_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
ir3_prog_init(pctx);
fd_prog_init(pctx);
}
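The create_state/destroy_state pair registered above is the whole lifetime
contract with the shared ir3 shader cache: the cache resolves shader variants
for a key, asks the driver to wrap them in a state object, and hands the
object back to destroy_state when the cache is torn down. A minimal,
self-contained sketch of that vtable pattern (hypothetical names, plain C --
not the actual ir3 API):

#include <stdio.h>
#include <stdlib.h>

struct state {
   int key; /* a real driver would stash compiled shader variants here */
};

struct cache_funcs {
   struct state *(*create_state)(void *data, int key);
   void (*destroy_state)(void *data, struct state *state);
};

static struct state *
create_state(void *data, int key)
{
   struct state *s = calloc(1, sizeof(*s));
   if (!s)
      return NULL;
   s->key = key;
   return s;
}

static void
destroy_state(void *data, struct state *state)
{
   free(state);
}

static const struct cache_funcs funcs = {
   .create_state = create_state,
   .destroy_state = destroy_state,
};

int
main(void)
{
   struct state *s = funcs.create_state(NULL, 42);
   printf("cached state for key %d\n", s->key);
   funcs.destroy_state(NULL, s);
   return 0;
}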

View file

@ -36,24 +36,24 @@
struct fd3_emit;
struct fd3_program_state {
struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd3_program_state *
fd3_program_state(struct ir3_program_state *state)
{
return (struct fd3_program_state *)state;
}
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
struct pipe_surface **bufs);
void fd3_prog_init(struct pipe_context *pctx);
bool fd3_needs_manual_clipping(const struct ir3_shader *,
const struct pipe_rasterizer_state *);
#endif /* FD3_PROGRAM_H_ */

View file

@ -24,17 +24,16 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
#include "fd3_format.h"
#include "fd3_query.h"
struct fd_rb_samp_ctrs {
uint64_t ctr[16];
};
/*
@ -47,104 +46,103 @@ struct fd_rb_samp_ctrs {
static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
struct fd_hw_sample *samp =
fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register:
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, samp->offset);
OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
fd_event_write(batch, ring, ZPASS_DONE);
OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 | A3XX_VBIF_PERF_CNT_EN_CNT1 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
return samp;
}
static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
uint64_t n = 0;
unsigned i;
/* not quite sure what all of these are, possibly different
* counters for each MRT render target:
*/
for (i = 0; i < 16; i += 4)
n += end->ctr[i] - start->ctr[i];
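/* i.e. the start-to-end delta summed over ctr[0], ctr[4], ctr[8], ctr[12] */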
return n;
}
static void
occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->u64 += n;
}
static void
occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->b |= (n > 0);
}
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
};
void
fd3_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_hw_create_query;
ctx->query_prepare = fd_hw_query_prepare;
ctx->query_prepare_tile = fd_hw_query_prepare_tile;
ctx->query_update_batch = fd_hw_query_update_batch;
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
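/* all three occlusion query types share the same sample; they differ
 * only in how the result accumulates (a running count vs. a boolean) */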
}

View file

@ -24,80 +24,79 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_context.h"
#include "fd3_format.h"
#include "fd3_rasterizer.h"
void *
fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd3_rasterizer_stateobj *so;
float psize_min, psize_max;
so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
if (!so)
return NULL;
so->base = *cso;
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl =
COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
so->gras_su_point_minmax = A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
so->gras_su_poly_offset_offset =
A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
so->gras_su_mode_control =
A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
so->pc_prim_vtx_cntl = A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(
fd_polygon_mode(cso->fill_front)) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(
fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
if (cso->offset_tri)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip_near)
so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
return so;
}

View file

@ -27,28 +27,28 @@
#ifndef FD3_RASTERIZER_H_
#define FD3_RASTERIZER_H_
#include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd3_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
};
static inline struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd3_rasterizer_stateobj *)rast;
}
void *fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
#endif /* FD3_RASTERIZER_H_ */

View file

@ -26,95 +26,96 @@
#include "fd3_format.h"
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment,
enum pipe_format format)
{
struct pipe_resource *prsc = &rsc->b.b;
uint32_t level, size = 0;
uint32_t width0 = prsc->width0;
if (rsc->layout.tile_mode && prsc->target != PIPE_TEXTURE_CUBE)
width0 = util_next_power_of_two(width0);
/* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl_pitch(&rsc->layout, level);
uint32_t height = u_minify(prsc->height0, level);
if (rsc->layout.tile_mode) {
height = align(height, 4);
if (prsc->target != PIPE_TEXTURE_CUBE)
height = util_next_power_of_two(height);
}
uint32_t nblocksy = util_format_get_nblocksy(format, height);
slice->offset = size;
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
* different than what this code does), so once the layer size drops
* into range (<= 0xf000 bytes), we stop reducing it.
*/
if (prsc->target == PIPE_TEXTURE_3D &&
(level == 1 ||
(level > 1 && fd_resource_slice(rsc, level - 1)->size0 > 0xf000)))
slice->size0 = align(nblocksy * pitch, alignment);
else if (level == 0 || alignment == 1)
slice->size0 = align(nblocksy * pitch, alignment);
else
slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
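/* i.e. 3d textures keep shrinking per level until a layer fits in
 * 0xf000 bytes; array textures just inherit level 0's aligned size */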
size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
}
return size;
}
uint32_t
fd3_setup_slices(struct fd_resource *rsc)
{
uint32_t alignment;
switch (rsc->b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
alignment = 4096;
break;
default:
alignment = 1;
break;
}
return setup_slices(rsc, alignment, rsc->b.b.format);
}
static bool
ok_format(enum pipe_format pfmt)
{
enum a3xx_color_fmt fmt = fd3_pipe2color(pfmt);
if (fmt == RB_NONE)
return false;
switch (pfmt) {
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
case PIPE_FORMAT_Z32_FLOAT:
return false;
default:
break;
}
return true;
}
unsigned
fd3_tile_mode(const struct pipe_resource *tmpl)
{
if (ok_format(tmpl->format))
return TILE_4X4;
return LINEAR;
}

View file

@ -27,90 +27,84 @@
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "fd3_context.h"
#include "fd3_emit.h"
#include "fd3_format.h"
#include "fd3_resource.h"
#include "fd3_screen.h"
#include "ir3/ir3_compiler.h"
static bool
fd3_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count, unsigned usage)
{
unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return false;
}
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd3_pipe2vtx(format) != VFMT_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd3_pipe2tex(format) != TFMT_NONE)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
(fd3_pipe2color(format) != RB_NONE) &&
(fd3_pipe2tex(format) != TFMT_NONE)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format))
retval |= usage & PIPE_BIND_BLENDABLE;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0) &&
(fd3_pipe2tex(format) != TFMT_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x",
util_format_name(format), target, sample_count, usage, retval);
}
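/* supported only if every requested bind bit was accepted above */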
return retval == usage;
}
void
fd3_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A3XX_MAX_RENDER_TARGETS;
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
fd3_emit_init_screen(pscreen);
ir3_screen_init(pscreen);
screen->setup_slices = fd3_setup_slices;
if (FD_DBG(TTILE))
screen->tile_mode = fd3_tile_mode;
}

View file

@ -25,204 +25,199 @@
*/
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_format.h"
#include "fd3_texture.h"
static enum a3xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A3XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A3XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true;
return A3XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
return A3XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A3XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
static enum a3xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A3XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
static void *
fd3_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
if (!so)
return NULL;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
so->base = *cso;
so->needs_border = false;
so->texsamp0 =
COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A3XX_TEX_SAMP_0_ANISO(aniso) |
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
if (cso->compare_mode)
so->texsamp0 |=
A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
so->texsamp1 = A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
} else {
/* If we're not doing mipmap filtering, we still need a slightly > 0
* LOD clamp so the HW can decide between min and mag filtering of
* level 0.
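* (0.125 looks like an arbitrary small positive value: low enough to
* stay on level 0, nonzero so lod > 0 can still select min filtering.)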
*/
so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
A3XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
}
return so;
}
static enum a3xx_tex_type
tex_type(unsigned target)
{
switch (target) {
default:
assert(0);
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A3XX_TEX_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
return A3XX_TEX_2D;
case PIPE_TEXTURE_3D:
return A3XX_TEX_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return A3XX_TEX_CUBE;
}
}
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
unsigned lvl;
if (!so)
return NULL;
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->texconst0 = A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT;
if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
if (prsc->target == PIPE_BUFFER) {
lvl = 0;
so->texconst1 =
A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size /
util_format_get_blocksize(cso->format)) |
A3XX_TEX_CONST_1_HEIGHT(1);
} else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl;
so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = A3XX_TEX_CONST_1_PITCHALIGN(rsc->layout.pitchalign - 4) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
}
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
struct fdl_slice *slice = fd_resource_slice(rsc, lvl);
so->texconst2 = A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 = A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
break;
case PIPE_TEXTURE_3D:
so->texconst3 = A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(
fd_resource_slice(rsc, prsc->last_level)->size0);
break;
default:
so->texconst3 = 0x00000000;
break;
}
return &so->base;
}
void
fd3_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd3_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd3_sampler_view_create;
pctx->set_sampler_views = fd_set_sampler_views;
}

View file

@ -29,37 +29,37 @@
#include "pipe/p_context.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd3_context.h"
#include "fd3_format.h"
struct fd3_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
bool needs_border;
};
static inline struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd3_sampler_stateobj *)samp;
}
struct fd3_pipe_sampler_view {
struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3;
};
static inline struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd3_pipe_sampler_view *)pview;
}
unsigned fd3_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
void fd3_texture_init(struct pipe_context *pctx);

View file

@ -24,79 +24,75 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_context.h"
#include "fd3_format.h"
#include "fd3_zsa.h"
void *
fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd3_zsa_stateobj *so;
so = CALLOC_STRUCT(fd3_zsa_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled)
so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
if (cso->depth_writemask)
so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_READ |
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
}
}
if (cso->alpha_enabled) {
so->rb_render_control =
A3XX_RB_RENDER_CONTROL_ALPHA_TEST |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
so->rb_alpha_ref = A3XX_RB_ALPHA_REF_UINT(cso->alpha_ref_value * 255.0) |
A3XX_RB_ALPHA_REF_FLOAT(cso->alpha_ref_value);
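/* (the same reference value, encoded both as 8-bit unorm and as float) */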
so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
return so;
}

View file

@ -27,29 +27,28 @@
#ifndef FD3_ZSA_H_
#define FD3_ZSA_H_
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd3_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t rb_render_control;
uint32_t rb_alpha_ref;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
static inline struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd3_zsa_stateobj *)zsa;
}
void *fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD3_ZSA_H_ */

View file

@ -26,8 +26,8 @@
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_blend.h"
#include "fd4_context.h"
@ -36,83 +36,89 @@
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
void *
fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func);
}
so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so)
return NULL;
so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
fd_blend_factor(rt->rgb_dst_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control =
A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
so->rb_mrt[i].control =
A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) {
so->rb_mrt[i].control |=
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
}
if (rt->blend_enable) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
}
if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
mrt_blend |= (1 << i);
}
if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
mrt_blend |= (1 << i);
}
if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
if (cso->dither)
so->rb_mrt[i].buf_info |=
A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
so->rb_fs_output =
A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
return so;
return so;
}
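Aside: COND() above is freedreno's conditional-bitfield helper. A sketch of the pattern with an equivalent definition (illustrative, not a quote of freedreno_util.h):

#define COND(enable, bits) ((enable) ? (bits) : 0)

/* the MRT control word only gains the ROP-enable bit when logic ops
 * are actually on: */
uint32_t control = A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
                   COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
                   A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);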


@ -27,28 +27,28 @@
#ifndef FD4_BLEND_H_
#define FD4_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd4_blend_stateobj {
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
} rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
} rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
static inline struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd4_blend_stateobj *)blend;
return (struct fd4_blend_stateobj *)blend;
}
void * fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
void *fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
#endif /* FD4_BLEND_H_ */


@ -26,8 +26,8 @@
#include "freedreno_query_hw.h"
#include "fd4_context.h"
#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_gmem.h"
@ -38,25 +38,24 @@
#include "fd4_zsa.h"
static void
fd4_context_destroy(struct pipe_context *pctx)
in_dt
fd4_context_destroy(struct pipe_context *pctx) in_dt
{
struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
u_upload_destroy(fd4_ctx->border_color_uploader);
pipe_resource_reference(&fd4_ctx->border_color_buf, NULL);
u_upload_destroy(fd4_ctx->border_color_uploader);
pipe_resource_reference(&fd4_ctx->border_color_buf, NULL);
fd_context_destroy(pctx);
fd_context_destroy(pctx);
fd_bo_del(fd4_ctx->vs_pvt_mem);
fd_bo_del(fd4_ctx->fs_pvt_mem);
fd_bo_del(fd4_ctx->vsc_size_mem);
fd_bo_del(fd4_ctx->vs_pvt_mem);
fd_bo_del(fd4_ctx->fs_pvt_mem);
fd_bo_del(fd4_ctx->vsc_size_mem);
fd_context_cleanup_common_vbos(&fd4_ctx->base);
fd_context_cleanup_common_vbos(&fd4_ctx->base);
fd_hw_query_fini(pctx);
fd_hw_query_fini(pctx);
free(fd4_ctx);
free(fd4_ctx);
}
/* clang-format off */
@ -73,55 +72,55 @@ static const uint8_t primtypes[] = {
/* clang-format on */
struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
in_dt
fd4_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags) in_dt
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
struct pipe_context *pctx;
struct fd_screen *screen = fd_screen(pscreen);
struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
struct pipe_context *pctx;
if (!fd4_ctx)
return NULL;
if (!fd4_ctx)
return NULL;
pctx = &fd4_ctx->base.base;
pctx->screen = pscreen;
pctx = &fd4_ctx->base.base;
pctx->screen = pscreen;
fd4_ctx->base.dev = fd_device_ref(screen->dev);
fd4_ctx->base.screen = fd_screen(pscreen);
fd4_ctx->base.last.key = &fd4_ctx->last_key;
fd4_ctx->base.dev = fd_device_ref(screen->dev);
fd4_ctx->base.screen = fd_screen(pscreen);
fd4_ctx->base.last.key = &fd4_ctx->last_key;
pctx->destroy = fd4_context_destroy;
pctx->create_blend_state = fd4_blend_state_create;
pctx->create_rasterizer_state = fd4_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
pctx->destroy = fd4_context_destroy;
pctx->create_blend_state = fd4_blend_state_create;
pctx->create_rasterizer_state = fd4_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
fd4_draw_init(pctx);
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
fd4_emit_init(pctx);
fd4_draw_init(pctx);
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
fd4_emit_init(pctx);
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;
fd_hw_query_init(pctx);
fd_hw_query_init(pctx);
fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd4_ctx->vs_pvt_mem =
fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd4_ctx->fs_pvt_mem =
fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd4_ctx->vsc_size_mem =
fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd_context_setup_common_vbos(&fd4_ctx->base);
fd_context_setup_common_vbos(&fd4_ctx->base);
fd4_query_context_init(pctx);
fd4_query_context_init(pctx);
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
PIPE_USAGE_STREAM, 0);
fd4_ctx->border_color_uploader =
u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx;
return pctx;
}


@ -34,34 +34,34 @@
#include "ir3/ir3_shader.h"
struct fd4_context {
struct fd_context base;
struct fd_context base;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*
* (upper area used as scratch bo.. see fd4_query)
*/
struct fd_bo *vsc_size_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*
* (upper area used as scratch bo.. see fd4_query)
*/
struct fd_bo *vsc_size_mem;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
/* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb;
/* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
};
static inline struct fd4_context *
fd4_context(struct fd_context *ctx)
{
return (struct fd4_context *)ctx;
return (struct fd4_context *)ctx;
}
struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
struct pipe_context *fd4_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags);
#endif /* FD4_CONTEXT_H_ */


@ -25,150 +25,148 @@
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_string.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd4_draw.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_format.h"
#include "fd4_program.h"
#include "fd4_zsa.h"
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit, unsigned index_offset)
assert_dt
struct fd4_emit *emit, unsigned index_offset) assert_dt
{
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd4_emit_state(ctx, ring, emit);
fd4_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd4_emit_vertex_bufs(ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd4_emit_vertex_bufs(ring, emit);
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->index_size ? info->index_bias
: emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index
: 0xffffffff);
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
fd4_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
fd4_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
fd4_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info, emit->indirect, emit->draw, index_offset);
fd4_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
emit->indirect, emit->draw, index_offset);
}
static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
unsigned index_offset)
in_dt
unsigned index_offset) in_dt
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd4_emit emit = {
.debug = &ctx->debug,
.vtx = &ctx->vtx,
.info = info,
.indirect = indirect,
.draw = draw,
.key = {
.vs = ctx->prog.vs,
.fs = ctx->prog.fs,
.key = {
.rasterflat = ctx->rasterizer->flatshade,
.ucp_enables = ctx->rasterizer->clip_plane_enable,
.has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb,
.vastc_srgb = fd4_ctx->vastc_srgb,
.fastc_srgb = fd4_ctx->fastc_srgb,
},
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd4_emit emit = {
.debug = &ctx->debug,
.vtx = &ctx->vtx,
.info = info,
.indirect = indirect,
.draw = draw,
.key =
{
.vs = ctx->prog.vs,
.fs = ctx->prog.fs,
.key =
{
.rasterflat = ctx->rasterizer->flatshade,
.ucp_enables = ctx->rasterizer->clip_plane_enable,
.has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb,
.vastc_srgb = fd4_ctx->vastc_srgb,
.fastc_srgb = fd4_ctx->fastc_srgb,
},
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
if (info->mode != PIPE_PRIM_MAX &&
!indirect &&
!info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
return false;
if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
enum fd_dirty_3d_state dirty = ctx->dirty;
enum fd_dirty_3d_state dirty = ctx->dirty;
emit.prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
emit.prog = fd4_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */
if (!emit.prog)
return false;
/* bail if compile failed: */
if (!emit.prog)
return false;
const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp);
ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */
/* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
}
if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
}
emit.binning_pass = false;
emit.dirty = dirty;
emit.binning_pass = false;
emit.dirty = dirty;
struct fd_ringbuffer *ring = ctx->batch->draw;
struct fd_ringbuffer *ring = ctx->batch->draw;
if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
}
if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
}
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, 0);
}
if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, 0);
}
/* and now binning pass: */
emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
/* and now binning pass: */
emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
fd_context_all_clean(ctx);
fd_context_all_clean(ctx);
return true;
return true;
}
void
fd4_draw_init(struct pipe_context *pctx)
disable_thread_safety_analysis
fd4_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd4_draw_vbo;
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd4_draw_vbo;
}


@ -35,118 +35,114 @@ void fd4_draw_init(struct pipe_context *pctx);
/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
enum pc_di_src_sel source_select, enum a4xx_index_size index_size,
enum pc_di_vis_cull_mode vis_cull_mode)
static inline uint32_t
DRAW4(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
enum a4xx_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode)
{
return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) |
CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) |
CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode);
return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) |
CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) |
CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode);
}
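For illustration, the header dword for an indexed triangle-list draw culled by the visibility stream would be built as (argument values chosen arbitrarily here):

uint32_t draw0 = DRAW4(DI_PT_TRILIST, DI_SRC_SEL_DMA,
                       INDEX4_SIZE_16_BIT, USE_VISIBILITY);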
static inline void
fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count,
uint32_t instances, enum a4xx_index_size idx_type,
uint32_t max_indices, uint32_t idx_offset,
struct pipe_resource *idx_buffer)
enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
enum a4xx_index_size idx_type, uint32_t max_indices,
uint32_t idx_offset, struct pipe_resource *idx_buffer)
{
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup.
*/
emit_marker(ring, 7);
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup.
*/
emit_marker(ring, 7);
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3);
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&batch->draw_patches);
} else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
}
OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */
if (idx_buffer) {
OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
OUT_RING (ring, max_indices);
}
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3);
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&batch->draw_patches);
} else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
}
OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */
if (idx_buffer) {
OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
OUT_RING(ring, max_indices);
}
emit_marker(ring, 7);
emit_marker(ring, 7);
fd_reset_wfi(batch);
fd_reset_wfi(batch);
}
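Aside: OUT_RINGP() above queues the dword into batch->draw_patches rather than finalizing it. A rough sketch of how the flush path later fills in the visibility field once the binning decision is known, modeled on the driver's fd_cs_patch helpers (treat the function name as an assumption):

static void
patch_draws_sketch(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   for (unsigned i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      /* patch->val holds DRAW4(prim, src_sel, idx_type, 0); OR in the
       * now-known vis-cull mode: */
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
}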
static inline void
fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info,
enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
unsigned index_offset)
const struct pipe_draw_start_count *draw, unsigned index_offset)
{
struct pipe_resource *idx_buffer = NULL;
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
struct pipe_resource *idx_buffer = NULL;
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
if (indirect && indirect->buffer) {
struct fd_resource *ind = fd_resource(indirect->buffer);
if (indirect && indirect->buffer) {
struct fd_resource *ind = fd_resource(indirect->buffer);
emit_marker(ring, 7);
emit_marker(ring, 7);
if (info->index_size) {
struct pipe_resource *idx = info->index.resource;
if (info->index_size) {
struct pipe_resource *idx = info->index.resource;
OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA,
fd4_size2indextype(info->index_size), 0),
&batch->draw_patches);
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(
idx->width0 - index_offset));
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
} else {
OUT_PKT3(ring, CP_DRAW_INDIRECT, 2);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
&batch->draw_patches);
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
}
OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4);
OUT_RINGP(ring,
DRAW4(primtype, DI_SRC_SEL_DMA,
fd4_size2indextype(info->index_size), 0),
&batch->draw_patches);
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(idx->width0 -
index_offset));
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
} else {
OUT_PKT3(ring, CP_DRAW_INDIRECT, 2);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
&batch->draw_patches);
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
}
emit_marker(ring, 7);
fd_reset_wfi(batch);
emit_marker(ring, 7);
fd_reset_wfi(batch);
return;
}
return;
}
if (info->index_size) {
assert(!info->has_user_indices);
if (info->index_size) {
assert(!info->has_user_indices);
idx_buffer = info->index.resource;
idx_type = fd4_size2indextype(info->index_size);
idx_size = info->index_size * draw->count;
idx_offset = index_offset + draw->start * info->index_size;
src_sel = DI_SRC_SEL_DMA;
} else {
idx_buffer = NULL;
idx_type = INDEX4_SIZE_32_BIT;
idx_size = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
idx_buffer = info->index.resource;
idx_type = fd4_size2indextype(info->index_size);
idx_size = info->index_size * draw->count;
idx_offset = index_offset + draw->start * info->index_size;
src_sel = DI_SRC_SEL_DMA;
} else {
idx_buffer = NULL;
idx_type = INDEX4_SIZE_32_BIT;
idx_size = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
fd4_draw(batch, ring, primtype, vismode, src_sel,
draw->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_buffer);
fd4_draw(batch, ring, primtype, vismode, src_sel, draw->count,
info->instance_count, idx_type, idx_size, idx_offset, idx_buffer);
}
#endif /* FD4_DRAW_H_ */

File diff suppressed because it is too large


@ -29,76 +29,79 @@
#include "pipe/p_context.h"
#include "freedreno_context.h"
#include "fd4_format.h"
#include "fd4_program.h"
#include "freedreno_context.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
unsigned nr_bufs, struct pipe_surface **bufs);
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
struct pipe_surface **bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx;
const struct fd4_program_state *prog;
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw;
bool binning_pass;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx;
const struct fd4_program_state *prog;
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw;
bool binning_pass;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode;
bool rasterflat;
bool no_decode_srgb;
bool skip_consts;
uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode;
bool rasterflat;
bool no_decode_srgb;
bool skip_consts;
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs;
/* TODO: other shader stages.. */
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs;
/* TODO: other shader stages.. */
};
static inline enum a4xx_color_fmt fd4_emit_format(struct pipe_surface *surf)
static inline enum a4xx_color_fmt
fd4_emit_format(struct pipe_surface *surf)
{
if (!surf)
return 0;
return fd4_pipe2color(surf->format);
if (!surf)
return 0;
return fd4_pipe2color(surf->format);
}
static inline const struct ir3_shader_variant *
fd4_emit_get_vp(struct fd4_emit *emit)
{
if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
}
return emit->vs;
if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
}
return emit->vs;
}
static inline const struct ir3_shader_variant *
fd4_emit_get_fp(struct fd4_emit *emit)
{
if (!emit->fs) {
if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs;
} else {
emit->fs = emit->prog->fs;
}
}
return emit->fs;
if (!emit->fs) {
if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs;
} else {
emit->fs = emit->prog->fs;
}
}
return emit->fs;
}
void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) assert_dt;
void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd4_emit *emit) assert_dt;
void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit) assert_dt;
struct fd4_emit *emit) assert_dt;
void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
void fd4_emit_restore(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt;
void fd4_emit_init_screen(struct pipe_screen *pscreen);
void fd4_emit_init(struct pipe_context *pctx);
@ -106,7 +109,7 @@ void fd4_emit_init(struct pipe_context *pctx);
static inline void
fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
__OUT_IB(ring, true, target);
__OUT_IB(ring, true, target);
}
#endif /* FD4_EMIT_H */


@ -29,48 +29,41 @@
#include "fd4_format.h"
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
struct fd4_format {
enum a4xx_vtx_fmt vtx;
enum a4xx_tex_fmt tex;
enum a4xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
enum a4xx_vtx_fmt vtx;
enum a4xx_tex_fmt tex;
enum a4xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
};
/* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT4_ ## fmt, \
.tex = TFMT4_ ## fmt, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT4_##fmt, \
.tex = TFMT4_##fmt, \
.rb = RB4_##rbfmt, \
.swap = swapfmt}
/* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT4_NONE, \
.tex = TFMT4_ ## fmt, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT4_NONE, \
.tex = TFMT4_##fmt, \
.rb = RB4_##rbfmt, \
.swap = swapfmt}
/* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT4_ ## fmt, \
.tex = TFMT4_NONE, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT4_##fmt, \
.tex = TFMT4_NONE, \
.rb = RB4_##rbfmt, \
.swap = swapfmt}
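To make the table below easier to read, here is what a single entry expands to (illustrative; the exact enum names are assumptions based on the a4xx register headers):

/* VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX) becomes:
 *
 * [PIPE_FORMAT_R8G8B8A8_UNORM] = {.present = 1,
 *                                 .vtx = VFMT4_8_8_8_8_UNORM,
 *                                 .tex = TFMT4_8_8_8_8_UNORM,
 *                                 .rb = RB4_R8G8B8A8_UNORM,
 *                                 .swap = WZYX},
 */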
/* clang-format off */
static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
@ -340,84 +333,94 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format)
{
if (!formats[format].present)
return VFMT4_NONE;
return formats[format].vtx;
if (!formats[format].present)
return VFMT4_NONE;
return formats[format].vtx;
}
/* convert pipe format to texture sampler format: */
enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format)
{
if (!formats[format].present)
return TFMT4_NONE;
return formats[format].tex;
if (!formats[format].present)
return TFMT4_NONE;
return formats[format].tex;
}
/* convert pipe format to MRT / copydest format used for render-target: */
enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format)
{
if (!formats[format].present)
return RB4_NONE;
return formats[format].rb;
if (!formats[format].present)
return RB4_NONE;
return formats[format].rb;
}
enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format)
{
if (!formats[format].present)
return WZYX;
return formats[format].swap;
if (!formats[format].present)
return WZYX;
return formats[format].swap;
}
enum a4xx_depth_format
fd4_pipe2depth(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTH4_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8;
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTH4_32;
default:
return ~0;
}
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTH4_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8;
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTH4_32;
default:
return ~0;
}
}
static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_X: return A4XX_TEX_X;
case PIPE_SWIZZLE_Y: return A4XX_TEX_Y;
case PIPE_SWIZZLE_Z: return A4XX_TEX_Z;
case PIPE_SWIZZLE_W: return A4XX_TEX_W;
case PIPE_SWIZZLE_0: return A4XX_TEX_ZERO;
case PIPE_SWIZZLE_1: return A4XX_TEX_ONE;
}
switch (swiz) {
default:
case PIPE_SWIZZLE_X:
return A4XX_TEX_X;
case PIPE_SWIZZLE_Y:
return A4XX_TEX_Y;
case PIPE_SWIZZLE_Z:
return A4XX_TEX_Z;
case PIPE_SWIZZLE_W:
return A4XX_TEX_W;
case PIPE_SWIZZLE_0:
return A4XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A4XX_TEX_ONE;
}
}
uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc =
util_format_description(format);
unsigned char swiz[4] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
}, rswiz[4];
const struct util_format_description *desc = util_format_description(format);
unsigned char swiz[4] =
{
swizzle_r,
swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
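A usage sketch: util_format_compose_swizzles() folds the format's own channel ordering into the caller's view swizzle, so for an identity-layout RGBA format the view swizzle passes straight through:

/* expose an RGBA8 resource with R and B exchanged (a BGRA-style view);
 * desc->swizzle is the identity here, so rswiz ends up {Z, Y, X, W}: */
uint32_t swiz = fd4_tex_swiz(PIPE_FORMAT_R8G8B8A8_UNORM,
                             PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y,
                             PIPE_SWIZZLE_X, PIPE_SWIZZLE_W);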


@ -38,6 +38,7 @@ enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD4_UTIL_H_ */

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -36,20 +36,20 @@
struct fd4_emit;
struct fd4_program_state {
struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */
struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd4_program_state *
fd4_program_state(struct ir3_program_state *state)
{
return (struct fd4_program_state *)state;
return (struct fd4_program_state *)state;
}
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
int nr, struct pipe_surface **bufs);
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr,
struct pipe_surface **bufs);
void fd4_prog_init(struct pipe_context *pctx);


@ -24,18 +24,17 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
#include "fd4_query.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
#include "fd4_query.h"
struct fd_rb_samp_ctrs {
uint64_t ctr[16];
uint64_t ctr[16];
};
/*
@ -48,57 +47,56 @@ struct fd_rb_samp_ctrs {
static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
struct fd_hw_sample *samp =
fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
struct fd_hw_sample *samp =
fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
/* low bits of sample addr should be zero (since they are control
* flags in RB_SAMPLE_COUNT_CONTROL):
*/
debug_assert((samp->offset & 0x3) == 0);
/* low bits of sample addr should be zero (since they are control
* flags in RB_SAMPLE_COUNT_CONTROL):
*/
debug_assert((samp->offset & 0x3) == 0);
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register:
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY |
samp->offset);
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register:
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY | samp->offset);
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX4_SIZE_32_BIT, USE_VISIBILITY));
OUT_RING(ring, 1); /* NumInstances */
OUT_RING(ring, 0); /* NumIndices */
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX4_SIZE_32_BIT, USE_VISIBILITY));
OUT_RING(ring, 1); /* NumInstances */
OUT_RING(ring, 0); /* NumIndices */
fd_event_write(batch, ring, ZPASS_DONE);
fd_event_write(batch, ring, ZPASS_DONE);
return samp;
return samp;
}
static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
const struct fd_rb_samp_ctrs *end)
{
return end->ctr[0] - start->ctr[0];
return end->ctr[0] - start->ctr[0];
}
static void
occlusion_counter_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->u64 += n;
uint64_t n = count_samples(start, end);
result->u64 += n;
}
static void
occlusion_predicate_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->b |= (n > 0);
uint64_t n = count_samples(start, end);
result->b |= (n > 0);
}
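Aside: the hw-query core calls accumulate_result() once per sample pair, one per rendered tile in GMEM mode, which is why the counter variant sums while the predicate variant ORs. A rough sketch of that driving loop (all names besides the accumulator are illustrative):

static void
sum_across_tiles(struct fd_context *ctx, const void **starts,
                 const void **ends, unsigned num_tiles,
                 union pipe_query_result *result)
{
   for (unsigned tile = 0; tile < num_tiles; tile++)
      occlusion_counter_accumulate_result(ctx, starts[tile], ends[tile],
                                          result);
   /* result->u64 now totals passed samples across all tiles */
}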
/*
@ -109,161 +107,159 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
*/
static void
time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
assert_dt
time_elapsed_enable(struct fd_context *ctx,
struct fd_ringbuffer *ring) assert_dt
{
/* Right now, the assignment of countable to counter register is
* just hard coded. If we start exposing more countables than we
* have counters, we will need to be more clever.
*/
struct fd_batch *batch = fd_context_batch_locked(ctx);
fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
OUT_RING(ring, CP_ALWAYS_COUNT);
fd_batch_unlock_submit(batch);
fd_batch_reference(&batch, NULL);
/* Right now, the assignment of countable to counter register is
* just hard coded. If we start exposing more countables than we
* have counters, we will need to be more clever.
*/
struct fd_batch *batch = fd_context_batch_locked(ctx);
fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
OUT_RING(ring, CP_ALWAYS_COUNT);
fd_batch_unlock_submit(batch);
fd_batch_reference(&batch, NULL);
}
static struct fd_hw_sample *
time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
assert_dt
time_elapsed_get_sample(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt
{
struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
/* use unused part of vsc_size_mem as scratch space, to avoid
* extra allocation:
*/
struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
const int sample_off = 128;
const int addr_off = sample_off + 8;
/* use unused part of vsc_size_mem as scratch space, to avoid
* extra allocation:
*/
struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
const int sample_off = 128;
const int addr_off = sample_off + 8;
debug_assert(batch->ctx->screen->max_freq > 0);
debug_assert(batch->ctx->screen->max_freq > 0);
/* Basic issue is that we need to read counter value to a relative
* destination (with per-tile offset) rather than absolute dest
* addr. But there is no pm4 packet that can do that. This is
* where it would be *really* nice if we could write our own fw
* since afaict implementing the sort of packet we need would be
* trivial.
*
* Instead, we:
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
* address to the per-sample offset in the scratch buffer
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
* to CP_ME_NRT_ADDR
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch
* buffer to CP_ME_NRT_DATA to trigger the write out to query
* result buffer
*
* Straightforward, right?
*
* Maybe could swap the order of things in the scratch buffer to
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
* shot, but that's really just polishing a turd..
*/
/* Basic issue is that we need to read counter value to a relative
* destination (with per-tile offset) rather than absolute dest
* addr. But there is no pm4 packet that can do that. This is
* where it would be *really* nice if we could write our own fw
* since afaict implementing the sort of packet we need would be
* trivial.
*
* Instead, we:
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
* address to the per-sample offset in the scratch buffer
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
* to CP_ME_NRT_ADDR
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch
* buffer to CP_ME_NRT_DATA to trigger the write out to query
* result buffer
*
* Straightforward, right?
*
* Maybe could swap the order of things in the scratch buffer to
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
* shot, but that's really just polishing a turd..
*/
fd_wfi(batch, ring);
fd_wfi(batch, ring);
/* copy sample counter _LO and _HI to scratch: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* copy sample counter _LO and _HI to scratch: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* ok... here we really *would* like to use the CP_SET_CONSTANT
* mode which can add a constant to value in reg2 and write to
* reg1... *but* that only works for banked/context registers,
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some
* CP math to the scratch buffer instead:
*
* (note first 8 bytes are counter value, use offset 0x8 for
* address calculation)
*/
/* ok... here we really *would* like to use the CP_SET_CONSTANT
* mode which can add a constant to value in reg2 and write to
* reg1... *but* that only works for banked/context registers,
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some
* CP math to the scratch buffer instead:
*
* (note first 8 bytes are counter value, use offset 0x8 for
* address calculation)
*/
/* per-sample offset to scratch bo: */
OUT_PKT3(ring, CP_MEM_WRITE, 2);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
OUT_RING(ring, samp->offset);
/* per-sample offset to scratch bo: */
OUT_PKT3(ring, CP_MEM_WRITE, 2);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
OUT_RING(ring, samp->offset);
/* now add to that the per-tile base: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
CP_REG_TO_MEM_0_ACCUMULATE |
CP_REG_TO_MEM_0_CNT(0)); /* readback 1 regs */
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* now add to that the per-tile base: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
CP_REG_TO_MEM_0_ACCUMULATE |
CP_REG_TO_MEM_0_CNT(0)); /* readback 1 regs */
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* now copy that back to CP_ME_NRT_ADDR: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* now copy that back to CP_ME_NRT_ADDR: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
* to trigger the write to result buffer
*/
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
* to trigger the write to result buffer
*/
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* and again to get the value of the _HI reg from scratch: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
/* and again to get the value of the _HI reg from scratch: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
/* Sigh.. */
/* Sigh.. */
return samp;
return samp;
}
static void
time_elapsed_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
time_elapsed_accumulate_result(struct fd_context *ctx, const void *start,
const void *end, union pipe_query_result *result)
{
uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */
result->u64 += n * 1000000000 / ctx->screen->max_freq;
uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */
result->u64 += n * 1000000000 / ctx->screen->max_freq;
}
static void
timestamp_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
timestamp_accumulate_result(struct fd_context *ctx, const void *start,
const void *end, union pipe_query_result *result)
{
/* just return the value from first tile: */
if (result->u64 != 0)
return;
uint64_t n = *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */
result->u64 = n * 1000000000 / ctx->screen->max_freq;
/* just return the value from first tile: */
if (result->u64 != 0)
return;
uint64_t n = *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */
result->u64 = n * 1000000000 / ctx->screen->max_freq;
}
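A quick worked example of the cycles-to-nanoseconds conversion used by both accumulators (clock value assumed for illustration):

/* with max_freq = 400000000 (400 MHz) and a counter delta of
 * n = 2000000 cycles:
 *
 *    2000000 * 1000000000 / 400000000 = 5000000 ns = 5 ms
 *
 * the multiply happens first in 64-bit arithmetic, so at this clock
 * the conversion stays exact for intervals up to roughly 46 seconds
 * (2^64 / 10^9 cycles) before the intermediate product would wrap. */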
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result,
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED,
.always = true,
.enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample,
.accumulate_result = time_elapsed_accumulate_result,
.query_type = PIPE_QUERY_TIME_ELAPSED,
.always = true,
.enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample,
.accumulate_result = time_elapsed_accumulate_result,
};
/* NOTE: timestamp query isn't going to give terribly sensible results
@ -273,26 +269,26 @@ static const struct fd_hw_sample_provider time_elapsed = {
* kind of good enough.
*/
static const struct fd_hw_sample_provider timestamp = {
.query_type = PIPE_QUERY_TIMESTAMP,
.always = true,
.enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample,
.accumulate_result = timestamp_accumulate_result,
.query_type = PIPE_QUERY_TIMESTAMP,
.always = true,
.enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample,
.accumulate_result = timestamp_accumulate_result,
};
void fd4_query_context_init(struct pipe_context *pctx)
disable_thread_safety_analysis
void
fd4_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_hw_create_query;
ctx->query_prepare = fd_hw_query_prepare;
ctx->query_prepare_tile = fd_hw_query_prepare_tile;
ctx->query_update_batch = fd_hw_query_update_batch;
ctx->create_query = fd_hw_create_query;
ctx->query_prepare = fd_hw_query_prepare;
ctx->query_prepare_tile = fd_hw_query_prepare_tile;
ctx->query_update_batch = fd_hw_query_update_batch;
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_hw_query_register_provider(pctx, &time_elapsed);
fd_hw_query_register_provider(pctx, &timestamp);
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_hw_query_register_provider(pctx, &time_elapsed);
fd_hw_query_register_provider(pctx, &timestamp);
}


@ -24,84 +24,83 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_rasterizer.h"
#include "fd4_context.h"
#include "fd4_format.h"
#include "fd4_rasterizer.h"
void *
fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
const struct pipe_rasterizer_state *cso)
{
struct fd4_rasterizer_stateobj *so;
float psize_min, psize_max;
struct fd4_rasterizer_stateobj *so;
float psize_min, psize_max;
so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
if (!so)
return NULL;
so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->base = *cso;
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax =
A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
so->gras_su_poly_offset_clamp =
A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax = A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
so->gras_su_poly_offset_clamp =
A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
so->pc_prim_vtx_cntl2 =
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
so->pc_prim_vtx_cntl2 = A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(
fd_polygon_mode(cso->fill_front)) |
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(
fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip_near)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
if (!cso->depth_clip_near)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
return so;
return so;
}


@ -27,30 +27,30 @@
#ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd4_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_poly_offset_clamp;
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_poly_offset_clamp;
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
uint32_t pc_prim_vtx_cntl2;
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
uint32_t pc_prim_vtx_cntl2;
};
static inline struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd4_rasterizer_stateobj *)rast;
return (struct fd4_rasterizer_stateobj *)rast;
}
void * fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
void *fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
#endif /* FD4_RASTERIZER_H_ */


@ -30,54 +30,54 @@
uint32_t
fd4_setup_slices(struct fd_resource *rsc)
{
struct pipe_resource *prsc = &rsc->b.b;
enum pipe_format format = prsc->format;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
uint32_t depth = prsc->depth0;
/* in layer_first layout, the level (slice) contains just one
* layer (since in fact the layer contains the slices)
*/
uint32_t layers_in_level, alignment;
struct pipe_resource *prsc = &rsc->b.b;
enum pipe_format format = prsc->format;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
uint32_t depth = prsc->depth0;
/* in layer_first layout, the level (slice) contains just one
* layer (since in fact the layer contains the slices)
*/
uint32_t layers_in_level, alignment;
if (prsc->target == PIPE_TEXTURE_3D) {
rsc->layout.layer_first = false;
layers_in_level = prsc->array_size;
alignment = 4096;
} else {
rsc->layout.layer_first = true;
layers_in_level = 1;
alignment = 1;
}
if (prsc->target == PIPE_TEXTURE_3D) {
rsc->layout.layer_first = false;
layers_in_level = prsc->array_size;
alignment = 4096;
} else {
rsc->layout.layer_first = true;
layers_in_level = 1;
alignment = 1;
}
/* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
/* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl_pitch(&rsc->layout, level);
uint32_t nblocksy = util_format_get_nblocksy(format, height);
for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl_pitch(&rsc->layout, level);
uint32_t nblocksy = util_format_get_nblocksy(format, height);
slice->offset = size;
slice->offset = size;
/* 3d textures can have different layer sizes for high levels, but the
* hw auto-sizer is buggy (or at least different than what this code
* does), so as soon as the layer size range gets into range, we stop
* reducing it.
*/
if (prsc->target == PIPE_TEXTURE_3D &&
(level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000))
slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
else
slice->size0 = align(nblocksy * pitch, alignment);
/* 3d textures can have different layer sizes for high levels, but the
* hw auto-sizer is buggy (or at least different than what this code
* does), so as soon as the layer size range gets into range, we stop
* reducing it.
*/
if (prsc->target == PIPE_TEXTURE_3D &&
(level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000))
slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
else
slice->size0 = align(nblocksy * pitch, alignment);
size += slice->size0 * depth * layers_in_level;
size += slice->size0 * depth * layers_in_level;
width = u_minify(width, 1);
height = u_minify(height, 1);
depth = u_minify(depth, 1);
}
width = u_minify(width, 1);
height = u_minify(height, 1);
depth = u_minify(depth, 1);
}
return size;
return size;
}


@@ -27,91 +27,85 @@
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "fd4_screen.h"
#include "fd4_context.h"
#include "fd4_emit.h"
#include "fd4_format.h"
#include "fd4_resource.h"
#include "fd4_screen.h"
#include "ir3/ir3_compiler.h"
static bool
fd4_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count,
unsigned usage)
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count, unsigned usage)
{
unsigned retval = 0;
unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return false;
}
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return false;
}
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd4_pipe2vtx(format) != VFMT4_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd4_pipe2vtx(format) != VFMT4_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd4_pipe2tex(format) != TFMT4_NONE) &&
(target == PIPE_BUFFER ||
util_format_get_blocksize(format) != 12)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd4_pipe2tex(format) != TFMT4_NONE) &&
(target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
(fd4_pipe2color(format) != RB4_NONE) &&
(fd4_pipe2tex(format) != TFMT4_NONE)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
(fd4_pipe2color(format) != RB4_NONE) &&
(fd4_pipe2tex(format) != TFMT4_NONE)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
}
/* For ARB_framebuffer_no_attachments: */
if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
retval |= usage & PIPE_BIND_RENDER_TARGET;
}
/* For ARB_framebuffer_no_attachments: */
if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
retval |= usage & PIPE_BIND_RENDER_TARGET;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd4_pipe2depth(format) != (enum a4xx_depth_format)~0) &&
(fd4_pipe2tex(format) != TFMT4_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd4_pipe2depth(format) != (enum a4xx_depth_format) ~0) &&
(fd4_pipe2tex(format) != TFMT4_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format),
target, sample_count, usage, retval);
}
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x",
util_format_name(format), target, sample_count, usage, retval);
}
return retval == usage;
return retval == usage;
}
void
fd4_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A4XX_MAX_RENDER_TARGETS;
screen->setup_slices = fd4_setup_slices;
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
fd4_emit_init_screen(pscreen);
ir3_screen_init(pscreen);
struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A4XX_MAX_RENDER_TARGETS;
screen->setup_slices = fd4_setup_slices;
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
fd4_emit_init_screen(pscreen);
ir3_screen_init(pscreen);
}


@@ -25,264 +25,257 @@
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_texture.h"
#include "fd4_format.h"
#include "fd4_texture.h"
static enum a4xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true;
return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true;
return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
static enum a4xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A4XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A4XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
static void *
fd4_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
const struct pipe_sampler_state *cso)
{
struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
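/* e.g. max_anisotropy = 16: 16 >> 1 = 8 and util_last_bit(8) = 4, so
 * aniso = 4; this looks like a log2 encoding of the aniso level, capped
 * at 16x by the MIN2(). */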
if (!so)
return NULL;
if (!so)
return NULL;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
so->base = *cso;
so->base = *cso;
so->needs_border = false;
so->texsamp0 =
COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A4XX_TEX_SAMP_0_ANISO(aniso) |
A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
so->needs_border = false;
so->texsamp0 =
COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A4XX_TEX_SAMP_0_ANISO(aniso) |
A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
}
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
}
if (cso->compare_mode)
so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
if (cso->compare_mode)
so->texsamp1 |=
A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
return so;
return so;
}
static enum a4xx_tex_type
tex_type(unsigned target)
{
switch (target) {
default:
assert(0);
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A4XX_TEX_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
return A4XX_TEX_2D;
case PIPE_TEXTURE_3D:
return A4XX_TEX_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return A4XX_TEX_CUBE;
}
switch (target) {
default:
assert(0);
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A4XX_TEX_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
return A4XX_TEX_2D;
case PIPE_TEXTURE_3D:
return A4XX_TEX_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return A4XX_TEX_CUBE;
}
}
static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{
return (fd_screen(pctx->screen)->gpu_id == 420) &&
(util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC);
return (fd_screen(pctx->screen)->gpu_id == 420) &&
(util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC);
}
static struct pipe_sampler_view *
fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
const struct pipe_sampler_view *cso)
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = cso->format;
unsigned lvl, layers = 0;
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = cso->format;
unsigned lvl, layers = 0;
if (!so)
return NULL;
if (!so)
return NULL;
if (format == PIPE_FORMAT_X32_S8X24_UINT) {
rsc = rsc->stencil;
format = rsc->b.b.format;
}
if (format == PIPE_FORMAT_X32_S8X24_UINT) {
rsc = rsc->stencil;
format = rsc->b.b.format;
}
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->texconst0 =
A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
so->texconst0 = A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format))
so->astc_srgb = true;
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
}
if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format))
so->astc_srgb = true;
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
}
if (cso->target == PIPE_BUFFER) {
unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
if (cso->target == PIPE_BUFFER) {
unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
lvl = 0;
so->texconst1 =
A4XX_TEX_CONST_1_WIDTH(elements) |
A4XX_TEX_CONST_1_HEIGHT(1);
so->texconst2 =
A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp);
so->offset = cso->u.buf.offset;
} else {
unsigned miplevels;
lvl = 0;
so->texconst1 =
A4XX_TEX_CONST_1_WIDTH(elements) | A4XX_TEX_CONST_1_HEIGHT(1);
so->texconst2 = A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp);
so->offset = cso->u.buf.offset;
} else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl;
layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl;
layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 =
A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
so->texconst2 =
A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) |
A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
so->texconst2 = A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) |
A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
* we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
* way to re-arrange things so stencil component is where the swiz
* expects.
*
* Note that gallium expects stencil sampler to return (s,s,s,s)
* which isn't quite true. To make that happen we'd have to massage
* the swizzle. But in practice only the .x component is used.
*/
if (format == PIPE_FORMAT_X24S8_UINT)
so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW);
/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
* we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
* way to re-arrange things so stencil component is where the swiz
* expects.
*
* Note that gallium expects stencil sampler to return (s,s,s,s)
* which isn't quite true. To make that happen we'd have to massage
* the swizzle. But in practice only the .x component is used.
*/
if (format == PIPE_FORMAT_X24S8_UINT)
so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW);
switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(layers / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0);
so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(
fd_resource_slice(rsc, prsc->last_level)->size0);
break;
default:
so->texconst3 = 0x00000000;
break;
}
switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0);
so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(
fd_resource_slice(rsc, prsc->last_level)->size0);
break;
default:
so->texconst3 = 0x00000000;
break;
}
return &so->base;
return &so->base;
}
static void
fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
unsigned start, unsigned nr,
unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
{
struct fd_context *ctx = fd_context(pctx);
struct fd4_context *fd4_ctx = fd4_context(ctx);
uint16_t astc_srgb = 0;
unsigned i;
struct fd_context *ctx = fd_context(pctx);
struct fd4_context *fd4_ctx = fd4_context(ctx);
uint16_t astc_srgb = 0;
unsigned i;
for (i = 0; i < nr; i++) {
if (views[i]) {
struct fd4_pipe_sampler_view *view =
fd4_pipe_sampler_view(views[i]);
if (view->astc_srgb)
astc_srgb |= (1 << i);
}
}
for (i = 0; i < nr; i++) {
if (views[i]) {
struct fd4_pipe_sampler_view *view = fd4_pipe_sampler_view(views[i]);
if (view->astc_srgb)
astc_srgb |= (1 << i);
}
}
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views);
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
views);
if (shader == PIPE_SHADER_FRAGMENT) {
fd4_ctx->fastc_srgb = astc_srgb;
} else if (shader == PIPE_SHADER_VERTEX) {
fd4_ctx->vastc_srgb = astc_srgb;
}
if (shader == PIPE_SHADER_FRAGMENT) {
fd4_ctx->fastc_srgb = astc_srgb;
} else if (shader == PIPE_SHADER_VERTEX) {
fd4_ctx->vastc_srgb = astc_srgb;
}
}
void
fd4_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd4_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create;
pctx->set_sampler_views = fd4_set_sampler_views;
pctx->create_sampler_state = fd4_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create;
pctx->set_sampler_views = fd4_set_sampler_views;
}


@@ -29,39 +29,39 @@
#include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd4_context.h"
#include "fd4_format.h"
struct fd4_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
bool needs_border;
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
bool needs_border;
};
static inline struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd4_sampler_stateobj *)samp;
return (struct fd4_sampler_stateobj *)samp;
}
struct fd4_pipe_sampler_view {
struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
uint32_t offset;
bool astc_srgb;
struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
uint32_t offset;
bool astc_srgb;
};
static inline struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd4_pipe_sampler_view *)pview;
return (struct fd4_pipe_sampler_view *)pview;
}
unsigned fd4_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
struct fd_texture_stateobj *tex, unsigned samp_id);
void fd4_texture_init(struct pipe_context *pctx);


@@ -24,82 +24,77 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_zsa.h"
#include "fd4_context.h"
#include "fd4_format.h"
#include "fd4_zsa.h"
void *
fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd4_zsa_stateobj *so;
struct fd4_zsa_stateobj *so;
so = CALLOC_STRUCT(fd4_zsa_stateobj);
if (!so)
return NULL;
so = CALLOC_STRUCT(fd4_zsa_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->base = *cso;
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled)
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
if (cso->depth_enabled)
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_Z_ENABLE | A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
if (cso->depth_writemask)
so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
if (cso->depth_writemask)
so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencil_control2 |=
A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencil_control2 |= A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
}
}
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
}
}
if (cso->alpha_enabled) {
uint32_t ref = cso->alpha_ref_value * 255.0;
so->gras_alpha_control =
A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
so->rb_alpha_control =
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
if (cso->alpha_enabled) {
uint32_t ref = cso->alpha_ref_value * 255.0;
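/* e.g. alpha_ref_value = 0.5 gives 127.5, which the uint32_t conversion
 * truncates to ref = 127 (no rounding). */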
so->gras_alpha_control = A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
so->rb_alpha_control =
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
return so;
return so;
}


@@ -27,30 +27,29 @@
#ifndef FD4_ZSA_H_
#define FD4_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd4_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t gras_alpha_control;
uint32_t rb_alpha_control;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencil_control2;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
struct pipe_depth_stencil_alpha_state base;
uint32_t gras_alpha_control;
uint32_t rb_alpha_control;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencil_control2;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
static inline struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd4_zsa_stateobj *)zsa;
return (struct fd4_zsa_stateobj *)zsa;
}
void * fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
void *fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD4_ZSA_H_ */


@@ -26,8 +26,8 @@
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_blend.h"
#include "fd5_context.h"
@@ -37,90 +37,99 @@
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
void *
fd5_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
const struct pipe_blend_state *cso)
{
struct fd5_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
struct fd5_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func);
}
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func);
}
so = CALLOC_STRUCT(fd5_blend_stateobj);
if (!so)
return NULL;
so = CALLOC_STRUCT(fd5_blend_stateobj);
if (!so)
return NULL;
so->base = *cso;
so->base = *cso;
so->lrz_write = true; /* unless blend enabled for any MRT */
so->lrz_write = true; /* unless blend enabled for any MRT */
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
so->rb_mrt[i].blend_control =
A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].blend_control =
A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
fd_blend_factor(rt->rgb_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
fd_blend_factor(rt->rgb_dst_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control =
A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
so->rb_mrt[i].control =
A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) {
so->rb_mrt[i].control |=
// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A5XX_RB_MRT_CONTROL_BLEND |
A5XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
so->lrz_write = false;
}
if (rt->blend_enable) {
so->rb_mrt[i].control |=
// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE
//|
A5XX_RB_MRT_CONTROL_BLEND | A5XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
so->lrz_write = false;
}
if (reads_dest) {
// so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
mrt_blend |= (1 << i);
}
if (reads_dest) {
// so->rb_mrt[i].control |=
//A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
mrt_blend |= (1 << i);
}
// if (cso->dither)
// so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
// if (cso->dither)
// so->rb_mrt[i].buf_info |=
//A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
so->sp_blend_cntl = A5XX_SP_BLEND_CNTL_UNK8 |
COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) |
COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED);
so->rb_blend_cntl =
A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
so->sp_blend_cntl =
A5XX_SP_BLEND_CNTL_UNK8 |
COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) |
COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED);
return so;
return so;
}


@@ -27,31 +27,31 @@
#ifndef FD5_BLEND_H_
#define FD5_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd5_blend_stateobj {
struct pipe_blend_state base;
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
} rb_mrt[A5XX_MAX_RENDER_TARGETS];
uint32_t rb_blend_cntl;
uint32_t sp_blend_cntl;
bool lrz_write;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
} rb_mrt[A5XX_MAX_RENDER_TARGETS];
uint32_t rb_blend_cntl;
uint32_t sp_blend_cntl;
bool lrz_write;
};
static inline struct fd5_blend_stateobj *
fd5_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd5_blend_stateobj *)blend;
return (struct fd5_blend_stateobj *)blend;
}
void * fd5_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
void *fd5_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
#endif /* FD5_BLEND_H_ */


@@ -28,8 +28,8 @@
#include "freedreno_resource.h"
#include "fd5_blitter.h"
#include "fd5_format.h"
#include "fd5_emit.h"
#include "fd5_format.h"
/* Make sure none of the requested dimensions extend beyond the size of the
* resource. Not entirely sure why this happens, but sometimes it does, and
@@ -39,9 +39,9 @@
static bool
ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
{
return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
(b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
(b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));
return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
(b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
(b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));
}
/* Not sure if format restrictions differ for src and dst, or if
@@ -52,136 +52,136 @@ ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
static bool
ok_format(enum pipe_format fmt)
{
if (util_format_is_compressed(fmt))
return false;
if (util_format_is_compressed(fmt))
return false;
switch (fmt) {
case PIPE_FORMAT_R10G10B10A2_SSCALED:
case PIPE_FORMAT_R10G10B10A2_SNORM:
case PIPE_FORMAT_B10G10R10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_SSCALED:
case PIPE_FORMAT_B10G10R10A2_SNORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
case PIPE_FORMAT_B10G10R10A2_UINT:
case PIPE_FORMAT_R10G10B10A2_UINT:
return false;
default:
break;
}
switch (fmt) {
case PIPE_FORMAT_R10G10B10A2_SSCALED:
case PIPE_FORMAT_R10G10B10A2_SNORM:
case PIPE_FORMAT_B10G10R10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_SSCALED:
case PIPE_FORMAT_B10G10R10A2_SNORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
case PIPE_FORMAT_B10G10R10A2_UINT:
case PIPE_FORMAT_R10G10B10A2_UINT:
return false;
default:
break;
}
if (fd5_pipe2color(fmt) == RB5_NONE)
return false;
if (fd5_pipe2color(fmt) == RB5_NONE)
return false;
return true;
return true;
}
static bool
can_do_blit(const struct pipe_blit_info *info)
{
/* I think we can do scaling, but not in z dimension since that would
* require blending..
*/
if (info->dst.box.depth != info->src.box.depth)
return false;
/* I think we can do scaling, but not in z dimension since that would
* require blending..
*/
if (info->dst.box.depth != info->src.box.depth)
return false;
if (!ok_format(info->dst.format))
return false;
if (!ok_format(info->dst.format))
return false;
if (!ok_format(info->src.format))
return false;
if (!ok_format(info->src.format))
return false;
/* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE
* is set (not linear). We can kind of get around that when tiling/
* untiling by setting both src and dst COLOR_SWAP=WZYX, but that
* means the formats must match:
*/
if ((fd_resource(info->dst.resource)->layout.tile_mode ||
fd_resource(info->src.resource)->layout.tile_mode) &&
info->dst.format != info->src.format)
return false;
/* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE
* is set (not linear). We can kind of get around that when tiling/
* untiling by setting both src and dst COLOR_SWAP=WZYX, but that
* means the formats must match:
*/
if ((fd_resource(info->dst.resource)->layout.tile_mode ||
fd_resource(info->src.resource)->layout.tile_mode) &&
info->dst.format != info->src.format)
return false;
/* until we figure out a few more registers: */
if ((info->dst.box.width != info->src.box.width) ||
(info->dst.box.height != info->src.box.height))
return false;
/* until we figure out a few more registers: */
if ((info->dst.box.width != info->src.box.width) ||
(info->dst.box.height != info->src.box.height))
return false;
/* src box can be inverted, which we don't support.. dst box cannot: */
if ((info->src.box.width < 0) || (info->src.box.height < 0))
return false;
/* src box can be inverted, which we don't support.. dst box cannot: */
if ((info->src.box.width < 0) || (info->src.box.height < 0))
return false;
if (!ok_dims(info->src.resource, &info->src.box, info->src.level))
return false;
if (!ok_dims(info->src.resource, &info->src.box, info->src.level))
return false;
if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))
return false;
if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))
return false;
debug_assert(info->dst.box.width >= 0);
debug_assert(info->dst.box.height >= 0);
debug_assert(info->dst.box.depth >= 0);
debug_assert(info->dst.box.width >= 0);
debug_assert(info->dst.box.height >= 0);
debug_assert(info->dst.box.depth >= 0);
if ((info->dst.resource->nr_samples > 1) ||
(info->src.resource->nr_samples > 1))
return false;
if ((info->dst.resource->nr_samples > 1) ||
(info->src.resource->nr_samples > 1))
return false;
if (info->scissor_enable)
return false;
if (info->scissor_enable)
return false;
if (info->window_rectangle_include)
return false;
if (info->window_rectangle_include)
return false;
if (info->render_condition_enable)
return false;
if (info->render_condition_enable)
return false;
if (info->alpha_blend)
return false;
if (info->alpha_blend)
return false;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
return false;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
return false;
if (info->mask != util_format_get_mask(info->src.format))
return false;
if (info->mask != util_format_get_mask(info->src.format))
return false;
if (info->mask != util_format_get_mask(info->dst.format))
return false;
if (info->mask != util_format_get_mask(info->dst.format))
return false;
return true;
return true;
}
static void
emit_setup(struct fd_ringbuffer *ring)
{
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000008);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000008);
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);
OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);
OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */
OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */
}
/* buffers need to be handled specially since x/width can exceed the bounds
@@ -190,297 +190,297 @@ emit_setup(struct fd_ringbuffer *ring)
static void
emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{
const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst;
unsigned sshift, dshift;
const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst;
unsigned sshift, dshift;
src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource);
src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource);
debug_assert(src->layout.cpp == 1);
debug_assert(dst->layout.cpp == 1);
debug_assert(info->src.resource->format == info->dst.resource->format);
debug_assert((sbox->y == 0) && (sbox->height == 1));
debug_assert((dbox->y == 0) && (dbox->height == 1));
debug_assert((sbox->z == 0) && (sbox->depth == 1));
debug_assert((dbox->z == 0) && (dbox->depth == 1));
debug_assert(sbox->width == dbox->width);
debug_assert(info->src.level == 0);
debug_assert(info->dst.level == 0);
debug_assert(src->layout.cpp == 1);
debug_assert(dst->layout.cpp == 1);
debug_assert(info->src.resource->format == info->dst.resource->format);
debug_assert((sbox->y == 0) && (sbox->height == 1));
debug_assert((dbox->y == 0) && (dbox->height == 1));
debug_assert((sbox->z == 0) && (sbox->depth == 1));
debug_assert((dbox->z == 0) && (dbox->depth == 1));
debug_assert(sbox->width == dbox->width);
debug_assert(info->src.level == 0);
debug_assert(info->dst.level == 0);
/*
* Buffers can have dimensions bigger than max width, remap into
* multiple 1d blits to fit within max dimension
*
* Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
* seems to prevent overfetch related faults. Not quite sure what
* the deal is there.
*
* Low 6 bits of SRC/DST addresses need to be zero (ie. address
* aligned to 64) so we need to shift src/dst x1/x2 to make up the
* difference. On top of already splitting up the blit so width
* isn't > 16k.
*
* We perhaps could do a bit better, if src and dst are aligned but
* in the worst case this means we have to split the copy up into
* 16k (0x4000) minus 64 (0x40).
*/
/*
* Buffers can have dimensions bigger than max width, remap into
* multiple 1d blits to fit within max dimension
*
* Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
* seems to prevent overfetch related faults. Not quite sure what
* the deal is there.
*
* Low 6 bits of SRC/DST addresses need to be zero (ie. address
* aligned to 64) so we need to shift src/dst x1/x2 to make up the
* difference. On top of already splitting up the blit so width
* isn't > 16k.
*
* We perhaps could do a bit better, if src and dst are aligned but
* in the worst case this means we have to split the copy up into
* 16k (0x4000) minus 64 (0x40).
*/
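/* e.g. sbox->x = 0x1234: sshift = 0x1234 & 0x3f = 0x34, and the first
 * chunk reads from soff = 0x1200, a 64-byte-aligned address, with x1/x2
 * shifted by 0x34; each iteration copies at most 0x4000 - 0x40 = 16320
 * bytes. */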
sshift = sbox->x & 0x3f;
dshift = dbox->x & 0x3f;
sshift = sbox->x & 0x3f;
dshift = dbox->x & 0x3f;
for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
unsigned soff, doff, w, p;
for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
unsigned soff, doff, w, p;
soff = (sbox->x + off) & ~0x3f;
doff = (dbox->x + off) & ~0x3f;
soff = (sbox->x + off) & ~0x3f;
doff = (dbox->x + off) & ~0x3f;
w = MIN2(sbox->width - off, (0x4000 - 0x40));
p = align(w, 64);
w = MIN2(sbox->width - off, (0x4000 - 0x40));
p = align(w, 64);
debug_assert((soff + w) <= fd_bo_size(src->bo));
debug_assert((doff + w) <= fd_bo_size(dst->bo));
debug_assert((soff + w) <= fd_bo_size(src->bo));
debug_assert((doff + w) <= fd_bo_size(dst->bo));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
/*
* Emit source:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
/*
* Emit source:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));
/*
* Emit destination:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
/*
* Emit destination:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));
/*
* Blit command:
*/
OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift+w-1) | CP_BLIT_2_SRC_Y2(0));
OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));
OUT_RING(ring, CP_BLIT_4_DST_X2(dshift+w-1) | CP_BLIT_4_DST_Y2(0));
/*
* Blit command:
*/
OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift + w - 1) | CP_BLIT_2_SRC_Y2(0));
OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));
OUT_RING(ring, CP_BLIT_4_DST_X2(dshift + w - 1) | CP_BLIT_4_DST_Y2(0));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
OUT_WFI5(ring);
}
OUT_WFI5(ring);
}
}
static void
emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{
const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst;
struct fdl_slice *sslice, *dslice;
enum a5xx_color_fmt sfmt, dfmt;
enum a5xx_tile_mode stile, dtile;
enum a3xx_color_swap sswap, dswap;
unsigned ssize, dsize, spitch, dpitch;
unsigned sx1, sy1, sx2, sy2;
unsigned dx1, dy1, dx2, dy2;
const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst;
struct fdl_slice *sslice, *dslice;
enum a5xx_color_fmt sfmt, dfmt;
enum a5xx_tile_mode stile, dtile;
enum a3xx_color_swap sswap, dswap;
unsigned ssize, dsize, spitch, dpitch;
unsigned sx1, sy1, sx2, sy2;
unsigned dx1, dy1, dx2, dy2;
src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource);
src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource);
sslice = fd_resource_slice(src, info->src.level);
dslice = fd_resource_slice(dst, info->dst.level);
sslice = fd_resource_slice(src, info->src.level);
dslice = fd_resource_slice(dst, info->dst.level);
sfmt = fd5_pipe2color(info->src.format);
dfmt = fd5_pipe2color(info->dst.format);
sfmt = fd5_pipe2color(info->src.format);
dfmt = fd5_pipe2color(info->dst.format);
stile = fd_resource_tile_mode(info->src.resource, info->src.level);
dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
stile = fd_resource_tile_mode(info->src.resource, info->src.level);
dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
sswap = fd5_pipe2swap(info->src.format);
dswap = fd5_pipe2swap(info->dst.format);
sswap = fd5_pipe2swap(info->src.format);
dswap = fd5_pipe2swap(info->dst.format);
spitch = fd_resource_pitch(src, info->src.level);
dpitch = fd_resource_pitch(dst, info->dst.level);
spitch = fd_resource_pitch(src, info->src.level);
dpitch = fd_resource_pitch(dst, info->dst.level);
/* if dtile, then dswap ignored by hw, and likewise if stile then sswap
* ignored by hw.. but in this case we have already rejected the blit
* if src and dst formats differ, so just use WZYX for both src and
* dst swap mode (so we don't change component order)
*/
if (stile || dtile) {
debug_assert(info->src.format == info->dst.format);
sswap = dswap = WZYX;
}
/* if dtile, then dswap ignored by hw, and likewise if stile then sswap
* ignored by hw.. but in this case we have already rejected the blit
* if src and dst formats differ, so just use WZYX for both src and
* dst swap mode (so we don't change component order)
*/
if (stile || dtile) {
debug_assert(info->src.format == info->dst.format);
sswap = dswap = WZYX;
}
sx1 = sbox->x;
sy1 = sbox->y;
sx2 = sbox->x + sbox->width - 1;
sy2 = sbox->y + sbox->height - 1;
sx1 = sbox->x;
sy1 = sbox->y;
sx2 = sbox->x + sbox->width - 1;
sy2 = sbox->y + sbox->height - 1;
dx1 = dbox->x;
dy1 = dbox->y;
dx2 = dbox->x + dbox->width - 1;
dy2 = dbox->y + dbox->height - 1;
dx1 = dbox->x;
dy1 = dbox->y;
dx2 = dbox->x + dbox->width - 1;
dy2 = dbox->y + dbox->height - 1;
if (info->src.resource->target == PIPE_TEXTURE_3D)
ssize = sslice->size0;
else
ssize = src->layout.layer_size;
if (info->src.resource->target == PIPE_TEXTURE_3D)
ssize = sslice->size0;
else
ssize = src->layout.layer_size;
if (info->dst.resource->target == PIPE_TEXTURE_3D)
dsize = dslice->size0;
else
dsize = dst->layout.layer_size;
if (info->dst.resource->target == PIPE_TEXTURE_3D)
dsize = dslice->size0;
else
dsize = dst->layout.layer_size;
for (unsigned i = 0; i < info->dst.box.depth; i++) {
unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
for (unsigned i = 0; i < info->dst.box.depth; i++) {
unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));
debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));
debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));
debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
/*
* Emit source:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
/*
* Emit source:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
/*
* Emit destination:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
/*
* Emit destination:
*/
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
/*
* Blit command:
*/
OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));
OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));
OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));
/*
* Blit command:
*/
OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));
OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));
OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
}
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
}
}
bool
fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
assert_dt
fd5_blitter_blit(struct fd_context *ctx,
const struct pipe_blit_info *info) assert_dt
{
struct fd_batch *batch;
struct fd_batch *batch;
if (!can_do_blit(info)) {
return false;
}
if (!can_do_blit(info)) {
return false;
}
batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
fd_batch_update_queries(batch);
fd_batch_update_queries(batch);
emit_setup(batch->draw);
emit_setup(batch->draw);
if ((info->src.resource->target == PIPE_BUFFER) &&
(info->dst.resource->target == PIPE_BUFFER)) {
assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);
assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);
emit_blit_buffer(batch->draw, info);
} else {
/* I don't *think* we need to handle blits between buffer <-> !buffer */
debug_assert(info->src.resource->target != PIPE_BUFFER);
debug_assert(info->dst.resource->target != PIPE_BUFFER);
emit_blit(batch->draw, info);
}
if ((info->src.resource->target == PIPE_BUFFER) &&
(info->dst.resource->target == PIPE_BUFFER)) {
assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);
assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);
emit_blit_buffer(batch->draw, info);
} else {
/* I don't *think* we need to handle blits between buffer <-> !buffer */
debug_assert(info->src.resource->target != PIPE_BUFFER);
debug_assert(info->dst.resource->target != PIPE_BUFFER);
emit_blit(batch->draw, info);
}
fd_resource(info->dst.resource)->valid = true;
batch->needs_flush = true;
fd_resource(info->dst.resource)->valid = true;
batch->needs_flush = true;
fd_batch_flush(batch);
fd_batch_reference(&batch, NULL);
fd_batch_flush(batch);
fd_batch_reference(&batch, NULL);
/* Acc query state will have been dirtied by our fd_batch_update_queries, so
* the ctx->batch may need to turn its queries back on.
*/
ctx->update_active_queries = true;
/* Acc query state will have been dirtied by our fd_batch_update_queries, so
* the ctx->batch may need to turn its queries back on.
*/
ctx->update_active_queries = true;
return true;
return true;
}
unsigned
fd5_tile_mode(const struct pipe_resource *tmpl)
{
/* basically just has to be a format we can blit, so uploads/downloads
* via a linear staging buffer work:
*/
if (ok_format(tmpl->format))
return TILE5_3;
/* basically just has to be a format we can blit, so uploads/downloads
* via a linear staging buffer work:
*/
if (ok_format(tmpl->format))
return TILE5_3;
return TILE5_LINEAR;
return TILE5_LINEAR;
}


@@ -31,7 +31,8 @@
#include "freedreno_context.h"
bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info);
bool fd5_blitter_blit(struct fd_context *ctx,
const struct pipe_blit_info *info);
unsigned fd5_tile_mode(const struct pipe_resource *tmpl);
#endif /* FD5_BLIT_H_ */


@@ -32,160 +32,167 @@
#include "fd5_context.h"
#include "fd5_emit.h"
/* maybe move to fd5_program? */
static void
cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
{
const struct ir3_info *i = &v->info;
enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS;
unsigned instrlen = v->instrlen;
/* if shader is more than 32*16 instructions, don't preload it. Similar
* to the combined restriction of 64*16 for VS+FS
*/
if (instrlen > 32)
instrlen = 0;
OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* SP_SP_CNTL */
OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 1);
OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) |
A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(thrsz) |
0x00000880 /* XXX */);
OUT_PKT4(ring, REG_A5XX_SP_CS_CTRL_REG0, 1);
OUT_RING(ring,
A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
0x3) | // XXX need to figure this out somehow..
0x6 /* XXX */);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(0) |
A5XX_HLSQ_CS_CONFIG_ENABLED);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL, 1);
OUT_RING(ring, A5XX_HLSQ_CS_CNTL_INSTRLEN(instrlen) |
COND(v->has_ssbo, A5XX_HLSQ_CS_CNTL_SSBO_ENABLE));
OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
OUT_RING(ring, A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(0) |
A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) |
A5XX_SP_CS_CONFIG_ENABLED);
assert(v->constlen % 4 == 0);
unsigned constlen = v->constlen / 4;
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2);
OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x1f00000);
uint32_t local_invocation_id, work_group_id;
local_invocation_id =
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
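/* note: regid(63, 0) appears to be the "invalid register" encoding,
 * used below for the two unknown/unused fields:
 */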
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL_0, 2);
OUT_RING(ring, A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
A5XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) |
A5XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) |
A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
OUT_RING(ring, 0x1); /* HLSQ_CS_CNTL_1 */
if (instrlen > 0)
fd5_emit_shader(ring, v);
}
static void
fd5_launch_grid(struct fd_context *ctx,
const struct pipe_grid_info *info) assert_dt
{
struct ir3_shader_key key = {};
struct ir3_shader_variant *v;
struct fd_ringbuffer *ring = ctx->batch->draw;
unsigned nglobal = 0;
v =
ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
if (!v)
return;
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
cs_program_emit(ring, v);
fd5_emit_cs_state(ctx, ring, v);
fd5_emit_cs_consts(v, ring, ctx, info);
u_foreach_bit (i, ctx->global_bindings.enabled_mask)
nglobal++;
if (nglobal > 0) {
/* global resources don't otherwise get an OUT_RELOC(), since
* the raw ptr address is emitted in ir3_emit_cs_consts().
* So to make the kernel aware that these buffers are referenced
* by the batch, emit dummy reloc's as part of a no-op packet
* payload:
*/
OUT_PKT7(ring, CP_NOP, 2 * nglobal);
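/* 2 dwords per buffer: each OUT_RELOC below emits a lo/hi address pair */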
u_foreach_bit (i, ctx->global_bindings.enabled_mask) {
struct pipe_resource *prsc = ctx->global_bindings.buf[i];
OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
}
}
const unsigned *local_size =
info->block; // v->shader->nir->info->cs.local_size;
const unsigned *num_groups = info->grid;
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
const unsigned work_dim = info->work_dim ? info->work_dim : 3;
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_NDRANGE_0, 7);
OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
OUT_RING(ring,
A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
OUT_RING(ring,
A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
OUT_RING(ring,
A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
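/* note: the ndrange above is expressed as global size (local size times
 * number of groups) per dimension, with all global offsets left at zero.
 */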
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_KERNEL_GROUP_X, 3);
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
if (info->indirect) {
struct fd_resource *rsc = fd_resource(info->indirect);
fd5_emit_flush(ctx, ring);
OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
OUT_RING(ring,
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
} else {
OUT_PKT7(ring, CP_EXEC_CS, 4);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
}
}
void
fd5_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->launch_grid = fd5_launch_grid;
pctx->create_compute_state = ir3_shader_compute_state_create;
pctx->delete_compute_state = ir3_shader_state_delete;
}


@@ -26,10 +26,10 @@
#include "freedreno_query_acc.h"
#include "fd5_blend.h"
#include "fd5_blitter.h"
#include "fd5_compute.h"
#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_gmem.h"
@@ -40,22 +40,21 @@
#include "fd5_zsa.h"
static void
fd5_context_destroy(struct pipe_context *pctx) in_dt
{
struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
u_upload_destroy(fd5_ctx->border_color_uploader);
pipe_resource_reference(&fd5_ctx->border_color_buf, NULL);
fd_context_destroy(pctx);
fd_bo_del(fd5_ctx->vsc_size_mem);
fd_bo_del(fd5_ctx->blit_mem);
fd_context_cleanup_common_vbos(&fd5_ctx->base);
free(fd5_ctx);
}
/* clang-format off */
@@ -72,56 +71,56 @@ static const uint8_t primtypes[] = {
/* clang-format on */
struct pipe_context *
fd5_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags) disable_thread_safety_analysis
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context);
struct pipe_context *pctx;
if (!fd5_ctx)
return NULL;
pctx = &fd5_ctx->base.base;
pctx->screen = pscreen;
fd5_ctx->base.dev = fd_device_ref(screen->dev);
fd5_ctx->base.screen = fd_screen(pscreen);
fd5_ctx->base.last.key = &fd5_ctx->last_key;
pctx->destroy = fd5_context_destroy;
pctx->create_blend_state = fd5_blend_state_create;
pctx->create_rasterizer_state = fd5_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create;
fd5_draw_init(pctx);
fd5_compute_init(pctx);
fd5_gmem_init(pctx);
fd5_texture_init(pctx);
fd5_prog_init(pctx);
fd5_emit_init(pctx);
if (!FD_DBG(NOBLIT))
fd5_ctx->base.blit = fd5_blitter_blit;
pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;
util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true);
fd5_ctx->vsc_size_mem =
fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd5_ctx->blit_mem =
fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "blit");
fd_context_setup_common_vbos(&fd5_ctx->base);
fd5_query_context_init(pctx);
fd5_ctx->border_color_uploader =
u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx;
}


@@ -34,55 +34,55 @@
#include "ir3/ir3_shader.h"
struct fd5_context {
struct fd_context base;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*/
struct fd_bo *vsc_size_mem;
/* TODO not sure what this is for.. probably similar to
* CACHE_FLUSH_TS on kernel side, where value gets written
* to this address synchronized w/ 3d (ie. a way to
* synchronize when the CP is running far ahead)
*/
struct fd_bo *blit_mem;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
/* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
/* number of active samples-passed queries: */
int samples_passed_queries;
/* cached state about current emitted shader program (3d): */
unsigned max_loc;
};
static inline struct fd5_context *
fd5_context(struct fd_context *ctx)
{
return (struct fd5_context *)ctx;
}
struct pipe_context *fd5_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags);
/* helper for places where we need to stall CP to wait for previous draws: */
static inline void
fd5_emit_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS);
OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
OUT_RING(ring, 0x00000000);
OUT_WFI5(ring);
}
#endif /* FD5_CONTEXT_H_ */


@@ -25,343 +25,341 @@
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_string.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_zsa.h"
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit, unsigned index_offset) assert_dt
{
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd5_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd5_emit_vertex_bufs(ring, emit);
OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->index_size ? info->index_bias
: emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index
: 0xffffffff);
fd5_emit_render_cntl(ctx, false, emit->binning_pass);
fd5_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
emit->indirect, emit->draw, index_offset);
}
static bool
fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
unsigned index_offset) in_dt
{
struct fd5_context *fd5_ctx = fd5_context(ctx);
struct fd5_emit emit = {
.debug = &ctx->debug,
.vtx = &ctx->vtx,
.info = info,
.indirect = indirect,
.draw = draw,
.key =
{
.vs = ctx->prog.vs,
.fs = ctx->prog.fs,
.key =
{
.rasterflat = ctx->rasterizer->flatshade,
.has_per_samp = fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb,
.vastc_srgb = fd5_ctx->vastc_srgb,
.fastc_srgb = fd5_ctx->fastc_srgb,
},
},
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
/* Technically a5xx should not require this, but it avoids a crash in
* piglit 'spec@!opengl 1.1@ppgtt_memory_alignment' due to a draw with
* no VBO bound but a VS that expects an input. The draw is a single
* vertex with PIPE_PRIM_TRIANGLES so the u_trim_pipe_prim() causes it
* to be skipped.
*/
if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
unsigned dirty = ctx->dirty;
emit.prog = fd5_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */
if (!emit.prog)
return false;
const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
}
/* figure out whether we need to disable LRZ write for binning
* pass using draw pass's fp:
*/
emit.no_lrz_write = fp->writes_pos || fp->no_earlyz || fp->has_kill;
emit.binning_pass = false;
emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
/* and now binning pass: */
emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vp */
emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
if (emit.streamout_mask) {
struct fd_ringbuffer *ring = ctx->batch->draw;
for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
if (emit.streamout_mask & (1 << i)) {
fd5_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
}
}
}
fd_context_all_clean(ctx);
return true;
}
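/* depth formats for which fd5_clear() below bails out and leaves the
 * clear to the fallback path:
 */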
static bool
is_z32(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
case PIPE_FORMAT_Z32_UNORM:
case PIPE_FORMAT_Z32_FLOAT:
return true;
default:
return false;
}
}
static void
fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{
struct fd_ringbuffer *ring;
uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth);
ring = fd_batch_get_prologue(batch);
OUT_WFI5(ring);
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x10000000);
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x20fffff);
OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
OUT_RING(ring,
A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) |
COND(zsbuf->b.b.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
OUT_RING(ring, 0x00000181);
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
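/* note: the LRZ buffer is bound above as an R16_UNORM color target, which
 * is why the pitch is lrz_pitch times 2 bytes per pixel.
 */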
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
fd5_emit_blit(batch, ring);
}
static bool
fd5_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth,
unsigned stencil) assert_dt
{
struct fd_ringbuffer *ring = ctx->batch->draw;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
is_z32(pfb->zsbuf->format))
return false;
fd5_emit_render_cntl(ctx, true, false);
if (buffers & PIPE_CLEAR_COLOR) {
for (int i = 0; i < pfb->nr_cbufs; i++) {
union util_color uc = {0};
if (!pfb->cbufs[i])
continue;
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
continue;
enum pipe_format pfmt = pfb->cbufs[i]->format;
// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
union pipe_color_union swapped;
switch (fd5_pipe2swap(pfmt)) {
case WZYX:
swapped.ui[0] = color->ui[0];
swapped.ui[1] = color->ui[1];
swapped.ui[2] = color->ui[2];
swapped.ui[3] = color->ui[3];
break;
case WXYZ:
swapped.ui[2] = color->ui[0];
swapped.ui[1] = color->ui[1];
swapped.ui[0] = color->ui[2];
swapped.ui[3] = color->ui[3];
break;
case ZYXW:
swapped.ui[3] = color->ui[0];
swapped.ui[0] = color->ui[1];
swapped.ui[1] = color->ui[2];
swapped.ui[2] = color->ui[3];
break;
case XYZW:
swapped.ui[3] = color->ui[0];
swapped.ui[2] = color->ui[1];
swapped.ui[1] = color->ui[2];
swapped.ui[0] = color->ui[3];
break;
}
util_pack_color_union(pfmt, &uc, &swapped);
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring,
A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4);
OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */
OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */
OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */
OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */
fd5_emit_blit(ctx->batch, ring);
}
}
if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
uint32_t clear = util_pack_z_stencil(pfb->zsbuf->format, depth, stencil);
uint32_t mask = 0;
if (buffers & PIPE_CLEAR_DEPTH)
mask |= 0x1;
if (buffers & PIPE_CLEAR_STENCIL)
mask |= 0x2;
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring,
A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(mask));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
fd5_emit_blit(ctx->batch, ring);
if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
if (zsbuf->lrz) {
zsbuf->lrz_valid = true;
fd5_clear_lrz(ctx->batch, zsbuf, depth);
}
}
}
/* disable fast clear to not interfere w/ gmem->mem, etc.. */
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */
return true;
}
void
fd5_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd5_draw_vbo;
ctx->clear = fd5_clear;
}


@@ -41,107 +41,103 @@ void fd5_draw_init(struct pipe_context *pctx);
static inline void
fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
enum a4xx_index_size idx_type, uint32_t max_indices,
uint32_t idx_offset, struct pipe_resource *idx_buffer)
{
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup.
*/
emit_marker5(ring, 7);
OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
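/* 3 dwords for a direct draw; an indexed draw adds an extra (unknown)
 * dword, the lo/hi index buffer address, and max_indices, for 7 total:
 */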
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&batch->draw_patches);
} else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
}
OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */
if (idx_buffer) {
OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
OUT_RING(ring, max_indices);
}
emit_marker5(ring, 7);
fd_reset_wfi(batch);
}
static inline void
fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, unsigned index_offset)
{
struct pipe_resource *idx_buffer = NULL;
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t max_indices, idx_offset;
if (indirect && indirect->buffer) {
struct fd_resource *ind = fd_resource(indirect->buffer);
emit_marker5(ring, 7);
if (info->index_size) {
struct pipe_resource *idx = info->index.resource;
max_indices = idx->width0 / info->index_size;
OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6);
OUT_RINGP(ring,
DRAW4(primtype, DI_SRC_SEL_DMA,
fd4_size2indextype(info->index_size), 0),
&batch->draw_patches);
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices));
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
} else {
OUT_PKT7(ring, CP_DRAW_INDIRECT, 3);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
&batch->draw_patches);
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
}
emit_marker5(ring, 7);
fd_reset_wfi(batch);
return;
}
if (info->index_size) {
assert(!info->has_user_indices);
idx_buffer = info->index.resource;
idx_type = fd4_size2indextype(info->index_size);
max_indices = idx_buffer->width0 / info->index_size;
idx_offset = index_offset + draw->start * info->index_size;
src_sel = DI_SRC_SEL_DMA;
} else {
idx_buffer = NULL;
idx_type = INDEX4_SIZE_32_BIT;
max_indices = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
fd5_draw(batch, ring, primtype, vismode, src_sel, draw->count,
info->instance_count, idx_type, max_indices, idx_offset,
idx_buffer);
}
#endif /* FD5_DRAW_H_ */

File diff suppressed because it is too large


@@ -29,186 +29,191 @@
#include "pipe/p_context.h"
#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_screen.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
/* grouped together emit-state for prog/vertex/state emit: */
struct fd5_emit {
struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx;
const struct fd5_program_state *prog;
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw;
bool binning_pass;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode;
bool rasterflat;
/* in binning pass, we don't have real frag shader, so we
* don't know if real draw disqualifies lrz write. So just
* figure that out up-front and stash it in the emit.
*/
bool no_lrz_write;
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs;
/* TODO: other shader stages.. */
unsigned streamout_mask;
};
static inline enum a5xx_color_fmt
fd5_emit_format(struct pipe_surface *surf)
{
if (!surf)
return 0;
return fd5_pipe2color(surf->format);
}
static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit *emit)
{
if (!emit->vs) {
/* We use nonbinning VS during binning when TFB is enabled because that
* is what has all the outputs that might be involved in TFB.
*/
if (emit->binning_pass &&
!emit->prog->vs->shader->stream_output.num_outputs)
emit->vs = emit->prog->bs;
else
emit->vs = emit->prog->vs;
}
return emit->vs;
}
static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit *emit)
{
if (!emit->fs) {
if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs;
} else {
emit->fs = emit->prog->fs;
}
}
return emit->fs;
}
static inline void
fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
fd_reset_wfi(batch);
OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
fd_wfi(batch, ring);
}
static inline void
fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum render_mode_cmd mode)
{
/* TODO add preemption support, gmem bypass, etc */
emit_marker5(ring, 7);
OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
OUT_RING(ring, 0x00000000); /* ADDR_LO */
OUT_RING(ring, 0x00000000); /* ADDR_HI */
OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
OUT_RING(ring, 0x00000000);
emit_marker5(ring, 7);
}
static inline void
fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum vgt_event_type evt, bool timestamp)
{
OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
if (timestamp) {
OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
0); /* ADDR_LO/HI */
OUT_RING(ring, 0x00000000);
}
}
static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
emit_marker5(ring, 7);
fd5_event_write(batch, ring, BLIT, true);
emit_marker5(ring, 7);
}
static inline void
fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
{
struct fd_ringbuffer *ring =
binning ? ctx->batch->binning : ctx->batch->draw;
/* TODO eventually this partially depends on the pfb state, ie.
* which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
* we could probably cache and just regenerate if framebuffer
* state is dirty (or something like that)..
*
* Other bits seem to depend on query state, like if samples-passed
* query is active.
*/
bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
COND(!blit, 0x8));
OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
}
static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
/* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
* a workaround and not needed on all a5xx.
*/
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
fd5_event_write(batch, ring, LRZ_FLUSH, false);
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, 0x0);
}
void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd5_emit *emit) assert_dt;
void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit) assert_dt;
void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct ir3_shader_variant *cp) assert_dt;
void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct pipe_grid_info *info) assert_dt;
void fd5_emit_restore(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt;
void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx);
@@ -216,15 +221,15 @@ void fd5_emit_init(struct pipe_context *pctx);
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
/* for debug after a lock up, write a unique counter value
* to scratch6 for each IB, to make it easier to match up
* register dumps to cmdstream. The combination of IB and
* DRAW (scratch7) is enough to "triangulate" the particular
* draw that caused lockup.
*/
emit_marker5(ring, 6);
__OUT_IB5(ring, target);
emit_marker5(ring, 6);
}
#endif /* FD5_EMIT_H */


@@ -29,48 +29,41 @@
#include "fd5_format.h"
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
struct fd5_format {
enum a5xx_vtx_fmt vtx;
enum a5xx_tex_fmt tex;
enum a5xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
};
/* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT5_##fmt, \
.tex = TFMT5_##fmt, \
.rb = RB5_##rbfmt, \
.swap = swapfmt}
/* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT5_NONE, \
.tex = TFMT5_##fmt, \
.rb = RB5_##rbfmt, \
.swap = swapfmt}
/* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_##pipe] = {.present = 1, \
.vtx = VFMT5_##fmt, \
.tex = TFMT5_NONE, \
.rb = RB5_##rbfmt, \
.swap = swapfmt}
/* clang-format off */
static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
@@ -343,84 +336,94 @@ static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
enum a5xx_vtx_fmt
fd5_pipe2vtx(enum pipe_format format)
{
if (!formats[format].present)
return VFMT5_NONE;
return formats[format].vtx;
}
/* convert pipe format to texture sampler format: */
enum a5xx_tex_fmt
fd5_pipe2tex(enum pipe_format format)
{
if (!formats[format].present)
return TFMT5_NONE;
return formats[format].tex;
}
/* convert pipe format to MRT / copydest format used for render-target: */
enum a5xx_color_fmt
fd5_pipe2color(enum pipe_format format)
{
if (!formats[format].present)
return RB5_NONE;
return formats[format].rb;
}
enum a3xx_color_swap
fd5_pipe2swap(enum pipe_format format)
{
if (!formats[format].present)
return WZYX;
return formats[format].swap;
}
enum a5xx_depth_format
fd5_pipe2depth(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTH5_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH5_24_8;
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTH5_32;
default:
return ~0;
}
}
static inline enum a5xx_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_X:
return A5XX_TEX_X;
case PIPE_SWIZZLE_Y:
return A5XX_TEX_Y;
case PIPE_SWIZZLE_Z:
return A5XX_TEX_Z;
case PIPE_SWIZZLE_W:
return A5XX_TEX_W;
case PIPE_SWIZZLE_0:
return A5XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A5XX_TEX_ONE;
}
}
uint32_t
fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc = util_format_description(format);
unsigned char swiz[4] =
{
swizzle_r,
swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
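/* rswiz is the caller's swizzle composed with the format's own channel
 * ordering, so the TEX_CONST_0 mapping below accounts for both:
 */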
return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
@@ -38,6 +38,7 @@ enum a3xx_color_swap fd5_pipe2swap(enum pipe_format format);
enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format);
uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD5_UTIL_H_ */
@@ -26,181 +26,184 @@
#include "pipe/p_state.h"
#include "fd5_format.h"
#include "fd5_image.h"
#include "fd5_texture.h"
#include "freedreno_resource.h"
static enum a4xx_state_block texsb[] = {
[PIPE_SHADER_COMPUTE] = SB4_CS_TEX,
[PIPE_SHADER_FRAGMENT] = SB4_FS_TEX,
};
static enum a4xx_state_block imgsb[] = {
[PIPE_SHADER_COMPUTE] = SB4_CS_SSBO,
[PIPE_SHADER_FRAGMENT] = SB4_SSBO,
};
struct fd5_image {
enum pipe_format pfmt;
enum a5xx_tex_fmt fmt;
enum a5xx_tex_type type;
bool srgb;
uint32_t cpp;
uint32_t width;
uint32_t height;
uint32_t depth;
uint32_t pitch;
uint32_t array_pitch;
struct fd_bo *bo;
uint32_t offset;
bool buffer;
};
static void
translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
{
enum pipe_format format = pimg->format;
struct pipe_resource *prsc = pimg->resource;
struct fd_resource *rsc = fd_resource(prsc);
if (!pimg->resource) {
memset(img, 0, sizeof(*img));
return;
}
img->pfmt = format;
img->fmt = fd5_pipe2tex(format);
img->type = fd5_tex_type(prsc->target);
img->srgb = util_format_is_srgb(format);
img->cpp = rsc->layout.cpp;
img->bo = rsc->bo;
/* Treat cube textures as 2d-array: */
if (img->type == A5XX_TEX_CUBE)
img->type = A5XX_TEX_2D;
if (prsc->target == PIPE_BUFFER) {
img->buffer = true;
img->offset = pimg->u.buf.offset;
img->pitch = 0;
img->array_pitch = 0;
/* size is encoded with low 15b in WIDTH and high bits in
* HEIGHT, in units of elements:
*/
unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
img->width = sz & MASK(15);
img->height = sz >> 15;
img->depth = 0;
} else {
img->buffer = false;
unsigned lvl = pimg->u.tex.level;
img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
img->pitch = fd_resource_pitch(rsc, lvl);
img->width = u_minify(prsc->width0, lvl);
img->height = u_minify(prsc->height0, lvl);
unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
switch (prsc->target) {
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
img->array_pitch = rsc->layout.layer_size;
img->depth = 1;
break;
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
img->array_pitch = rsc->layout.layer_size;
img->depth = layers;
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
img->array_pitch = rsc->layout.layer_size;
img->depth = layers;
break;
case PIPE_TEXTURE_3D:
img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
img->depth = u_minify(prsc->depth0, lvl);
break;
default:
img->array_pitch = 0;
img->depth = 0;
break;
}
}
}
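/* A sketch of how the 15-bit WIDTH/HEIGHT split used above for
 * PIPE_BUFFER images round-trips (illustration only; the helper name
 * is hypothetical, not driver code):
 *
 *    static inline unsigned
 *    buf_size_elements(unsigned width, unsigned height)
 *    {
 *       return (height << 15) | width;   // inverse of the split above
 *    }
 *
 * e.g. sz = 40000 elements -> width = 7232, height = 1, and
 * (1 << 15) | 7232 == 40000 again.
 */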
static void
emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, struct fd5_image *img,
enum pipe_shader_type shader)
{
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) |
fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
COND(img->srgb, A5XX_TEX_CONST_0_SRGB));
OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) |
A5XX_TEX_CONST_1_HEIGHT(img->height));
OUT_RING(ring,
COND(img->buffer, A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31) |
A5XX_TEX_CONST_2_TYPE(img->type) |
A5XX_TEX_CONST_2_PITCH(img->pitch));
OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
if (img->bo) {
OUT_RELOC(ring, img->bo, img->offset,
(uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
} else {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth));
}
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
static void
emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
struct fd5_image *img, enum pipe_shader_type shader)
{
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring,
CP_LOAD_STATE4_1_STATE_TYPE(1) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
OUT_RING(ring,
A5XX_SSBO_1_0_FMT(img->fmt) | A5XX_SSBO_1_0_WIDTH(img->width));
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) |
A5XX_SSBO_1_1_DEPTH(img->depth));
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring,
CP_LOAD_STATE4_1_STATE_TYPE(2) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
if (img->bo) {
OUT_RELOC(ring, img->bo, img->offset, 0, 0);
} else {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
}
/* Emit required "SSBO" and sampler state. The sampler state is used by the
@@ -209,19 +212,21 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
*/
void
fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type shader,
const struct ir3_shader_variant *v)
{
struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
unsigned enabled_mask = so->enabled_mask;
const struct ir3_ibo_mapping *m = &v->image_mapping;
while (enabled_mask) {
unsigned index = u_bit_scan(&enabled_mask);
struct fd5_image img;
translate_image(&img, &so->si[index]);
emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img,
shader);
}
}
@@ -31,6 +31,7 @@
struct ir3_shader_variant;
void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type shader,
const struct ir3_shader_variant *v);
#endif /* FD5_IMAGE_H_ */
@@ -36,22 +36,23 @@
struct fd5_emit;
struct fd5_program_state {
struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd5_program_state *
fd5_program_state(struct ir3_program_state *state)
{
return (struct fd5_program_state *)state;
}
void fd5_emit_shader(struct fd_ringbuffer *ring,
const struct ir3_shader_variant *so);
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit);
void fd5_prog_init(struct pipe_context *pctx);
@@ -35,21 +35,20 @@
#include "fd5_query.h"
struct PACKED fd5_query_sample {
uint64_t start;
uint64_t result;
uint64_t stop;
};
/* offset of a single field of an array of fd5_query_sample: */
#define query_sample_idx(aq, idx, field) \
fd_resource((aq)->prsc)->bo, \
(idx * sizeof(struct fd5_query_sample)) + \
offsetof(struct fd5_query_sample, field), \
0, 0
/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
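/* Usage sketch for the macros above (illustrative arithmetic, given
 * the packed layout: sizeof(struct fd5_query_sample) == 24 and
 * offsetof(struct fd5_query_sample, stop) == 16):
 * query_sample_idx(aq, 2, stop) expands to the query BO plus byte
 * offset 2 * 24 + 16 == 64, i.e. the 'stop' field of the third sample.
 */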
/*
* Occlusion Query:
@@ -61,98 +60,97 @@ struct PACKED fd5_query_sample {
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, start));
fd5_event_write(batch, ring, ZPASS_DONE, false);
fd_reset_wfi(batch);
fd5_context(batch->ctx)->samples_passed_queries++;
}
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0xffffffff);
OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, stop));
fd5_event_write(batch, ring, ZPASS_DONE, false);
fd_reset_wfi(batch);
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, 0x00000014); // XXX
OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0x00000010); // XXX
/* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
OUT_RELOC(ring, query_sample(aq, result)); /* dst */
OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
fd5_context(batch->ctx)->samples_passed_queries--;
}
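/* What the CP_MEM_TO_MEM above computes, written out in C terms (a
 * sketch: DOUBLE selects 64-bit operands and NEG_C negates srcC):
 *
 *    result = result + stop - start;
 *
 * e.g. start == 100, stop == 140, prior result == 25 -> result == 65.
 */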
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
result->u64 = sp->result;
}
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
result->b = !!sp->result;
}
static const struct fd_acc_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume,
.pause = occlusion_pause,
.result = occlusion_counter_result,
};
static const struct fd_acc_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume,
.pause = occlusion_pause,
.result = occlusion_predicate_result,
};
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume,
.pause = occlusion_pause,
.result = occlusion_predicate_result,
};
/*
@@ -160,78 +158,75 @@ static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
*/
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring,
CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
OUT_RELOC(ring, query_sample(aq, start));
OUT_RING(ring, 0x00000000);
fd_reset_wfi(batch);
}
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring,
CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0x00000000);
fd_reset_wfi(batch);
fd_wfi(batch, ring);
/* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
OUT_RELOC(ring, query_sample(aq, result)); /* dst */
OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
}
static uint64_t
ticks_to_ns(uint32_t ts)
{
/* This is based on the 19.2MHz always-on rbbm timer.
*
* TODO we should probably query this value from kernel..
*/
return ts * (1000000000 / 19200000);
}
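/* Worked example for ticks_to_ns() above (a sketch): the integer
 * division 1000000000 / 19200000 truncates to 52 (exact value 52.083),
 * so ticks_to_ns(19200000) == 19200000 * 52 == 998400000 ns, i.e. one
 * second's worth of ticks reads back as ~0.9984 s (about 0.16% low).
 */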
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
result->u64 = ticks_to_ns(sp->result);
}
static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd5_query_sample *sp = buf;
result->u64 = ticks_to_ns(sp->result);
}
static const struct fd_acc_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED,
.always = true,
.size = sizeof(struct fd5_query_sample),
.resume = timestamp_resume,
.pause = timestamp_pause,
.result = time_elapsed_accumulate_result,
};
/* NOTE: timestamp query isn't going to give terribly sensible results
@@ -242,12 +237,12 @@ static const struct fd_acc_sample_provider time_elapsed = {
*/
static const struct fd_acc_sample_provider timestamp = {
.query_type = PIPE_QUERY_TIMESTAMP,
.always = true,
.size = sizeof(struct fd5_query_sample),
.resume = timestamp_resume,
.pause = timestamp_pause,
.result = timestamp_accumulate_result,
};
/*
@@ -260,208 +255,204 @@ static const struct fd_acc_sample_provider timestamp = {
*/
struct fd_batch_query_entry {
uint8_t gid; /* group-id */
uint8_t cid; /* countable-id within the group */
};
struct fd_batch_query_data {
struct fd_screen *screen;
unsigned num_query_entries;
struct fd_batch_query_entry query_entries[];
};
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring);
/* configure performance counters for the requested queries: */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
debug_assert(counter_idx < g->num_counters);
OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
OUT_RING(ring, g->countables[entry->cid].selector);
}
memset(counters_per_group, 0, sizeof(counters_per_group));
/* and snapshot the start values */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
OUT_RELOC(ring, query_sample_idx(aq, i, start));
}
}
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring);
/* TODO do we need to bother to turn anything off? */
/* snapshot the end values: */
for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
OUT_RELOC(ring, query_sample_idx(aq, i, stop));
}
/* and compute the result: */
for (unsigned i = 0; i < data->num_query_entries; i++) {
/* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
}
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd_batch_query_data *data = aq->query_data;
struct fd5_query_sample *sp = buf;
for (unsigned i = 0; i < data->num_query_entries; i++) {
result->batch[i].u64 = sp[i].result;
}
}
static const struct fd_acc_sample_provider perfcntr = {
.query_type = FD_QUERY_FIRST_PERFCNTR,
.always = true,
.resume = perfcntr_resume,
.pause = perfcntr_pause,
.result = perfcntr_accumulate_result,
};
static struct pipe_query *
fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
unsigned *query_types)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_screen *screen = ctx->screen;
struct fd_query *q;
struct fd_acc_query *aq;
struct fd_batch_query_data *data;
data = CALLOC_VARIANT_LENGTH_STRUCT(
fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
data->screen = screen;
data->num_query_entries = num_queries;
/* validate the requested query_types and ensure we don't try
* to request more query_types of a given group than we have
* counters:
*/
unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group));
for (unsigned i = 0; i < num_queries; i++) {
unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
/* verify valid query_type, ie. is it actually a perfcntr? */
if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
(idx >= screen->num_perfcntr_queries)) {
mesa_loge("invalid batch query query_type: %u", query_types[i]);
goto error;
}
struct fd_batch_query_entry *entry = &data->query_entries[i];
struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
entry->gid = pq->group_id;
/* the perfcntr_queries[] table flattens all the countables
* for each group in series, ie:
*
* (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
*
* So to find the countable index just step back through the
* table to find the first entry with the same group-id.
*/
while (pq > screen->perfcntr_queries) {
pq--;
if (pq->group_id == entry->gid)
entry->cid++;
}
if (counters_per_group[entry->gid] >=
screen->perfcntr_groups[entry->gid].num_counters) {
mesa_loge("too many counters for group %u\n", entry->gid);
goto error;
}
counters_per_group[entry->gid]++;
}
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */
aq->size = num_queries * sizeof(struct fd5_query_sample);
aq->query_data = data;
return (struct pipe_query *)q;
error:
free(data);
return NULL;
}
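/* Sketch of the countable-index recovery used above, on a toy table
 * (toy helper, illustration only). With group-ids flattened in series
 * as {0, 0, 1, 1, 1}, walking back from index 4 finds two earlier
 * entries in the same group, so cid == 2:
 *
 *    static unsigned
 *    toy_cid(const uint8_t *gids, unsigned idx)
 *    {
 *       unsigned cid = 0;
 *       uint8_t gid = gids[idx];
 *       while (idx--)
 *          if (gids[idx] == gid)
 *             cid++;
 *       return cid;
 *    }
 */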
void
fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_acc_create_query;
ctx->query_update_batch = fd_acc_query_update_batch;
pctx->create_batch_query = fd5_create_batch_query;
fd_acc_query_register_provider(pctx, &occlusion_counter);
fd_acc_query_register_provider(pctx, &occlusion_predicate);
fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_acc_query_register_provider(pctx, &time_elapsed);
fd_acc_query_register_provider(pctx, &timestamp);
}
@@ -24,75 +24,73 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_rasterizer.h"
void *
fd5_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd5_rasterizer_stateobj *so;
float psize_min, psize_max;
so = CALLOC_STRUCT(fd5_rasterizer_stateobj);
if (!so)
return NULL;
so->base = *cso;
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
so->gras_su_point_minmax = A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
so->gras_su_poly_offset_clamp =
A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
so->gras_su_cntl = A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width / 2.0);
so->pc_raster_cntl =
A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(
fd_polygon_mode(cso->fill_front)) |
A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
if (cso->offset_tri)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
if (!cso->flatshade_first)
so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST;
// if (!cso->depth_clip)
// so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE
//| A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z;
return so;
}
@@ -27,31 +27,31 @@
#ifndef FD5_RASTERIZER_H_
#define FD5_RASTERIZER_H_
#include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd5_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_poly_offset_clamp;
uint32_t gras_su_cntl;
uint32_t gras_cl_clip_cntl;
uint32_t pc_primitive_cntl;
uint32_t pc_raster_cntl;
};
static inline struct fd5_rasterizer_stateobj *
fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd5_rasterizer_stateobj *)rast;
}
void *fd5_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
#endif /* FD5_RASTERIZER_H_ */
@@ -29,43 +29,42 @@
static void
setup_lrz(struct fd_resource *rsc)
{
struct fd_screen *screen = fd_screen(rsc->b.b.screen);
const uint32_t flags =
DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
/* LRZ buffer is super-sampled: */
switch (rsc->b.b.nr_samples) {
case 4:
lrz_pitch *= 2;
FALLTHROUGH;
case 2:
lrz_height *= 2;
}
unsigned size = lrz_pitch * lrz_height * 2;
size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
rsc->lrz_height = lrz_height;
rsc->lrz_width = lrz_pitch;
rsc->lrz_pitch = lrz_pitch;
rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
}
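/* Worked example for setup_lrz() above (a sketch): a 1920x1080,
 * non-MSAA depth buffer gives
 *    lrz_pitch  = align(DIV_ROUND_UP(1920, 8), 64) = align(240, 64) = 256
 *    lrz_height = DIV_ROUND_UP(1080, 8)            = 135
 *    size       = 256 * 135 * 2 + 0x1000           = 73216 bytes
 */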
uint32_t
fd5_setup_slices(struct fd_resource *rsc)
{
struct pipe_resource *prsc = &rsc->b.b;
if (FD_DBG(LRZ) && has_depth(rsc->b.b.format))
setup_lrz(rsc);
fdl5_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
prsc->array_size, prsc->target == PIPE_TEXTURE_3D);
return rsc->layout.size;
}
@@ -27,112 +27,106 @@
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "fd5_blitter.h"
#include "fd5_context.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_resource.h"
#include "fd5_screen.h"
#include "ir3/ir3_compiler.h"
static bool
valid_sample_count(unsigned sample_count)
{
switch (sample_count) {
case 0:
case 1:
case 2:
case 4:
return true;
default:
return false;
}
}
static bool
fd5_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count, unsigned usage)
{
unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
!valid_sample_count(sample_count)) {
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return false;
}
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd5_pipe2vtx(format) != VFMT5_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
(fd5_pipe2tex(format) != TFMT5_NONE) &&
(target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
}
if ((usage &
(PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_COMPUTE_RESOURCE)) &&
(fd5_pipe2color(format) != RB5_NONE) &&
(fd5_pipe2tex(format) != TFMT5_NONE)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
PIPE_BIND_COMPUTE_RESOURCE);
}
/* For ARB_framebuffer_no_attachments: */
if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
retval |= usage & PIPE_BIND_RENDER_TARGET;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd5_pipe2depth(format) != (enum a5xx_depth_format) ~0) &&
(fd5_pipe2tex(format) != TFMT5_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x",
util_format_name(format), target, sample_count, usage, retval);
}
return retval == usage;
}
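/* Usage sketch for the retval accumulation above: a format that is
 * sampleable but not renderable collects only the sampler bits, so a
 * query with usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET
 * returns false (retval != usage), while PIPE_BIND_SAMPLER_VIEW alone
 * passes.
 */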
void
fd5_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A5XX_MAX_RENDER_TARGETS;
pscreen->context_create = fd5_context_create;
pscreen->is_format_supported = fd5_screen_is_format_supported;
screen->setup_slices = fd5_setup_slices;
if (FD_DBG(TTILE))
screen->tile_mode = fd5_tile_mode;
fd5_emit_init_screen(pscreen);
ir3_screen_init(pscreen);
}
@@ -38,13 +38,13 @@ void fd5_screen_init(struct pipe_screen *pscreen);
static inline void
emit_marker5(struct fd_ringbuffer *ring, int scratch_idx)
{
extern int32_t marker_cnt;
unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx);
if (__EMIT_MARKER) {
OUT_WFI5(ring);
OUT_PKT4(ring, reg, 1);
OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
}
}
#endif /* FD5_SCREEN_H_ */
@@ -25,263 +25,250 @@
*/
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_format.h"
#include "fd5_texture.h"
static enum a5xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A5XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A5XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true;
return A5XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
return A5XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A5XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
static enum a5xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A5XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A5XX_TEX_ANISO : A5XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
static void *
fd5_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
if (!so)
return NULL;
so->base = *cso;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
so->needs_border = false;
so->texsamp0 =
COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A5XX_TEX_SAMP_0_ANISO(aniso) |
A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
so->texsamp1 =
COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS);
so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
} else {
/* If we're not doing mipmap filtering, we still need a slightly > 0
* LOD clamp so the HW can decide between min and mag filtering of
* level 0.
*/
so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
}
if (cso->compare_mode)
so->texsamp1 |=
A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
return so;
}
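/* Worked example for the aniso encoding above (a sketch):
 * max_anisotropy == 16 -> 16 >> 1 == 8, MIN2(8, 8) == 8, and
 * util_last_bit(8) == 4, so the A5XX_TEX_SAMP_0_ANISO field ends up
 * holding log2 of the requested anisotropy.
 */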
static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{
return false; // TODO check if this is still needed on a5xx
}
static struct pipe_sampler_view *
fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = cso->format;
unsigned lvl, layers = 0;
if (!so)
return NULL;
if (format == PIPE_FORMAT_X32_S8X24_UINT) {
rsc = rsc->stencil;
format = rsc->b.b.format;
}
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->texconst0 = A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) |
A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
* we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
* way to re-arrange things so stencil component is where the swiz
* expects.
*
* Note that gallium expects stencil sampler to return (s,s,s,s)
* which isn't quite true. To make that happen we'd have to massage
* the swizzle. But in practice only the .x component is used.
*/
if (format == PIPE_FORMAT_X24S8_UINT) {
so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW);
}
if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format))
so->astc_srgb = true;
so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
}
if (cso->target == PIPE_BUFFER) {
unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
lvl = 0;
so->texconst1 = A5XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
A5XX_TEX_CONST_1_HEIGHT(elements >> 15);
so->texconst2 = A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31;
so->offset = cso->u.buf.offset;
} else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl;
layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 =
A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
so->texconst2 =
A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
so->texconst2 = A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
switch (cso->target) {
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
so->texconst3 =
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 =
A5XX_TEX_CONST_5_DEPTH(1);
break;
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 =
A5XX_TEX_CONST_5_DEPTH(layers);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 =
A5XX_TEX_CONST_5_DEPTH(layers / 6);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A5XX_TEX_CONST_3_MIN_LAYERSZ(
fd_resource_slice(rsc, prsc->last_level)->size0) |
A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
so->texconst5 =
A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
break;
default:
so->texconst3 = 0x00000000;
break;
}
switch (cso->target) {
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 = A5XX_TEX_CONST_5_DEPTH(1);
break;
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers / 6);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A5XX_TEX_CONST_3_MIN_LAYERSZ(
fd_resource_slice(rsc, prsc->last_level)->size0) |
A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
so->texconst5 = A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
break;
default:
so->texconst3 = 0x00000000;
break;
}
return &so->base;
return &so->base;
}
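
A note on the PIPE_BUFFER path above: the element count of a buffer view can exceed the 15-bit WIDTH field, so it is split across WIDTH (low 15 bits) and HEIGHT (the remaining high bits). A minimal standalone sketch of that arithmetic follows; MASK() is redefined locally for illustration (in the driver it comes from the shared util headers) and the element count is an arbitrary example value, not driver data.

#include <assert.h>
#include <stdio.h>

#define MASK(n) ((1u << (n)) - 1) /* local stand-in for the driver macro */

int main(void)
{
   unsigned elements = 100000; /* arbitrary example count */

   /* same split as the texconst1 setup above */
   unsigned width = elements & MASK(15); /* low 15 bits -> 1696 */
   unsigned height = elements >> 15;     /* high bits   -> 3 */

   /* recombining HEIGHT * 2^15 + WIDTH gives back the element count */
   assert(((height << 15) | width) == elements);
   printf("elements=%u WIDTH=%u HEIGHT=%u\n", elements, width, height);
   return 0;
}
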
static void
fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
                      unsigned start, unsigned nr,
                      unsigned unbind_num_trailing_slots,
                      struct pipe_sampler_view **views)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   uint16_t astc_srgb = 0;
   unsigned i;

   for (i = 0; i < nr; i++) {
      if (views[i]) {
         struct fd5_pipe_sampler_view *view = fd5_pipe_sampler_view(views[i]);
         if (view->astc_srgb)
            astc_srgb |= (1 << i);
      }
   }

   fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
                        views);

   if (shader == PIPE_SHADER_FRAGMENT) {
      fd5_ctx->fastc_srgb = astc_srgb;
   } else if (shader == PIPE_SHADER_VERTEX) {
      fd5_ctx->vastc_srgb = astc_srgb;
   }
}
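
The astc_srgb word built above is a per-stage bitmask: bit i is set when sampler view i carries an ASTC sRGB format that needs the workaround. A hypothetical accessor (not part of the driver) makes the encoding explicit:

#include <stdbool.h>
#include <stdint.h>

/* hypothetical helper, not driver code: test one texture unit's bit in
 * the mask accumulated by fd5_set_sampler_views() */
static inline bool
needs_astc_srgb_workaround(uint16_t astc_srgb, unsigned tex_unit)
{
   return (astc_srgb & (1u << tex_unit)) != 0;
}
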
void
fd5_texture_init(struct pipe_context *pctx)
{
   pctx->create_sampler_state = fd5_sampler_state_create;
   pctx->bind_sampler_states = fd_sampler_states_bind;
   pctx->create_sampler_view = fd5_sampler_view_create;
   pctx->set_sampler_views = fd5_set_sampler_views;
}


@ -29,61 +29,60 @@
#include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd5_context.h"
#include "fd5_format.h"
struct fd5_sampler_stateobj {
   struct pipe_sampler_state base;
   uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
   bool needs_border;
};
static inline struct fd5_sampler_stateobj *
fd5_sampler_stateobj(struct pipe_sampler_state *samp)
{
   return (struct fd5_sampler_stateobj *)samp;
}
struct fd5_pipe_sampler_view {
   struct pipe_sampler_view base;
   uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
   uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
   uint32_t offset;
   bool astc_srgb;
};
static inline struct fd5_pipe_sampler_view *
fd5_pipe_sampler_view(struct pipe_sampler_view *pview)
{
   return (struct fd5_pipe_sampler_view *)pview;
}
void fd5_texture_init(struct pipe_context *pctx);
static inline enum a5xx_tex_type
fd5_tex_type(unsigned target)
{
   switch (target) {
   default:
      assert(0);
   case PIPE_BUFFER:
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_1D_ARRAY:
      return A5XX_TEX_1D;
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_2D_ARRAY:
      return A5XX_TEX_2D;
   case PIPE_TEXTURE_3D:
      return A5XX_TEX_3D;
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      return A5XX_TEX_CUBE;
   }
}
#endif /* FD5_TEXTURE_H_ */
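
Read together with the target switch in fd5_sampler_view_create(), the mapping above implies that a cube-map array binds as A5XX_TEX_CUBE with the DEPTH field holding the cube count, i.e. layers / 6. A toy check of that arithmetic (illustrative values only, not driver code):

#include <assert.h>

int main(void)
{
   /* a cube array of 4 cubes is exposed by gallium as 24 layers */
   unsigned layers = 24;
   unsigned depth = layers / 6; /* DEPTH field -> number of cubes */
   assert(depth == 4);
   return 0;
}
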


@ -24,96 +24,95 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_zsa.h"
#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_zsa.h"
void *
fd5_zsa_state_create(struct pipe_context *pctx,
                     const struct pipe_depth_stencil_alpha_state *cso)
{
   struct fd5_zsa_stateobj *so;

   so = CALLOC_STRUCT(fd5_zsa_stateobj);
   if (!so)
      return NULL;

   so->base = *cso;

   switch (cso->depth_func) {
   case PIPE_FUNC_LESS:
   case PIPE_FUNC_LEQUAL:
      so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
      break;

   case PIPE_FUNC_GREATER:
   case PIPE_FUNC_GEQUAL:
      so->gras_lrz_cntl =
         A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
      break;

   default:
      /* LRZ not enabled */
      so->gras_lrz_cntl = 0;
      break;
   }

   if (!(cso->stencil->enabled || cso->alpha_enabled || !cso->depth_writemask))
      so->lrz_write = true;

   so->rb_depth_cntl |=
      A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */

   if (cso->depth_enabled)
      so->rb_depth_cntl |=
         A5XX_RB_DEPTH_CNTL_Z_ENABLE | A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;

   if (cso->depth_writemask)
      so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
   if (cso->stencil[0].enabled) {
      const struct pipe_stencil_state *s = &cso->stencil[0];

      so->rb_stencil_control |=
         A5XX_RB_STENCIL_CONTROL_STENCIL_READ |
         A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
         A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
         A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
         A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
         A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
      so->rb_stencilrefmask |=
         A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
         A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);

      if (cso->stencil[1].enabled) {
         const struct pipe_stencil_state *bs = &cso->stencil[1];

         so->rb_stencil_control |=
            A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
            A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
            A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
            A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
            A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
         so->rb_stencilrefmask_bf |=
            A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
            A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
      }
   }

   if (cso->alpha_enabled) {
      uint32_t ref = cso->alpha_ref_value * 255.0;
      so->rb_alpha_control =
         A5XX_RB_ALPHA_CONTROL_ALPHA_TEST |
         A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
         A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
      // so->rb_depth_control |=
      //    A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
   }

   return so;
}
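
One detail of the alpha-test block above worth spelling out: gallium delivers alpha_ref_value as a float in [0, 1], and the assignment to a uint32_t truncates rather than rounds after scaling by 255. A tiny standalone check (illustrative values, not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   float alpha_ref_value = 0.5f; /* as delivered by gallium, 0.0..1.0 */

   /* same conversion as fd5_zsa_state_create(): scale to 8 bits, then
    * truncate via the implicit float->integer conversion */
   uint32_t ref = alpha_ref_value * 255.0;

   printf("ref = %u\n", ref); /* prints 127 (truncated from 127.5) */
   return 0;
}
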


@ -27,31 +27,30 @@
#ifndef FD5_ZSA_H_
#define FD5_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd5_zsa_stateobj {
   struct pipe_depth_stencil_alpha_state base;

   uint32_t rb_alpha_control;
   uint32_t rb_depth_cntl;
   uint32_t rb_stencil_control;
   uint32_t rb_stencilrefmask;
   uint32_t rb_stencilrefmask_bf;
   uint32_t gras_lrz_cntl;
   bool lrz_write;
};
static inline struct fd5_zsa_stateobj *
fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
   return (struct fd5_zsa_stateobj *)zsa;
}
void *fd5_zsa_state_create(struct pipe_context *pctx,
                           const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD5_ZSA_H_ */
