freedreno/a3xx: add MRT support

The hardware only supports 4 MRTs. It should be possible to emulate
support for 8, but doesn't seem worth the trouble.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin 2015-02-15 03:39:43 -05:00
parent 6f4c1976f4
commit 4a3c0e9950
9 changed files with 221 additions and 139 deletions

View file

@ -45,11 +45,12 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_AMD_pinned_memory on r600, radeonsi</li>
<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
<li>GL_ARB_draw_instanced on freedreno</li>
<li>GL_ARB_gpu_shader_fp64 on nvc0, softpipe</li>
<li>GL_ARB_instanced_arrays on freedreno</li>
<li>GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe</li>
<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
<li>GL_EXT_draw_buffers2 on freedreno</li>
</ul>
<h2>Bug fixes</h2>

View file

@ -129,7 +129,6 @@ static void
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd3_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
@ -152,7 +151,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.vinteger_s = fd3_ctx->vinteger_s,
.finteger_s = fd3_ctx->finteger_s,
},
.format = pipe_surface_format(pfb->cbufs[0]),
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
};
@ -239,17 +237,18 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
unsigned ce, i;
unsigned i;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = fd3_half_precision(format),
.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
fd3_half_precision(pfb->cbufs[1]) &&
fd3_half_precision(pfb->cbufs[2]) &&
fd3_half_precision(pfb->cbufs[3])),
},
.format = format,
};
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
@ -326,17 +325,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
}
if (buffers & PIPE_CLEAR_COLOR) {
ce = 0xf;
} else {
ce = 0x0;
}
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
COND(buffers & (PIPE_CLEAR_COLOR0 << i),
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)));
OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |

View file

@ -293,59 +293,92 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
* case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
*/
void
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface **psurf,
int bufs)
{
struct fd_resource *rsc = fd_resource(psurf->texture);
unsigned lvl = psurf->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
enum pipe_format format = fd3_gmem_restore_format(psurf->format);
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
int i, j;
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 4);
OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(1));
CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
OUT_RING(ring, 0x00000000);
for (i = 0; i < bufs; i++) {
OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
OUT_RING(ring, 0x00000000);
}
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE, 6);
OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(1));
CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
A3XX_TEX_CONST_1_WIDTH(psurf->width) |
A3XX_TEX_CONST_1_HEIGHT(psurf->height));
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
A3XX_TEX_CONST_2_INDX(0));
OUT_RING(ring, 0x00000000);
for (i = 0; i < bufs; i++) {
if (!psurf[i]) {
OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
OUT_RING(ring, 0x00000000);
continue;
}
struct fd_resource *rsc = fd_resource(psurf[i]->texture);
unsigned lvl = psurf[i]->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
OUT_RING(ring, 0x00000000);
}
/* emit mipaddrs: */
OUT_PKT3(ring, CP_LOAD_STATE, 3);
OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
CP_LOAD_STATE_0_NUM_UNIT(1));
CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
for (i = 0; i < bufs; i++) {
if (psurf[i]) {
struct fd_resource *rsc = fd_resource(psurf[i]->texture);
unsigned lvl = psurf[i]->u.tex.level;
uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
} else {
OUT_RING(ring, 0x00000000);
}
/* pad the remaining entries w/ null: */
for (j = 1; j < BASETABLE_SZ; j++) {
OUT_RING(ring, 0x00000000);
}
}
}
void
@ -570,8 +603,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
if (dirty & FD_DIRTY_PROG)
fd3_program_emit(ring, emit);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
/* TODO we should not need this or fd_wfi() before emit_constants():
*/
@ -624,6 +659,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
}
if (format == PIPE_FORMAT_NONE)
control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
if (has_alpha) {
blend_control |= blend->rb_mrt[i].blend_control_rgb;
} else {

View file

@ -45,7 +45,7 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
const uint32_t *dwords, struct pipe_resource *prsc);
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface *psurf);
struct pipe_surface **psurf, int bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
@ -53,7 +53,6 @@ struct fd3_emit {
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
enum pipe_format format;
uint32_t dirty;
uint32_t sprite_coord_enable;

View file

@ -42,8 +42,14 @@ uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
static INLINE bool
fd3_half_precision(enum pipe_format format)
fd3_half_precision(const struct pipe_surface *surface)
{
enum pipe_format format;
if (!surface)
return true;
format = surface->format;
/* colors are provided in consts, which go through cov.f32f16, which will
* break these values
*/

View file

@ -89,6 +89,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
} else {
stride = slice->pitch * rsc->cpp;
}
} else if (i < nr_bufs && bases) {
base = bases[i];
}
OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
@ -97,7 +99,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
if (bin_w || (i >= nr_bufs)) {
if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
} else {
OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
@ -110,20 +112,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
}
}
static uint32_t
depth_base(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
uint32_t cpp = 4;
if (pfb->cbufs[0]) {
struct fd_resource *rsc =
fd_resource(pfb->cbufs[0]->texture);
cpp = rsc->cpp;
}
return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
}
static bool
use_hw_binning(struct fd_context *ctx)
{
@ -167,7 +155,8 @@ emit_binning_workaround(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
@ -189,7 +178,7 @@ emit_binning_workaround(struct fd_context *ctx)
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
fd3_program_emit(ring, &emit);
fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
@ -338,15 +327,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = fd3_half_precision(format),
.half_precision = true,
},
.format = format,
};
int i;
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
@ -388,7 +376,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
@ -419,21 +408,28 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd3_program_emit(ring, &emit);
fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = depth_base(ctx);
emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
}
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL,
ctx->gmem.zsbuf_base, pfb->zsbuf);
if (ctx->resolve & FD_BUFFER_COLOR) {
emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]);
for (i = 0; i < pfb->nr_cbufs; i++) {
if (!pfb->cbufs[i])
continue;
if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
continue;
emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE,
ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
}
}
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@ -444,14 +440,24 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
/* transfer from system memory to gmem */
static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
struct pipe_surface *psurf, uint32_t bin_w)
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
emit_mrt(ring, 1, &psurf, &base, bin_w);
assert(bufs > 0);
fd3_emit_gmem_restore_tex(ring, psurf);
emit_mrt(ring, bufs, psurf, bases, bin_w);
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
fd3_emit_gmem_restore_tex(ring, psurf, bufs);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
@ -464,15 +470,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd3_emit emit = {
.vtx = &fd3_ctx->blit_vbuf_state,
.prog = &ctx->blit_prog[0],
.sprite_coord_enable = 1,
/* NOTE: They all use the same VP, this is for vtx bufs. */
.prog = &ctx->blit_prog[0],
.key = {
.half_precision = fd3_half_precision(format),
.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
fd3_half_precision(pfb->cbufs[1]) &&
fd3_half_precision(pfb->cbufs[2]) &&
fd3_half_precision(pfb->cbufs[3])),
},
.format = format,
};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
@ -515,6 +523,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
@ -567,7 +579,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd3_program_emit(ring, &emit);
fd3_emit_vertex_bufs(ring, &emit);
/* for gmem pitch/base calculations, we need to use the non-
@ -576,16 +587,27 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
}
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
emit.prog = &ctx->blit_prog[0];
fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
}
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
static void
@ -617,12 +639,13 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
uint32_t pitch = 0;
uint32_t i, pitch = 0;
if (pfb->cbufs[0]) {
struct pipe_surface *psurf = pfb->cbufs[0];
unsigned lvl = psurf->u.tex.level;
pitch = fd_resource(psurf->texture)->slices[lvl].pitch;
for (i = 0; i < pfb->nr_cbufs; i++) {
struct pipe_surface *psurf = pfb->cbufs[i];
if (!psurf)
continue;
pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
}
fd3_emit_restore(ctx);
@ -647,7 +670,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
patch_draws(ctx, IGNORE_VISIBILITY);
patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
@ -734,7 +758,8 @@ emit_binning_pass(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
@ -774,7 +799,8 @@ emit_binning_pass(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
@ -848,21 +874,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t reg;
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
if (pfb->zsbuf) {
reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
}
OUT_RING(ring, reg);
if (pfb->zsbuf) {
uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
} else {
OUT_RING(ring, 0x00000000);
}
if (ctx->needs_rb_fbd) {
fd_wfi(ctx, ring);
@ -874,7 +885,8 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
/* before IB to rendering cmds: */
@ -891,6 +903,21 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
uint32_t reg;
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base);
if (pfb->zsbuf) {
reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
}
OUT_RING(ring, reg);
if (pfb->zsbuf) {
uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
} else {
OUT_RING(ring, 0x00000000);
}
if (use_hw_binning(ctx)) {
struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
@ -918,7 +945,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);

View file

@ -31,8 +31,6 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "freedreno_program.h"
@ -127,13 +125,14 @@ emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
}
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int nr, struct pipe_surface **bufs)
{
const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
int constmode;
int i, j, k;
@ -199,11 +198,26 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(vp,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
color_regid = ir3_find_output_regid(fp,
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
} else {
for (int i = 0; i < fp->outputs_count; i++) {
ir3_semantic sem = fp->outputs[i].semantic;
unsigned idx = sem2idx(sem);
if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
continue;
assert(idx < 4);
color_regid[idx] = fp->outputs[i].regid;
}
}
if (util_format_is_alpha(emit->format))
color_regid += 3;
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
for (i = 0; i < nr; i++)
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3;
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
@ -345,21 +359,23 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
}
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
if (fp->writes_pos) {
OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
} else {
OUT_RING(ring, 0x00000000);
}
OUT_RING(ring,
COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION) |
COND(util_format_is_pure_uint(emit->format), A3XX_SP_FS_MRT_REG_UINT) |
COND(util_format_is_pure_sint(emit->format), A3XX_SP_FS_MRT_REG_SINT));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
for (i = 0; i < 4; i++) {
uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
if (i < nr) {
enum pipe_format fmt = pipe_surface_format(bufs[i]);
mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
}
OUT_RING(ring, mrt_reg);
}
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);

View file

@ -39,7 +39,8 @@ struct fd3_shader_stateobj {
struct fd3_emit;
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit);
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int nr, struct pipe_surface **bufs);
void fd3_prog_init(struct pipe_context *pctx);

View file

@ -103,7 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen,
void
fd3_screen_init(struct pipe_screen *pscreen)
{
fd_screen(pscreen)->max_rts = 1;
fd_screen(pscreen)->max_rts = 4;
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
}