broadcom/vc5: Use raw stores for MSAA buffers.

We were storing the resolved pixels in all cases, but nr_samples > 0 means
we should be keeping the per-sample values.

We will probably want to change the job structure at some point, as we'll
want to recognize full-buffer resolves and do the resolved store in the
same job as the original rendering, meaning we'll need to track both the
MSAA and single-sample resources in the job.  However, this will be enough
to build the rest of the MSAA support.
This commit is contained in:
Eric Anholt 2017-10-24 13:28:53 -07:00
parent e717e3e7cd
commit 464f1fb733

View file

@ -27,7 +27,45 @@
#include "broadcom/cle/v3d_packet_v33_pack.h"
static void
vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
store_raw(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer,
bool color_clear, bool z_clear, bool s_clear)
{
struct vc5_surface *surf = vc5_surface(psurf);
struct vc5_resource *rsc = vc5_resource(psurf->texture);
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
store.raw_mode = true;
store.buffer_to_store = buffer;
store.address = cl_address(rsc->bo, surf->offset);
store.disable_colour_buffers_clear_on_write = !color_clear;
store.disable_z_buffer_clear_on_write = !z_clear;
store.disable_stencil_buffer_clear_on_write = !s_clear;
struct vc5_resource_slice *slice =
&rsc->slices[psurf->u.tex.level];
store.padded_height_of_output_image_in_uif_blocks =
(slice->size / slice->stride) /
(2 * vc5_utile_height(rsc->cpp));
}
}
static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)
{
switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
case PIPE_CLEAR_DEPTHSTENCIL:
return ZSTENCIL;
case PIPE_CLEAR_DEPTH:
return Z;
case PIPE_CLEAR_STENCIL:
return STENCIL;
default:
return NONE;
}
}
static void
vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf)
{
/* Emit the generic list in our indirect state -- the rcl will just
* have pointers into it.
@ -66,21 +104,65 @@ vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
uint32_t color_write_enables =
job->resolve >> first_color_buffer_bit;
bool needs_color_clear = job->cleared & pipe_clear_color_buffers;
bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH;
bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL;
store.disable_color_buffer_write = (~color_write_enables) & 0xf;
store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
uint32_t stores_pending = job->resolve;
store.disable_colour_buffers_clear_on_write =
(job->cleared & pipe_clear_color_buffers) == 0;
store.disable_z_buffer_clear_on_write =
!(job->cleared & PIPE_CLEAR_DEPTH);
store.disable_stencil_buffer_clear_on_write =
!(job->cleared & PIPE_CLEAR_STENCIL);
};
/* Use raw stores for any MSAA surfaces. These output UIF tiled
* images where each 4x MSAA pixel is a 2x2 quad, and the format will
* be that of the internal_type/internal_bpp, rather than the format
* from GL's perspective.
*/
for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
uint32_t bit = PIPE_CLEAR_COLOR0 << i;
if (!(job->resolve & bit))
continue;
struct pipe_surface *psurf = job->cbufs[i];
if (!psurf || psurf->texture->nr_samples <= 1)
continue;
stores_pending &= ~bit;
store_raw(cl, psurf, RENDER_TARGET_0 + i,
!stores_pending && needs_color_clear,
!stores_pending && needs_z_clear,
!stores_pending && needs_s_clear);
if (stores_pending)
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
}
if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
job->zsbuf->texture->nr_samples > 1) {
stores_pending &= ~PIPE_CLEAR_DEPTHSTENCIL;
store_raw(cl, job->zsbuf,
zs_buffer_from_pipe_bits(job->resolve),
!stores_pending && needs_color_clear,
!stores_pending && needs_z_clear,
!stores_pending && needs_s_clear);
if (stores_pending)
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
}
if (stores_pending) {
cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
store.disable_color_buffer_write =
(~stores_pending >> first_color_buffer_bit) & 0xf;
store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
store.disable_colour_buffers_clear_on_write =
!needs_color_clear;
store.disable_z_buffer_clear_on_write =
!needs_z_clear;
store.disable_stencil_buffer_clear_on_write =
!needs_s_clear;
};
}
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
@ -272,7 +354,7 @@ vc5_emit_rcl(struct vc5_job *job)
cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
vc5_rcl_emit_generic_per_tile_list(job);
vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);