mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-16 22:20:29 +01:00
freedreno/a6xx: Convert gallium to new cs builders
Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36589>
This commit is contained in:
parent
c4ea8375c4
commit
3ddcf8ab92
24 changed files with 2809 additions and 2933 deletions
|
|
@ -52,6 +52,13 @@ __reg_iova(const struct fd_reg_pair *reg)
|
|||
return iova << reg->bo_low;
|
||||
}
|
||||
|
||||
/* Special helper for building UBO descriptors inline with pkt7 */
|
||||
#define A6XX_UBO_DESC(_i, _bo, _bo_offset, _size_vec4s) { \
|
||||
.reg = 3 + (2 * _i), \
|
||||
.value = (uint64_t)A6XX_UBO_1_SIZE(_size_vec4s) << 32, \
|
||||
.bo = _bo, .bo_offset = _bo_offset, \
|
||||
}, {}
|
||||
|
||||
/**
|
||||
* Helper for various builders that use fd_ringbuffer. Not for direct use.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@
|
|||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
unsigned flushes)
|
||||
fd6_emit_flushes(struct fd_context *ctx, fd_cs &cs, unsigned flushes)
|
||||
{
|
||||
/* Experiments show that invalidating CCU while it still has data in it
|
||||
* doesn't work, so make sure to always flush before invalidating in case
|
||||
|
|
@ -21,43 +20,34 @@ fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
* However it does seem to work for UCHE.
|
||||
*/
|
||||
if (flushes & (FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR))
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_CLEAN_COLOR);
|
||||
|
||||
if (flushes & (FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH))
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_CLEAN_DEPTH);
|
||||
|
||||
if (flushes & FD6_INVALIDATE_CCU_COLOR)
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
|
||||
if (flushes & FD6_INVALIDATE_CCU_DEPTH)
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
|
||||
if (flushes & FD6_FLUSH_CACHE)
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_CLEAN);
|
||||
|
||||
if (flushes & FD6_INVALIDATE_CACHE)
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE);
|
||||
|
||||
if (flushes & FD6_WAIT_MEM_WRITES)
|
||||
OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
|
||||
fd_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
|
||||
if (flushes & FD6_WAIT_FOR_IDLE)
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
if (flushes & FD6_WAIT_FOR_ME)
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
}
|
||||
FD_GENX(fd6_emit_flushes);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_barrier_flush(struct fd_batch *batch)
|
||||
{
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, batch->draw, batch->barrier);
|
||||
batch->barrier = 0;
|
||||
}
|
||||
FD_GENX(fd6_barrier_flush);
|
||||
|
||||
static void
|
||||
add_flushes(struct pipe_context *pctx, unsigned flushes)
|
||||
assert_dt
|
||||
|
|
|
|||
|
|
@ -6,7 +6,9 @@
|
|||
#ifndef FD6_BARRIER_H_
|
||||
#define FD6_BARRIER_H_
|
||||
|
||||
#include "freedreno_batch.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
/**
|
||||
* Various flush operations that could be needed
|
||||
|
|
@ -24,11 +26,17 @@ enum fd6_flush {
|
|||
};
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
unsigned flushes);
|
||||
void fd6_emit_flushes(struct fd_context *ctx, fd_cs &cs, unsigned flushes);
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_barrier_flush(struct fd_batch *batch) assert_dt;
|
||||
static inline void
|
||||
fd6_barrier_flush(fd_cs &cs, struct fd_batch *batch)
|
||||
{
|
||||
if (!batch->barrier)
|
||||
return;
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs, batch->barrier);
|
||||
batch->barrier = 0;
|
||||
}
|
||||
|
||||
void fd6_barrier_init(struct pipe_context *pctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -60,9 +60,8 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
|
|||
if (!so)
|
||||
return NULL;
|
||||
|
||||
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(
|
||||
blend->ctx->pipe, ((A6XX_MAX_RENDER_TARGETS * 4) + 6) * 4);
|
||||
so->stateobj = ring;
|
||||
unsigned nregs = (2 * A6XX_MAX_RENDER_TARGETS) + 3;
|
||||
fd_crb crb(blend->ctx->pipe, nregs);
|
||||
|
||||
for (unsigned i = 0; i <= cso->max_rt; i++) {
|
||||
const struct pipe_rt_blend_state *rt;
|
||||
|
|
@ -72,25 +71,21 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
|
|||
else
|
||||
rt = &cso->rt[0];
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_MRT_BLEND_CONTROL(
|
||||
i, .rgb_src_factor = fd_blend_factor(rt->rgb_src_factor),
|
||||
crb.add(A6XX_RB_MRT_BLEND_CONTROL(i,
|
||||
.rgb_src_factor = fd_blend_factor(rt->rgb_src_factor),
|
||||
.rgb_blend_opcode = blend_func(rt->rgb_func),
|
||||
.rgb_dest_factor = fd_blend_factor(rt->rgb_dst_factor),
|
||||
.alpha_src_factor = fd_blend_factor(rt->alpha_src_factor),
|
||||
.alpha_blend_opcode = blend_func(rt->alpha_func),
|
||||
.alpha_dest_factor = fd_blend_factor(rt->alpha_dst_factor), ));
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_MRT_CONTROL(
|
||||
i,
|
||||
.blend = rt->blend_enable,
|
||||
.blend2 = rt->blend_enable,
|
||||
.rop_enable = cso->logicop_enable,
|
||||
.rop_code = rop,
|
||||
.component_enable = rt->colormask,
|
||||
)
|
||||
);
|
||||
.alpha_dest_factor = fd_blend_factor(rt->alpha_dst_factor),
|
||||
))
|
||||
.add(A6XX_RB_MRT_CONTROL(i,
|
||||
.blend = rt->blend_enable,
|
||||
.blend2 = rt->blend_enable,
|
||||
.rop_enable = cso->logicop_enable,
|
||||
.rop_code = rop,
|
||||
.component_enable = rt->colormask,
|
||||
));
|
||||
|
||||
if (rt->blend_enable) {
|
||||
mrt_blend |= (1 << i);
|
||||
|
|
@ -104,8 +99,7 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
|
|||
/* sRGB + dither on a7xx goes badly: */
|
||||
bool dither = (CHIP < A7XX) ? cso->dither : false;
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_DITHER_CNTL(
|
||||
crb.add(A6XX_RB_DITHER_CNTL(
|
||||
.dither_mode_mrt0 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
.dither_mode_mrt1 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
.dither_mode_mrt2 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
|
|
@ -114,29 +108,23 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
|
|||
.dither_mode_mrt5 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
.dither_mode_mrt6 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
.dither_mode_mrt7 = dither ? DITHER_ALWAYS : DITHER_DISABLE,
|
||||
)
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_SP_BLEND_CNTL(
|
||||
.enable_blend = mrt_blend,
|
||||
.unk8 = true,
|
||||
.dual_color_in_enable = blend->use_dual_src_blend,
|
||||
.alpha_to_coverage = cso->alpha_to_coverage,
|
||||
),
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_BLEND_CNTL(
|
||||
.blend_reads_dest = mrt_blend,
|
||||
.independent_blend = cso->independent_blend_enable,
|
||||
.dual_color_in_enable = blend->use_dual_src_blend,
|
||||
.alpha_to_coverage = cso->alpha_to_coverage,
|
||||
.alpha_to_one = cso->alpha_to_one,
|
||||
.sample_mask = sample_mask,
|
||||
),
|
||||
);
|
||||
))
|
||||
.add(A6XX_SP_BLEND_CNTL(
|
||||
.enable_blend = mrt_blend,
|
||||
.unk8 = true,
|
||||
.dual_color_in_enable = blend->use_dual_src_blend,
|
||||
.alpha_to_coverage = cso->alpha_to_coverage,
|
||||
))
|
||||
.add(A6XX_RB_BLEND_CNTL(
|
||||
.blend_reads_dest = mrt_blend,
|
||||
.independent_blend = cso->independent_blend_enable,
|
||||
.dual_color_in_enable = blend->use_dual_src_blend,
|
||||
.alpha_to_coverage = cso->alpha_to_coverage,
|
||||
.alpha_to_one = cso->alpha_to_one,
|
||||
.sample_mask = sample_mask,
|
||||
));
|
||||
|
||||
so->stateobj = crb.ring();
|
||||
so->sample_mask = sample_mask;
|
||||
|
||||
util_dynarray_append(&blend->variants, struct fd6_blend_variant *, so);
|
||||
|
|
|
|||
|
|
@ -259,44 +259,46 @@ can_do_clear(const struct pipe_resource *prsc, unsigned level,
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_setup(struct fd_batch *batch)
|
||||
emit_setup(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
struct fd_screen *screen = batch->ctx->screen;
|
||||
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, ring,
|
||||
fd6_emit_flushes<CHIP>(ctx, cs,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_INVALIDATE_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_INVALIDATE_CCU_DEPTH);
|
||||
|
||||
/* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
|
||||
fd6_emit_ccu_cntl<CHIP>(ring, screen, false);
|
||||
fd6_emit_ccu_cntl<CHIP>(cs, ctx->screen, false);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_fini(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
emit_blit_fini(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_LABEL);
|
||||
OUT_WFI5(ring);
|
||||
const struct fd_dev_info *info = ctx->screen->info;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
|
||||
OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_LABEL);
|
||||
|
||||
OUT_PKT7(ring, CP_BLIT, 1);
|
||||
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
if (info->a6xx.magic.RB_DBG_ECO_CNTL != info->a6xx.magic.RB_DBG_ECO_CNTL_blit) {
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_RB_DBG_ECO_CNTL(.dword = info->a6xx.magic.RB_DBG_ECO_CNTL_blit));
|
||||
}
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_BLIT, 1)
|
||||
.add(CP_BLIT_0(.op = BLIT_OP_SCALE));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
|
||||
OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
|
||||
if (info->a6xx.magic.RB_DBG_ECO_CNTL != info->a6xx.magic.RB_DBG_ECO_CNTL_blit) {
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_RB_DBG_ECO_CNTL(.dword = info->a6xx.magic.RB_DBG_ECO_CNTL));
|
||||
}
|
||||
}
|
||||
FD_GENX(emit_blit_fini);
|
||||
|
||||
/* nregs: 5 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
|
||||
emit_blit_setup(fd_ncrb<CHIP> &ncrb, enum pipe_format pfmt,
|
||||
bool scissor_enable, union pipe_color_union *color,
|
||||
uint32_t unknown_8c01, enum a6xx_rotation rotate)
|
||||
{
|
||||
|
|
@ -316,17 +318,14 @@ emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
|
|||
COND(color, A6XX_RB_A2D_BLT_CNTL_SOLID_COLOR) |
|
||||
COND(scissor_enable, A6XX_RB_A2D_BLT_CNTL_SCISSOR);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_A2D_BLT_CNTL, 1);
|
||||
OUT_RING(ring, blit_cntl);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_BLT_CNTL, 1);
|
||||
OUT_RING(ring, blit_cntl);
|
||||
ncrb.add(A6XX_RB_A2D_BLT_CNTL(.dword = blit_cntl));
|
||||
ncrb.add(A6XX_GRAS_A2D_BLT_CNTL(.dword = blit_cntl));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
OUT_REG(ring, A7XX_TPL1_A2D_BLT_CNTL(
|
||||
.raw_copy = false,
|
||||
.start_offset_texels = 0,
|
||||
.type = A6XX_TEX_2D,
|
||||
ncrb.add(A7XX_TPL1_A2D_BLT_CNTL(
|
||||
.raw_copy = false,
|
||||
.start_offset_texels = 0,
|
||||
.type = A6XX_TEX_2D,
|
||||
));
|
||||
}
|
||||
|
||||
|
|
@ -345,34 +344,32 @@ emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
|
|||
* controlling the internal/accumulator format or something like
|
||||
* that. It's certainly not tied to only the src format.
|
||||
*/
|
||||
OUT_REG(ring, SP_A2D_OUTPUT_INFO(
|
||||
CHIP,
|
||||
.ifmt_type = output_ifmt_type,
|
||||
.color_format = fmt,
|
||||
.srgb = is_srgb,
|
||||
.mask = 0xf,
|
||||
ncrb.add(SP_A2D_OUTPUT_INFO(CHIP,
|
||||
.ifmt_type = output_ifmt_type,
|
||||
.color_format = fmt,
|
||||
.srgb = is_srgb,
|
||||
.mask = 0xf,
|
||||
));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_A2D_PIXEL_CNTL, 1);
|
||||
OUT_RING(ring, unknown_8c01);
|
||||
ncrb.add(A6XX_RB_A2D_PIXEL_CNTL(.dword = unknown_8c01));
|
||||
}
|
||||
|
||||
/* nregs: 4 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
|
||||
emit_blit_buffer_dst(fd_ncrb<CHIP> &ncrb, struct fd_resource *dst,
|
||||
unsigned off, unsigned size, a6xx_format color_format)
|
||||
{
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = color_format,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_BASE(
|
||||
.bo = dst->bo,
|
||||
.bo_offset = off,
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_PITCH(size),
|
||||
);
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = color_format,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_BASE(
|
||||
.bo = dst->bo,
|
||||
.bo_offset = off,
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_PITCH(size));
|
||||
}
|
||||
|
||||
/* buffers need to be handled specially since x/width can exceed the bounds
|
||||
|
|
@ -380,8 +377,7 @@ emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
|
|||
*/
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
const struct pipe_blit_info *info)
|
||||
emit_blit_buffer(struct fd_context *ctx, fd_cs &cs, const struct pipe_blit_info *info)
|
||||
{
|
||||
const struct pipe_box *sbox = &info->src.box;
|
||||
const struct pipe_box *dbox = &info->dst.box;
|
||||
|
|
@ -428,7 +424,8 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
sshift = sbox->x & 0x3f;
|
||||
dshift = dbox->x & 0x3f;
|
||||
|
||||
emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
|
||||
with_ncrb (cs, 5)
|
||||
emit_blit_setup<CHIP>(ncrb, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
|
||||
|
||||
for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
|
||||
unsigned soff, doff, w, p;
|
||||
|
|
@ -442,94 +439,80 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
assert((soff + w) <= fd_bo_size(src->bo));
|
||||
assert((doff + w) <= fd_bo_size(dst->bo));
|
||||
|
||||
/*
|
||||
* Emit source:
|
||||
*/
|
||||
OUT_REG(ring,
|
||||
TPL1_A2D_SRC_TEXTURE_INFO(
|
||||
CHIP,
|
||||
.color_format = FMT6_8_UNORM,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_SIZE(
|
||||
CHIP,
|
||||
.width = sshift + w,
|
||||
.height = 1,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_BASE(
|
||||
CHIP,
|
||||
.bo = src->bo,
|
||||
.bo_offset = soff,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_PITCH(
|
||||
CHIP,
|
||||
.pitch = p,
|
||||
),
|
||||
);
|
||||
with_ncrb (cs, 15) {
|
||||
/*
|
||||
* Emit source:
|
||||
*/
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_INFO(CHIP,
|
||||
.color_format = FMT6_8_UNORM,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_SIZE(CHIP,
|
||||
.width = sshift + w,
|
||||
.height = 1,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_BASE(CHIP,
|
||||
.bo = src->bo,
|
||||
.bo_offset = soff,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_PITCH(CHIP, .pitch = p));
|
||||
|
||||
/*
|
||||
* Emit destination:
|
||||
*/
|
||||
emit_blit_buffer_dst(ring, dst, doff, p, FMT6_8_UNORM);
|
||||
/*
|
||||
* Emit destination:
|
||||
*/
|
||||
emit_blit_buffer_dst<CHIP>(ncrb, dst, doff, p, FMT6_8_UNORM);
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMIN(sshift));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMAX(sshift + w - 1));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMIN(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMAX(0));
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = dshift));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(.x = dshift + w - 1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Blit command:
|
||||
*/
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_A2D_SRC_XMIN(sshift),
|
||||
A6XX_GRAS_A2D_SRC_XMAX(sshift + w - 1),
|
||||
A6XX_GRAS_A2D_SRC_YMIN(0),
|
||||
A6XX_GRAS_A2D_SRC_YMAX(0),
|
||||
);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(dshift) | A6XX_GRAS_A2D_DEST_TL_Y(0));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_BR_X(dshift + w - 1) |
|
||||
A6XX_GRAS_A2D_DEST_BR_Y(0));
|
||||
|
||||
emit_blit_fini<CHIP>(ctx, ring);
|
||||
emit_blit_fini<CHIP>(ctx, cs);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
clear_ubwc_setup(struct fd_ringbuffer *ring)
|
||||
clear_ubwc_setup(fd_cs &cs)
|
||||
{
|
||||
union pipe_color_union color = {};
|
||||
fd_ncrb<CHIP> ncrb(cs, 18);
|
||||
|
||||
emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
|
||||
emit_blit_setup<CHIP>(ncrb, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
|
||||
|
||||
OUT_REG(ring,
|
||||
TPL1_A2D_SRC_TEXTURE_INFO(CHIP),
|
||||
TPL1_A2D_SRC_TEXTURE_SIZE(CHIP),
|
||||
TPL1_A2D_SRC_TEXTURE_BASE(CHIP),
|
||||
TPL1_A2D_SRC_TEXTURE_PITCH(CHIP),
|
||||
);
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_INFO(CHIP));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_SIZE(CHIP));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_BASE(CHIP));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_PITCH(CHIP));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_A2D_CLEAR_COLOR_DW0, 4);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW0());
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW1());
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW2());
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW3());
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_A2D_SRC_XMIN(0),
|
||||
A6XX_GRAS_A2D_SRC_XMAX(0),
|
||||
A6XX_GRAS_A2D_SRC_YMIN(0),
|
||||
A6XX_GRAS_A2D_SRC_YMAX(0),
|
||||
);
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMIN(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMAX(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMIN(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMAX(0));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
|
||||
fd_cs cs(fd_batch_get_prologue(batch));
|
||||
|
||||
clear_ubwc_setup<CHIP>(ring);
|
||||
clear_ubwc_setup<CHIP>(cs);
|
||||
|
||||
unsigned size = rsc->layout.slices[0].offset;
|
||||
unsigned offset = 0;
|
||||
|
|
@ -550,34 +533,35 @@ fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
|
|||
/* width is already aligned to a suitable pitch: */
|
||||
const unsigned p = w;
|
||||
|
||||
/*
|
||||
* Emit destination:
|
||||
*/
|
||||
emit_blit_buffer_dst(ring, rsc, offset, p, FMT6_8_UNORM);
|
||||
with_ncrb (cs, 6) {
|
||||
/*
|
||||
* Emit destination:
|
||||
*/
|
||||
emit_blit_buffer_dst<CHIP>(ncrb, rsc, offset, p, FMT6_8_UNORM);
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = 0, .y = 0));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(.x = w - 1, .y = h - 1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Blit command:
|
||||
*/
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(0) | A6XX_GRAS_A2D_DEST_TL_Y(0));
|
||||
OUT_RING(ring,
|
||||
A6XX_GRAS_A2D_DEST_BR_X(w - 1) | A6XX_GRAS_A2D_DEST_BR_Y(h - 1));
|
||||
|
||||
emit_blit_fini<CHIP>(batch->ctx, ring);
|
||||
emit_blit_fini<CHIP>(batch->ctx, cs);
|
||||
offset += w * h;
|
||||
size -= w * h;
|
||||
}
|
||||
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, ring,
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_FLUSH_CACHE |
|
||||
FD6_WAIT_FOR_IDLE);
|
||||
}
|
||||
|
||||
/* nregs: 10 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
|
||||
emit_blit_dst(fd_ncrb<CHIP> &ncrb, struct pipe_resource *prsc,
|
||||
enum pipe_format pfmt, unsigned level, unsigned layer)
|
||||
{
|
||||
struct fd_resource *dst = fd_resource(prsc);
|
||||
|
|
@ -595,33 +579,36 @@ emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
|
|||
if (fmt == FMT6_Z24_UNORM_S8_UINT)
|
||||
fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = fmt,
|
||||
.tile_mode = tile,
|
||||
.color_swap = swap,
|
||||
.flags = ubwc_enabled,
|
||||
.srgb = util_format_is_srgb(pfmt),
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_BASE(
|
||||
.bo = dst->bo,
|
||||
.bo_offset = off,
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_PITCH(pitch),
|
||||
);
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = fmt,
|
||||
.tile_mode = tile,
|
||||
.color_swap = swap,
|
||||
.flags = ubwc_enabled,
|
||||
.srgb = util_format_is_srgb(pfmt),
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_BASE(
|
||||
.bo = dst->bo,
|
||||
.bo_offset = off,
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_PITCH(pitch));
|
||||
|
||||
if (ubwc_enabled) {
|
||||
OUT_PKT4(ring, REG_A6XX_RB_A2D_DEST_FLAG_BUFFER_BASE, 6);
|
||||
fd6_emit_flag_reference(ring, dst, level, layer);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
ncrb.add(A6XX_RB_A2D_DEST_FLAG_BUFFER_BASE(
|
||||
dst->bo, fd_resource_ubwc_offset(dst, level, layer)
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_FLAG_BUFFER_PITCH(
|
||||
.pitch = fdl_ubwc_pitch(&dst->layout, level),
|
||||
.array_pitch = dst->layout.ubwc_layer_size >> 2,
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_FLAG_BUFFER_BASE_1());
|
||||
ncrb.add(A6XX_RB_A2D_DEST_FLAG_BUFFER_PITCH_1());
|
||||
}
|
||||
}
|
||||
|
||||
/* nregs: 8 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
|
||||
emit_blit_src(fd_ncrb<CHIP> &ncrb, const struct pipe_blit_info *info,
|
||||
unsigned layer, unsigned nr_samples)
|
||||
{
|
||||
struct fd_resource *src = fd_resource(info->src.resource);
|
||||
|
|
@ -641,52 +628,36 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
|
|||
if (info->src.format == PIPE_FORMAT_A8_UNORM)
|
||||
sfmt = FMT6_A8_UNORM;
|
||||
|
||||
OUT_REG(ring,
|
||||
TPL1_A2D_SRC_TEXTURE_INFO(
|
||||
CHIP,
|
||||
.color_format = sfmt,
|
||||
.tile_mode = stile,
|
||||
.color_swap = sswap,
|
||||
.flags = subwc_enabled,
|
||||
.srgb = util_format_is_srgb(info->src.format),
|
||||
.samples = samples,
|
||||
.filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
|
||||
.samples_average = (samples > MSAA_ONE) && !info->sample0_only,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_SIZE(
|
||||
CHIP,
|
||||
.width = width,
|
||||
.height = height,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_BASE(
|
||||
CHIP,
|
||||
.bo = src->bo,
|
||||
.bo_offset = soff,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_PITCH(
|
||||
CHIP,
|
||||
.pitch = pitch,
|
||||
),
|
||||
);
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_INFO(CHIP,
|
||||
.color_format = sfmt,
|
||||
.tile_mode = stile,
|
||||
.color_swap = sswap,
|
||||
.flags = subwc_enabled,
|
||||
.srgb = util_format_is_srgb(info->src.format),
|
||||
.samples = samples,
|
||||
.filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
|
||||
.samples_average = (samples > MSAA_ONE) && !info->sample0_only,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_SIZE(CHIP, .width = width, .height = height));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_BASE(CHIP, .bo = src->bo, .bo_offset = soff));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_PITCH(CHIP, .pitch = pitch));
|
||||
|
||||
if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
|
||||
OUT_REG(ring,
|
||||
TPL1_A2D_SRC_TEXTURE_FLAG_BASE(
|
||||
CHIP,
|
||||
.bo = src->bo,
|
||||
.bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_FLAG_PITCH(
|
||||
CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
|
||||
);
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_FLAG_BASE(CHIP,
|
||||
.bo = src->bo,
|
||||
.bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_FLAG_PITCH(CHIP,
|
||||
fdl_ubwc_pitch(&src->layout, info->src.level),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_texture_setup(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
|
||||
emit_blit_texture_setup(fd_cs &cs, const struct pipe_blit_info *info)
|
||||
{
|
||||
const struct pipe_box *sbox = &info->src.box;
|
||||
const struct pipe_box *dbox = &info->dst.box;
|
||||
|
|
@ -717,35 +688,33 @@ emit_blit_texture_setup(struct fd_ringbuffer *ring, const struct pipe_blit_info
|
|||
|
||||
enum a6xx_rotation rotate = rotates[mirror_y][mirror_x];
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_A2D_SRC_XMIN(MIN2(sx1, sx2)),
|
||||
A6XX_GRAS_A2D_SRC_XMAX(MAX2(sx1, sx2) - 1),
|
||||
A6XX_GRAS_A2D_SRC_YMIN(MIN2(sy1, sy2)),
|
||||
A6XX_GRAS_A2D_SRC_YMAX(MAX2(sy1, sy2) - 1),
|
||||
);
|
||||
fd_ncrb<CHIP> ncrb(cs, 13);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_A2D_DEST_TL(.x = MIN2(dx1, dx2),
|
||||
.y = MIN2(dy1, dy2)),
|
||||
A6XX_GRAS_A2D_DEST_BR(.x = MAX2(dx1, dx2) - 1,
|
||||
.y = MAX2(dy1, dy2) - 1),
|
||||
);
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMIN(MIN2(sx1, sx2)));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMAX(MAX2(sx1, sx2) - 1));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMIN(MIN2(sy1, sy2)));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMAX(MAX2(sy1, sy2) - 1));
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = MIN2(dx1, dx2), .y = MIN2(dy1, dy2)));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(.x = MAX2(dx1, dx2) - 1, .y = MAX2(dy1, dy2) - 1));
|
||||
|
||||
if (info->scissor_enable) {
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_SCISSOR_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_SCISSOR_TL_X(info->scissor.minx) |
|
||||
A6XX_GRAS_A2D_SCISSOR_TL_Y(info->scissor.miny));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_SCISSOR_TL_X(info->scissor.maxx - 1) |
|
||||
A6XX_GRAS_A2D_SCISSOR_TL_Y(info->scissor.maxy - 1));
|
||||
ncrb.add(A6XX_GRAS_A2D_SCISSOR_TL(
|
||||
.x = info->scissor.minx,
|
||||
.y = info->scissor.miny,
|
||||
));
|
||||
ncrb.add(A6XX_GRAS_A2D_SCISSOR_BR(
|
||||
.x = info->scissor.maxx - 1,
|
||||
.y = info->scissor.maxy - 1,
|
||||
));
|
||||
}
|
||||
|
||||
emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
|
||||
emit_blit_setup<CHIP>(ncrb, info->dst.format, info->scissor_enable, NULL, 0, rotate);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
const struct pipe_blit_info *info)
|
||||
emit_blit_texture(struct fd_context *ctx, fd_cs &cs, const struct pipe_blit_info *info)
|
||||
{
|
||||
const struct pipe_box *sbox = &info->src.box;
|
||||
const struct pipe_box *dbox = &info->dst.box;
|
||||
|
|
@ -756,24 +725,27 @@ emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
dump_blit_info(info);
|
||||
}
|
||||
|
||||
emit_blit_texture_setup<CHIP>(ring, info);
|
||||
emit_blit_texture_setup<CHIP>(cs, info);
|
||||
|
||||
dst = fd_resource(info->dst.resource);
|
||||
|
||||
uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
|
||||
|
||||
for (unsigned i = 0; i < info->dst.box.depth; i++) {
|
||||
with_ncrb (cs, 18) {
|
||||
emit_blit_src<CHIP>(ncrb, info, sbox->z + i, nr_samples);
|
||||
emit_blit_dst(ncrb, info->dst.resource, info->dst.format, info->dst.level,
|
||||
dbox->z + i);
|
||||
}
|
||||
|
||||
emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples);
|
||||
emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
|
||||
dbox->z + i);
|
||||
|
||||
emit_blit_fini<CHIP>(ctx, ring);
|
||||
emit_blit_fini<CHIP>(ctx, cs);
|
||||
}
|
||||
}
|
||||
|
||||
/* nregs: 4 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
|
||||
emit_clear_color(fd_ncrb<CHIP> &ncrb, enum pipe_format pfmt,
|
||||
union pipe_color_union *color)
|
||||
{
|
||||
switch (pfmt) {
|
||||
|
|
@ -792,68 +764,65 @@ emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
|
|||
break;
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_A2D_CLEAR_COLOR_DW0, 4);
|
||||
switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
|
||||
case R2D_UNORM8:
|
||||
case R2D_UNORM8_SRGB:
|
||||
/* The r2d ifmt is badly named, it also covers the signed case: */
|
||||
if (util_format_is_snorm(pfmt)) {
|
||||
OUT_RING(ring, float_to_byte_tex(color->f[0]));
|
||||
OUT_RING(ring, float_to_byte_tex(color->f[1]));
|
||||
OUT_RING(ring, float_to_byte_tex(color->f[2]));
|
||||
OUT_RING(ring, float_to_byte_tex(color->f[3]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW0(float_to_byte_tex(color->f[0])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW1(float_to_byte_tex(color->f[1])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW2(float_to_byte_tex(color->f[2])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW3(float_to_byte_tex(color->f[3])));
|
||||
} else {
|
||||
OUT_RING(ring, float_to_ubyte(color->f[0]));
|
||||
OUT_RING(ring, float_to_ubyte(color->f[1]));
|
||||
OUT_RING(ring, float_to_ubyte(color->f[2]));
|
||||
OUT_RING(ring, float_to_ubyte(color->f[3]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW0(float_to_ubyte(color->f[0])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW1(float_to_ubyte(color->f[1])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW2(float_to_ubyte(color->f[2])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW3(float_to_ubyte(color->f[3])));
|
||||
}
|
||||
break;
|
||||
case R2D_FLOAT16:
|
||||
OUT_RING(ring, _mesa_float_to_half(color->f[0]));
|
||||
OUT_RING(ring, _mesa_float_to_half(color->f[1]));
|
||||
OUT_RING(ring, _mesa_float_to_half(color->f[2]));
|
||||
OUT_RING(ring, _mesa_float_to_half(color->f[3]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW0(_mesa_float_to_half(color->f[0])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW1(_mesa_float_to_half(color->f[1])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW2(_mesa_float_to_half(color->f[2])));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW3(_mesa_float_to_half(color->f[3])));
|
||||
break;
|
||||
case R2D_FLOAT32:
|
||||
case R2D_INT32:
|
||||
case R2D_INT16:
|
||||
case R2D_INT8:
|
||||
default:
|
||||
OUT_RING(ring, color->ui[0]);
|
||||
OUT_RING(ring, color->ui[1]);
|
||||
OUT_RING(ring, color->ui[2]);
|
||||
OUT_RING(ring, color->ui[3]);
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW0(color->ui[0]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW1(color->ui[1]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW2(color->ui[2]));
|
||||
ncrb.add(A6XX_RB_A2D_CLEAR_COLOR_DW3(color->ui[3]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Emit the 2D-blitter register state used to clear an LRZ buffer.
 *
 * Programs the A2D destination rectangle to (0,0)..(lrz_pitch-1, lrz_height-1)
 * from zsbuf->lrz_layout, emits the clear color built from `depth`, runs the
 * common blit setup for PIPE_FORMAT_Z16_UNORM, and points the A2D destination
 * buffer at `lrz` (FMT6_16_UNORM, TILE6_LINEAR, WZYX).  No blit packet is
 * emitted here; only register state is written.
 *
 * NOTE(review): this listing interleaves the removed fd_ringbuffer/OUT_* lines
 * with the fd_cs/fd_ncrb lines that replace them.
 */
template <chip CHIP>
|
||||
static void
|
||||
clear_lrz_setup(struct fd_ringbuffer *ring, struct fd_resource *zsbuf,
|
||||
struct fd_bo *lrz, double depth)
|
||||
clear_lrz_setup(fd_cs &cs, struct fd_resource *zsbuf, struct fd_bo *lrz, double depth)
|
||||
{
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(0) | A6XX_GRAS_A2D_DEST_TL_Y(0));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_BR_X(zsbuf->lrz_layout.lrz_pitch - 1) |
|
||||
A6XX_GRAS_A2D_DEST_BR_Y(zsbuf->lrz_layout.lrz_height - 1));
|
||||
/* NOTE(review): 15 is presumably the upper bound on registers added through
 * ncrb below, including those added by emit_clear_color()/emit_blit_setup()
 * -- confirm against the fd_ncrb builder.
 */
fd_ncrb<CHIP> ncrb(cs, 15);
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = 0, .y = 0));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(
|
||||
.x = zsbuf->lrz_layout.lrz_pitch - 1,
|
||||
.y = zsbuf->lrz_layout.lrz_height - 1,
|
||||
));
|
||||
|
||||
/* Only f[0] is given a value (depth); the other channels are zero-initialized. */
union pipe_color_union clear_color = { .f = {depth} };
|
||||
|
||||
emit_clear_color(ring, PIPE_FORMAT_Z16_UNORM, &clear_color);
|
||||
emit_blit_setup<CHIP>(ring, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
|
||||
emit_clear_color<CHIP>(ncrb, PIPE_FORMAT_Z16_UNORM, &clear_color);
|
||||
emit_blit_setup<CHIP>(ncrb, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = FMT6_16_UNORM,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_BASE(
|
||||
.bo = lrz,
|
||||
),
|
||||
A6XX_RB_A2D_DEST_BUFFER_PITCH(zsbuf->lrz_layout.lrz_pitch * 2),
|
||||
);
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_INFO(
|
||||
.color_format = FMT6_16_UNORM,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.color_swap = WZYX,
|
||||
));
|
||||
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_BASE(.bo = lrz));
|
||||
/* NOTE(review): the "* 2" presumably converts a pitch in 16-bit texels to
 * bytes -- confirm the units of lrz_layout.lrz_pitch.
 */
ncrb.add(A6XX_RB_A2D_DEST_BUFFER_PITCH(zsbuf->lrz_layout.lrz_pitch * 2));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -861,7 +830,7 @@ void
|
|||
fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
|
||||
struct fd_bo *lrz, double depth)
|
||||
{
|
||||
struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
|
||||
fd_cs cs(fd_batch_get_prologue(batch));
|
||||
|
||||
if (DEBUG_BLIT) {
|
||||
fprintf(stderr, "lrz clear:\ndst resource: ");
|
||||
|
|
@ -869,14 +838,14 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
|
|||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
clear_lrz_setup<CHIP>(ring, zsbuf, lrz, depth);
|
||||
clear_lrz_setup<CHIP>(cs, zsbuf, lrz, depth);
|
||||
|
||||
/*
|
||||
* Blit command:
|
||||
*/
|
||||
|
||||
OUT_PKT7(ring, CP_BLIT, 1);
|
||||
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
fd_pkt7(cs, CP_BLIT, 1)
|
||||
.add(CP_BLIT_0(.op = BLIT_OP_SCALE));
|
||||
}
|
||||
FD_GENX(fd6_clear_lrz);
|
||||
|
||||
|
|
@ -979,7 +948,7 @@ fd6_clear_buffer(struct pipe_context *pctx,
|
|||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_resource *rsc = fd_resource(prsc);
|
||||
struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
fd_screen_lock(ctx->screen);
|
||||
fd_batch_resource_write(batch, rsc);
|
||||
|
|
@ -995,10 +964,12 @@ fd6_clear_buffer(struct pipe_context *pctx,
|
|||
|
||||
fd_batch_update_queries(batch);
|
||||
|
||||
emit_setup<CHIP>(batch);
|
||||
emit_setup<CHIP>(batch->ctx, cs);
|
||||
|
||||
emit_clear_color(ring, dst_fmt, &color);
|
||||
emit_blit_setup<CHIP>(ring, dst_fmt, false, &color, 0, ROTATE_0);
|
||||
with_ncrb (cs, 9) {
|
||||
emit_clear_color(ncrb, dst_fmt, &color);
|
||||
emit_blit_setup<CHIP>(ncrb, dst_fmt, false, &color, 0, ROTATE_0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Buffers can have dimensions bigger than max width (0x4000), so
|
||||
|
|
@ -1020,24 +991,24 @@ fd6_clear_buffer(struct pipe_context *pctx,
|
|||
uint32_t doff = offset & ~0x3f;
|
||||
uint32_t width = MIN2(blocks, 0x4000 - dst_x);
|
||||
|
||||
emit_blit_buffer_dst(ring, rsc, doff, 0, fmt);
|
||||
with_ncrb (cs, 6) {
|
||||
emit_blit_buffer_dst(ncrb, rsc, doff, 0, fmt);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(dst_x) | A6XX_GRAS_A2D_DEST_TL_Y(0));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_BR_X(dst_x + width - 1) |
|
||||
A6XX_GRAS_A2D_DEST_BR_Y(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = dst_x));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(.x = dst_x + width - 1));
|
||||
}
|
||||
|
||||
emit_blit_fini<CHIP>(ctx, ring);
|
||||
emit_blit_fini<CHIP>(ctx, cs);
|
||||
|
||||
offset += width * clear_value_size;
|
||||
blocks -= width;
|
||||
}
|
||||
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, ring,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_FLUSH_CACHE |
|
||||
FD6_WAIT_FOR_IDLE);
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_FLUSH_CACHE |
|
||||
FD6_WAIT_FOR_IDLE);
|
||||
|
||||
fd_batch_flush(batch);
|
||||
fd_batch_reference(&batch, NULL);
|
||||
|
|
@ -1050,26 +1021,31 @@ fd6_clear_buffer(struct pipe_context *pctx,
|
|||
|
||||
/*
 * Emit the 2D-blitter register state for clearing a region of a surface.
 *
 * Programs the A2D destination rectangle from box2d, converts `color` for
 * psurf->format via convert_color(), emits that clear color and then the
 * common blit setup.  The destination surface itself is not programmed here.
 *
 * psurf:        surface being cleared; supplies the format and the texture
 *               whose sample count scales the x coordinates
 * box2d:        region to clear (x, y, width, height are read)
 * color:        clear color, converted before use
 * unknown_8c01: passed through unchanged to emit_blit_setup()
 *
 * NOTE(review): this listing interleaves the removed fd_ringbuffer/OUT_* lines
 * with the fd_cs/fd_ncrb lines that replace them.
 */
template <chip CHIP>
|
||||
static void
|
||||
clear_surface_setup(struct fd_ringbuffer *ring, struct pipe_surface *psurf,
|
||||
clear_surface_setup(fd_cs &cs, struct pipe_surface *psurf,
|
||||
const struct pipe_box *box2d, union pipe_color_union *color,
|
||||
uint32_t unknown_8c01)
|
||||
{
|
||||
/* x coordinates below are scaled by the sample count; y is used as-is. */
uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(box2d->x * nr_samples) |
|
||||
A6XX_GRAS_A2D_DEST_TL_Y(box2d->y));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_BR_X((box2d->x + box2d->width) * nr_samples - 1) |
|
||||
A6XX_GRAS_A2D_DEST_BR_Y(box2d->y + box2d->height - 1));
|
||||
/* NOTE(review): 11 is presumably the upper bound on registers added through
 * ncrb below, including those added by emit_clear_color()/emit_blit_setup()
 * -- confirm against the fd_ncrb builder.
 */
fd_ncrb<CHIP> ncrb(cs, 11);
|
||||
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(
|
||||
.x = box2d->x * nr_samples,
|
||||
.y = box2d->y,
|
||||
));
|
||||
/* Bottom-right is inclusive, hence the "- 1" on both coordinates. */
ncrb.add(A6XX_GRAS_A2D_DEST_BR(
|
||||
.x = (box2d->x + box2d->width) * nr_samples - 1,
|
||||
.y = box2d->y + box2d->height - 1,
|
||||
));
|
||||
|
||||
union pipe_color_union clear_color = convert_color(psurf->format, color);
|
||||
|
||||
emit_clear_color(ring, psurf->format, &clear_color);
|
||||
emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
|
||||
emit_clear_color(ncrb, psurf->format, &clear_color);
|
||||
emit_blit_setup<CHIP>(ncrb, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
fd6_clear_surface(struct fd_context *ctx, fd_cs &cs,
|
||||
struct pipe_surface *psurf, const struct pipe_box *box2d,
|
||||
union pipe_color_union *color, uint32_t unknown_8c01)
|
||||
{
|
||||
|
|
@ -1079,13 +1055,13 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
clear_surface_setup<CHIP>(ring, psurf, box2d, color, unknown_8c01);
|
||||
clear_surface_setup<CHIP>(cs, psurf, box2d, color, unknown_8c01);
|
||||
|
||||
for (unsigned i = psurf->first_layer; i <= psurf->last_layer;
|
||||
i++) {
|
||||
emit_blit_dst(ring, psurf->texture, psurf->format, psurf->level, i);
|
||||
for (unsigned i = psurf->first_layer; i <= psurf->last_layer; i++) {
|
||||
with_ncrb (cs, 10)
|
||||
emit_blit_dst(ncrb, psurf->texture, psurf->format, psurf->level, i);
|
||||
|
||||
emit_blit_fini<CHIP>(ctx, ring);
|
||||
emit_blit_fini<CHIP>(ctx, cs);
|
||||
}
|
||||
}
|
||||
FD_GENX(fd6_clear_surface);
|
||||
|
|
@ -1149,7 +1125,9 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
|
|||
|
||||
fd_batch_update_queries(batch);
|
||||
|
||||
emit_setup<CHIP>(batch);
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
emit_setup<CHIP>(batch->ctx, cs);
|
||||
|
||||
struct pipe_surface surf = {
|
||||
.format = prsc->format,
|
||||
|
|
@ -1159,9 +1137,9 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
|
|||
.texture = prsc,
|
||||
};
|
||||
|
||||
fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
|
||||
fd6_clear_surface<CHIP>(ctx, cs, &surf, box, &color, 0);
|
||||
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_FLUSH_CACHE |
|
||||
|
|
@ -1178,8 +1156,8 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
resolve_tile_setup(struct fd_batch *batch, struct fd_ringbuffer *ring,
|
||||
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
|
||||
resolve_tile_setup(struct fd_batch *batch, fd_cs &cs, uint32_t base,
|
||||
struct pipe_surface *psurf, uint32_t unknown_8c01)
|
||||
{
|
||||
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
|
||||
uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
|
||||
|
|
@ -1187,85 +1165,71 @@ resolve_tile_setup(struct fd_batch *batch, struct fd_ringbuffer *ring,
|
|||
util_format_get_blocksize(psurf->format);
|
||||
unsigned width = pipe_surface_width(psurf);
|
||||
unsigned height = pipe_surface_height(psurf);
|
||||
fd_ncrb<CHIP> ncrb(cs, 26);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_A2D_DEST_TL, 2);
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_TL_X(0) | A6XX_GRAS_A2D_DEST_TL_Y(0));
|
||||
OUT_RING(ring, A6XX_GRAS_A2D_DEST_BR_X(width - 1) |
|
||||
A6XX_GRAS_A2D_DEST_BR_Y(height - 1));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_TL(.x = 0, .y = 0));
|
||||
ncrb.add(A6XX_GRAS_A2D_DEST_BR(.x = width - 1, .y = height - 1));
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_A2D_SRC_XMIN(0),
|
||||
A6XX_GRAS_A2D_SRC_XMAX(width - 1),
|
||||
A6XX_GRAS_A2D_SRC_YMIN(0),
|
||||
A6XX_GRAS_A2D_SRC_YMAX(height - 1),
|
||||
);
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMIN(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_XMAX(width - 1));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMIN(0));
|
||||
ncrb.add(A6XX_GRAS_A2D_SRC_YMAX(height - 1));
|
||||
|
||||
/* Enable scissor bit, which will take into account the window scissor
|
||||
* which is set per-tile
|
||||
*/
|
||||
emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
|
||||
emit_blit_setup<CHIP>(ncrb, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
|
||||
|
||||
/* We shouldn't be using GMEM in the layered rendering case: */
|
||||
assert(psurf->first_layer == psurf->last_layer);
|
||||
|
||||
emit_blit_dst(ring, psurf->texture, psurf->format, psurf->level,
|
||||
emit_blit_dst(ncrb, psurf->texture, psurf->format, psurf->level,
|
||||
psurf->first_layer);
|
||||
|
||||
enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
|
||||
enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
|
||||
|
||||
OUT_REG(ring,
|
||||
TPL1_A2D_SRC_TEXTURE_INFO(
|
||||
CHIP,
|
||||
.color_format = sfmt,
|
||||
.tile_mode = TILE6_2,
|
||||
.color_swap = WZYX,
|
||||
.srgb = util_format_is_srgb(psurf->format),
|
||||
.samples = samples,
|
||||
.samples_average = samples > MSAA_ONE,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_SIZE(
|
||||
CHIP,
|
||||
.width = width,
|
||||
.height = height,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_BASE(
|
||||
CHIP,
|
||||
.qword = gmem_base,
|
||||
),
|
||||
TPL1_A2D_SRC_TEXTURE_PITCH(
|
||||
CHIP,
|
||||
.pitch = gmem_pitch,
|
||||
),
|
||||
);
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_INFO(CHIP,
|
||||
.color_format = sfmt,
|
||||
.tile_mode = TILE6_2,
|
||||
.color_swap = WZYX,
|
||||
.srgb = util_format_is_srgb(psurf->format),
|
||||
.samples = samples,
|
||||
.samples_average = samples > MSAA_ONE,
|
||||
.unk20 = true,
|
||||
.unk22 = true,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_SIZE(CHIP,
|
||||
.width = width,
|
||||
.height = height,
|
||||
));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_BASE(CHIP, .qword = gmem_base));
|
||||
ncrb.add(TPL1_A2D_SRC_TEXTURE_PITCH(CHIP, .pitch = gmem_pitch));
|
||||
}
|
||||
|
||||
/*
 * Resolve one tile using CP_BLIT.
 *
 * Sequence emitted: the register state from resolve_tile_setup(), a cache
 * invalidate, a wait-for-idle, a CP_BLIT with BLIT_OP_SCALE, a second
 * wait-for-idle, and finally a CCU color flush plus wait-for-idle.
 *
 * base, psurf and unknown_8c01 are forwarded unchanged to
 * resolve_tile_setup(); batch supplies the context used for the cache
 * invalidate and the flushes.
 *
 * NOTE(review): this listing interleaves the removed fd_ringbuffer/OUT_* lines
 * with the fd_cs lines that replace them.
 */
template <chip CHIP>
|
||||
void
|
||||
fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
|
||||
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
|
||||
fd6_resolve_tile(struct fd_batch *batch, fd_cs &cs, uint32_t base,
|
||||
struct pipe_surface *psurf, uint32_t unknown_8c01)
|
||||
{
|
||||
resolve_tile_setup<CHIP>(batch, ring, base, psurf, unknown_8c01);
|
||||
resolve_tile_setup<CHIP>(batch, cs, base, psurf, unknown_8c01);
|
||||
|
||||
/* sync GMEM writes with CACHE. */
|
||||
fd6_cache_inv<CHIP>(batch->ctx, ring);
|
||||
fd6_cache_inv<CHIP>(batch->ctx, cs);
|
||||
|
||||
/* Wait for CACHE_INVALIDATE to land */
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* Kick the blit; its source/destination state was emitted by resolve_tile_setup(). */
OUT_PKT7(ring, CP_BLIT, 1);
|
||||
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
fd_pkt7(cs, CP_BLIT, 1)
|
||||
.add(CP_BLIT_0(.op = BLIT_OP_SCALE));
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
|
||||
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||
* results in sysmem, so we need to flush manually here.
|
||||
*/
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, ring,
|
||||
FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs, FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
|
||||
}
|
||||
FD_GENX(fd6_resolve_tile);
|
||||
|
||||
|
|
@ -1306,28 +1270,30 @@ handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
|
|||
|
||||
fd_batch_update_queries(batch);
|
||||
|
||||
emit_setup<CHIP>(batch);
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
emit_setup<CHIP>(batch->ctx, cs);
|
||||
|
||||
DBG_BLIT(info, batch);
|
||||
|
||||
trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
|
||||
trace_start_blit(&batch->trace, cs.ring(), info->src.resource->target,
|
||||
info->dst.resource->target);
|
||||
|
||||
if ((info->src.resource->target == PIPE_BUFFER) &&
|
||||
(info->dst.resource->target == PIPE_BUFFER)) {
|
||||
assert(src->layout.tile_mode == TILE6_LINEAR);
|
||||
assert(dst->layout.tile_mode == TILE6_LINEAR);
|
||||
emit_blit_buffer<CHIP>(ctx, batch->draw, info);
|
||||
emit_blit_buffer<CHIP>(ctx, cs, info);
|
||||
} else {
|
||||
/* I don't *think* we need to handle blits between buffer <-> !buffer */
|
||||
assert(info->src.resource->target != PIPE_BUFFER);
|
||||
assert(info->dst.resource->target != PIPE_BUFFER);
|
||||
emit_blit_texture<CHIP>(ctx, batch->draw, info);
|
||||
emit_blit_texture<CHIP>(ctx, cs, info);
|
||||
}
|
||||
|
||||
trace_end_blit(&batch->trace, batch->draw);
|
||||
trace_end_blit(&batch->trace, cs.ring());
|
||||
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
|
||||
fd6_emit_flushes<CHIP>(batch->ctx, cs,
|
||||
FD6_FLUSH_CCU_COLOR |
|
||||
FD6_FLUSH_CCU_DEPTH |
|
||||
FD6_FLUSH_CACHE |
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "freedreno_context.h"
|
||||
|
||||
#include "fd6_pack.h"
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_blitter_init(struct pipe_context *pctx);
|
||||
|
|
@ -29,11 +30,11 @@ template <chip CHIP>
|
|||
void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
|
||||
struct fd_bo *lrz, double depth) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
void fd6_clear_surface(struct fd_context *ctx, fd_cs &cs,
|
||||
struct pipe_surface *psurf, const struct pipe_box *box2d,
|
||||
union pipe_color_union *color, uint32_t unknown_8c01) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
|
||||
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) assert_dt;
|
||||
void fd6_resolve_tile(struct fd_batch *batch, fd_cs &cs, uint32_t base,
|
||||
struct pipe_surface *psurf, uint32_t unknown_8c01) assert_dt;
|
||||
|
||||
#endif /* FD6_BLIT_H_ */
|
||||
|
|
|
|||
|
|
@ -23,9 +23,10 @@
|
|||
#include "fd6_emit.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
/* nregs: 2 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit_local_size(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
|
||||
struct ir3_shader_variant *v, uint16_t local_size[3])
|
||||
{
|
||||
/*
|
||||
|
|
@ -43,53 +44,50 @@ cs_program_emit_local_size(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
: (local_size[1] % 2 == 0) ? 9
|
||||
: 17;
|
||||
|
||||
OUT_REG(ring,
|
||||
SP_CS_WGE_CNTL(
|
||||
CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
.workgrouprastorderzfirsten = true,
|
||||
.wgtilewidth = 4,
|
||||
.wgtileheight = tile_height,
|
||||
)
|
||||
);
|
||||
crb.add(SP_CS_WGE_CNTL(CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
.workgrouprastorderzfirsten = true,
|
||||
.wgtilewidth = 4,
|
||||
.wgtileheight = tile_height,
|
||||
));
|
||||
|
||||
OUT_REG(ring,
|
||||
A7XX_SP_CS_NDRANGE_7(
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
)
|
||||
);
|
||||
crb.add(A7XX_SP_CS_NDRANGE_7(
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
/* nregs: 9 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *v)
|
||||
cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *v)
|
||||
assert_dt
|
||||
{
|
||||
OUT_REG(ring, SP_UPDATE_CNTL(CHIP, .vs_state = true, .hs_state = true,
|
||||
.ds_state = true, .gs_state = true,
|
||||
.fs_state = true, .cs_state = true,
|
||||
.cs_uav = true, .gfx_uav = true, ));
|
||||
|
||||
OUT_REG(ring, SP_CS_CONST_CONFIG(
|
||||
CHIP,
|
||||
.constlen = v->constlen,
|
||||
.enabled = true,
|
||||
crb.add(SP_UPDATE_CNTL(CHIP,
|
||||
.vs_state = true, .hs_state = true,
|
||||
.ds_state = true, .gs_state = true,
|
||||
.fs_state = true, .cs_state = true,
|
||||
.cs_uav = true, .gfx_uav = true,
|
||||
));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
|
||||
COND(v->bindless_tex, A6XX_SP_CS_CONFIG_BINDLESS_TEX) |
|
||||
COND(v->bindless_samp, A6XX_SP_CS_CONFIG_BINDLESS_SAMP) |
|
||||
COND(v->bindless_ibo, A6XX_SP_CS_CONFIG_BINDLESS_UAV) |
|
||||
COND(v->bindless_ubo, A6XX_SP_CS_CONFIG_BINDLESS_UBO) |
|
||||
A6XX_SP_CS_CONFIG_NUAV(ir3_shader_num_uavs(v)) |
|
||||
A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
|
||||
A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_CS_CONFIG */
|
||||
crb.add(SP_CS_CONST_CONFIG(CHIP,
|
||||
.constlen = v->constlen,
|
||||
.enabled = true,
|
||||
));
|
||||
|
||||
crb.add(A6XX_SP_CS_CONFIG(
|
||||
.bindless_tex = v->bindless_tex,
|
||||
.bindless_samp = v->bindless_samp,
|
||||
.bindless_uav = v->bindless_ibo,
|
||||
.bindless_ubo = v->bindless_ubo,
|
||||
.enabled = true,
|
||||
.ntex = v->num_samp,
|
||||
.nsamp = v->num_samp,
|
||||
.nuav = ir3_shader_num_uavs(v),
|
||||
));
|
||||
|
||||
uint32_t local_invocation_id = v->cs.local_invocation_id;
|
||||
uint32_t work_group_id = v->cs.work_group_id;
|
||||
|
|
@ -104,54 +102,53 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
.supports_double_threadsize ? thrsz : THREAD128;
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CONST_CONFIG_0, 2);
|
||||
OUT_RING(ring, A6XX_SP_CS_CONST_CONFIG_0_WGIDCONSTID(work_group_id) |
|
||||
A6XX_SP_CS_CONST_CONFIG_0_WGSIZECONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_CONST_CONFIG_0_WGOFFSETCONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_CONST_CONFIG_0_LOCALIDREGID(local_invocation_id));
|
||||
OUT_RING(ring, A6XX_SP_CS_WGE_CNTL_LINEARLOCALIDREGID(regid(63, 0)) |
|
||||
A6XX_SP_CS_WGE_CNTL_THREADSIZE(thrsz_cs));
|
||||
crb.add(A6XX_SP_CS_CONST_CONFIG_0(
|
||||
.wgidconstid = work_group_id,
|
||||
.wgsizeconstid = INVALID_REG,
|
||||
.wgoffsetconstid = INVALID_REG,
|
||||
.localidregid = local_invocation_id,
|
||||
));
|
||||
crb.add(SP_CS_WGE_CNTL(CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
));
|
||||
|
||||
if (!ctx->screen->info->a6xx.supports_double_threadsize) {
|
||||
OUT_PKT4(ring, REG_A6XX_SP_PS_WAVE_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_SP_PS_WAVE_CNTL_THREADSIZE(thrsz));
|
||||
crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = thrsz));
|
||||
}
|
||||
|
||||
if (ctx->screen->info->a6xx.has_lpac) {
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_WIE_CNTL_0, 2);
|
||||
OUT_RING(ring, A6XX_SP_CS_WIE_CNTL_0_WGIDCONSTID(work_group_id) |
|
||||
A6XX_SP_CS_WIE_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_WIE_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_WIE_CNTL_0_LOCALIDREGID(local_invocation_id));
|
||||
OUT_RING(ring, A6XX_SP_CS_WIE_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
|
||||
A6XX_SP_CS_WIE_CNTL_1_THREADSIZE(thrsz));
|
||||
}
|
||||
} else {
|
||||
OUT_REG(ring, SP_PS_WAVE_CNTL(CHIP, .threadsize = THREAD64));
|
||||
OUT_REG(ring,
|
||||
A6XX_SP_CS_WIE_CNTL_0(
|
||||
crb.add(A6XX_SP_CS_WIE_CNTL_0(
|
||||
.wgidconstid = work_group_id,
|
||||
.wgsizeconstid = INVALID_REG,
|
||||
.wgoffsetconstid = INVALID_REG,
|
||||
.localidregid = local_invocation_id,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring,
|
||||
SP_CS_WIE_CNTL_1(
|
||||
CHIP,
|
||||
));
|
||||
crb.add(SP_CS_WIE_CNTL_1(CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
.workitemrastorder =
|
||||
v->cs.force_linear_dispatch ? WORKITEMRASTORDER_LINEAR
|
||||
: WORKITEMRASTORDER_TILED,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring, A7XX_SP_CS_UNKNOWN_A9BE(0)); // Sometimes is 0x08000000
|
||||
.threadsize = thrsz,
|
||||
));
|
||||
}
|
||||
} else {
|
||||
crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = THREAD64));
|
||||
crb.add(A6XX_SP_CS_WIE_CNTL_0(
|
||||
.wgidconstid = work_group_id,
|
||||
.wgsizeconstid = INVALID_REG,
|
||||
.wgoffsetconstid = INVALID_REG,
|
||||
.localidregid = local_invocation_id,
|
||||
));
|
||||
crb.add(SP_CS_WIE_CNTL_1(CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
.workitemrastorder =
|
||||
v->cs.force_linear_dispatch ? WORKITEMRASTORDER_LINEAR
|
||||
: WORKITEMRASTORDER_TILED,
|
||||
));
|
||||
crb.add(A7XX_SP_CS_UNKNOWN_A9BE(0)); // Sometimes is 0x08000000
|
||||
}
|
||||
|
||||
if (!v->local_size_variable)
|
||||
cs_program_emit_local_size<CHIP>(ctx, ring, v, v->local_size);
|
||||
|
||||
fd6_emit_shader<CHIP>(ctx, ring, v);
|
||||
cs_program_emit_local_size<CHIP>(ctx, crb, v, v->local_size);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -159,7 +156,7 @@ static void
|
|||
fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
||||
{
|
||||
struct fd6_compute_state *cp = (struct fd6_compute_state *)ctx->compute;
|
||||
struct fd_ringbuffer *ring = ctx->batch->draw;
|
||||
fd_cs cs(ctx->batch->draw);
|
||||
|
||||
if (unlikely(!cp->v)) {
|
||||
struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cp->hwcso;
|
||||
|
|
@ -170,16 +167,18 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
return;
|
||||
|
||||
cp->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||
cs_program_emit<CHIP>(ctx, cp->stateobj, cp->v);
|
||||
fd_cs cs(cp->stateobj);
|
||||
with_crb (cs, 9)
|
||||
cs_program_emit<CHIP>(ctx, crb, cp->v);
|
||||
fd6_emit_shader<CHIP>(ctx, cs, cp->v);
|
||||
}
|
||||
|
||||
trace_start_compute(&ctx->batch->trace, ring, !!info->indirect, info->work_dim,
|
||||
trace_start_compute(&ctx->batch->trace, cs.ring(), !!info->indirect, info->work_dim,
|
||||
info->block[0], info->block[1], info->block[2],
|
||||
info->grid[0], info->grid[1], info->grid[2],
|
||||
cp->v->shader_id);
|
||||
|
||||
if (ctx->batch->barrier)
|
||||
fd6_barrier_flush<CHIP>(ctx->batch);
|
||||
fd6_barrier_flush<CHIP>(cs, ctx->batch);
|
||||
|
||||
bool emit_instrlen_workaround =
|
||||
cp->v->instrlen > ctx->screen->info->a6xx.instr_cache_size;
|
||||
|
|
@ -200,37 +199,22 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
* See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19023
|
||||
*/
|
||||
if (emit_instrlen_workaround) {
|
||||
OUT_REG(ring, A6XX_SP_PS_INSTR_SIZE(cp->v->instrlen));
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_LABEL);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_SP_PS_INSTR_SIZE(cp->v->instrlen));
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_LABEL);
|
||||
}
|
||||
|
||||
if (ctx->gen_dirty)
|
||||
fd6_emit_cs_state<CHIP>(ctx, ring, cp);
|
||||
fd6_emit_cs_state<CHIP>(ctx, cs, cp);
|
||||
|
||||
if (ctx->gen_dirty & BIT(FD6_GROUP_CONST))
|
||||
fd6_emit_cs_user_consts<CHIP>(ctx, ring, cp->v);
|
||||
fd6_emit_cs_user_consts<CHIP>(ctx, cs, cp->v);
|
||||
|
||||
if (cp->v->need_driver_params)
|
||||
fd6_emit_cs_driver_params<CHIP>(ctx, ring, cp->v, info);
|
||||
fd6_emit_cs_driver_params<CHIP>(ctx, cs, cp->v, info);
|
||||
|
||||
OUT_PKT7(ring, CP_SET_MARKER, 1);
|
||||
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
|
||||
|
||||
uint32_t shared_size =
|
||||
MAX2(((int)(cp->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
|
||||
enum a6xx_const_ram_mode mode =
|
||||
cp->v->constlen > 256 ? CONSTLEN_512 :
|
||||
(cp->v->constlen > 192 ? CONSTLEN_256 :
|
||||
(cp->v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128));
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_CNTL_1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_CNTL_1_CONSTANTRAMMODE(mode));
|
||||
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_lpac) {
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(mode));
|
||||
}
|
||||
fd_pkt7(cs, CP_SET_MARKER, 1)
|
||||
.add(A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
|
||||
|
||||
const unsigned *local_size =
|
||||
info->block; // v->shader->nir->info->workgroup_size;
|
||||
|
|
@ -238,61 +222,74 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
|
||||
const unsigned work_dim = info->work_dim ? info->work_dim : 3;
|
||||
|
||||
if (cp->v->local_size_variable) {
|
||||
uint16_t wg[] = {local_size[0], local_size[1], local_size[2]};
|
||||
cs_program_emit_local_size<CHIP>(ctx, ring, cp->v, wg);
|
||||
with_crb (cs, 15) {
|
||||
uint32_t shared_size =
|
||||
MAX2(((int)(cp->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
|
||||
enum a6xx_const_ram_mode mode =
|
||||
cp->v->constlen > 256 ? CONSTLEN_512 :
|
||||
(cp->v->constlen > 192 ? CONSTLEN_256 :
|
||||
(cp->v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128));
|
||||
crb.add(A6XX_SP_CS_CNTL_1(
|
||||
.shared_size = shared_size,
|
||||
.constantrammode = mode,
|
||||
));
|
||||
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_lpac) {
|
||||
crb.add(A6XX_HLSQ_CS_CTRL_REG1(
|
||||
.shared_size = shared_size,
|
||||
.constantrammode = mode,
|
||||
));
|
||||
}
|
||||
|
||||
if (cp->v->local_size_variable) {
|
||||
uint16_t wg[] = {local_size[0], local_size[1], local_size[2]};
|
||||
cs_program_emit_local_size<CHIP>(ctx, crb, cp->v, wg);
|
||||
}
|
||||
|
||||
crb.add(SP_CS_NDRANGE_0(CHIP,
|
||||
.kerneldim = work_dim,
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
));
|
||||
crb.add(SP_CS_NDRANGE_1(CHIP,
|
||||
.globalsize_x = local_size[0] * num_groups[0],
|
||||
));
|
||||
crb.add(SP_CS_NDRANGE_2(CHIP, .globaloff_x = 0));
|
||||
crb.add(SP_CS_NDRANGE_3(CHIP,
|
||||
.globalsize_y = local_size[1] * num_groups[1],
|
||||
));
|
||||
crb.add(SP_CS_NDRANGE_4(CHIP, .globaloff_y = 0));
|
||||
crb.add(SP_CS_NDRANGE_5(CHIP,
|
||||
.globalsize_z = local_size[2] * num_groups[2],
|
||||
));
|
||||
crb.add(SP_CS_NDRANGE_6(CHIP, .globaloff_z = 0));
|
||||
|
||||
crb.add(SP_CS_KERNEL_GROUP_X(CHIP, 1));
|
||||
crb.add(SP_CS_KERNEL_GROUP_Y(CHIP, 1));
|
||||
crb.add(SP_CS_KERNEL_GROUP_Z(CHIP, 1));
|
||||
}
|
||||
|
||||
OUT_REG(ring,
|
||||
SP_CS_NDRANGE_0(
|
||||
CHIP,
|
||||
.kerneldim = work_dim,
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
),
|
||||
SP_CS_NDRANGE_1(
|
||||
CHIP,
|
||||
.globalsize_x = local_size[0] * num_groups[0],
|
||||
),
|
||||
SP_CS_NDRANGE_2(CHIP, .globaloff_x = 0),
|
||||
SP_CS_NDRANGE_3(
|
||||
CHIP,
|
||||
.globalsize_y = local_size[1] * num_groups[1],
|
||||
),
|
||||
SP_CS_NDRANGE_4(CHIP, .globaloff_y = 0),
|
||||
SP_CS_NDRANGE_5(
|
||||
CHIP,
|
||||
.globalsize_z = local_size[2] * num_groups[2],
|
||||
),
|
||||
SP_CS_NDRANGE_6(CHIP, .globaloff_z = 0),
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
SP_CS_KERNEL_GROUP_X(CHIP, 1),
|
||||
SP_CS_KERNEL_GROUP_Y(CHIP, 1),
|
||||
SP_CS_KERNEL_GROUP_Z(CHIP, 1),
|
||||
);
|
||||
|
||||
if (info->indirect) {
|
||||
struct fd_resource *rsc = fd_resource(info->indirect);
|
||||
|
||||
OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
|
||||
OUT_RING(ring,
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
|
||||
fd_pkt7(cs, CP_EXEC_CS_INDIRECT, 4)
|
||||
.add(A4XX_CP_EXEC_CS_INDIRECT_0())
|
||||
.add(A5XX_CP_EXEC_CS_INDIRECT_ADDR(rsc->bo, info->indirect_offset))
|
||||
.add(A5XX_CP_EXEC_CS_INDIRECT_3(
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
));
|
||||
} else {
|
||||
OUT_PKT7(ring, CP_EXEC_CS, 4);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
|
||||
OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
|
||||
OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
|
||||
fd_pkt7(cs, CP_EXEC_CS, 4)
|
||||
.add(CP_EXEC_CS_0())
|
||||
.add(CP_EXEC_CS_1(info->grid[0]))
|
||||
.add(CP_EXEC_CS_2(info->grid[1]))
|
||||
.add(CP_EXEC_CS_3(info->grid[2]));
|
||||
}
|
||||
|
||||
trace_end_compute(&ctx->batch->trace, ring);
|
||||
trace_end_compute(&ctx->batch->trace, cs.ring());
|
||||
|
||||
fd_context_all_clean(ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,37 +11,34 @@
|
|||
#include "fd6_compute.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
#define emit_const_user fd6_emit_const_user
|
||||
#define emit_const_bo fd6_emit_const_bo
|
||||
#include "ir3_const.h"
|
||||
|
||||
|
||||
/*
 * Emit one UBO descriptor for shader variant `v` with a CP_LOAD_STATE6 packet.
 *
 * The packet opcode and state block are both derived from v->type.  The
 * descriptor is supplied inline (SS6_DIRECT), so the external source address
 * is left at zero and the descriptor follows it in the packet.
 *
 * v:             shader variant; only v->type is read here
 * base:          UBO index, used as the packet's dst_off
 * sizedwords:    UBO size in dwords, rounded up to whole vec4s for the
 *                descriptor's size field
 * buffer_offset: offset of the UBO contents within `bo`
 * bo:            buffer object holding the UBO contents
 *
 * NOTE(review): this listing interleaves the removed fd_ringbuffer/OUT_* lines
 * with the fd_cs/fd_pkt7 lines that replace them.
 */
static inline void
|
||||
fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
||||
fd6_emit_driver_ubo(fd_cs &cs, const struct ir3_shader_variant *v,
|
||||
int base, uint32_t sizedwords, unsigned buffer_offset,
|
||||
struct fd_bo *bo)
|
||||
{
|
||||
enum a6xx_state_block block = fd6_stage2shadersb(v->type);
|
||||
int size_vec4s = DIV_ROUND_UP(sizedwords, 4);
|
||||
|
||||
/* base == ubo idx */
|
||||
OUT_PKT7(ring, fd6_stage2opcode(v->type), 5);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(base) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(block) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
int size_vec4s = DIV_ROUND_UP(sizedwords, 4);
|
||||
/* The descriptor's size field is OR'd into the upper 32 bits of the 64-bit value. */
OUT_RELOC(ring, bo, buffer_offset,
|
||||
((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
|
||||
/* NOTE(review): 5 is presumably the payload dword count (control dword, two
 * EXT_SRC_ADDR dwords, two descriptor dwords) -- confirm against fd_pkt7.
 */
fd_pkt7(cs, fd6_stage2opcode(v->type), 5)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = base,
|
||||
.state_type = ST6_UBO,
|
||||
.state_src = SS6_DIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = 1,
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR())
|
||||
.add(A6XX_UBO_DESC(0, bo, buffer_offset, size_vec4s));
|
||||
}
|
||||
|
||||
/* A helper to upload driver-params to a UBO, for the case where constants are
|
||||
* loaded by shader preamble rather than ST6_CONSTANTS
|
||||
*/
|
||||
static void
|
||||
fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
fd6_upload_emit_driver_ubo(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *v, int base,
|
||||
uint32_t sizedwords, const void *dwords)
|
||||
{
|
||||
|
|
@ -63,9 +60,9 @@ fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
* this allocation happens outside of the context of batch resource
|
||||
* tracking.
|
||||
*/
|
||||
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
|
||||
cs.attach_bo(fd_resource(buffer)->bo);
|
||||
|
||||
fd6_emit_driver_ubo(ring, v, base, sizedwords, buffer_offset,
|
||||
fd6_emit_driver_ubo(cs, v, base, sizedwords, buffer_offset,
|
||||
fd_resource(buffer)->bo);
|
||||
|
||||
pipe_resource_reference(&buffer, NULL);
|
||||
|
|
@ -76,8 +73,7 @@ fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
* sizedwords: size of const value buffer
|
||||
*/
|
||||
void
|
||||
fd6_emit_const_user(struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, uint32_t regid,
|
||||
fd6_emit_const_user(fd_cs &cs, const struct ir3_shader_variant *v, uint32_t regid,
|
||||
uint32_t sizedwords, const uint32_t *dwords)
|
||||
{
|
||||
emit_const_asserts(v, regid, sizedwords);
|
||||
|
|
@ -88,28 +84,29 @@ fd6_emit_const_user(struct fd_ringbuffer *ring,
|
|||
*/
|
||||
uint32_t align_sz = align(sizedwords, 4);
|
||||
|
||||
if (fd6_geom_stage(v->type)) {
|
||||
OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
|
||||
CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_DIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = DIV_ROUND_UP(sizedwords, 4)),
|
||||
CP_LOAD_STATE6_1(),
|
||||
CP_LOAD_STATE6_2());
|
||||
} else {
|
||||
OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
|
||||
CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_DIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = DIV_ROUND_UP(sizedwords, 4)),
|
||||
CP_LOAD_STATE6_1(),
|
||||
CP_LOAD_STATE6_2());
|
||||
}
|
||||
fd_pkt7(cs, fd6_stage2opcode(v->type), 3 + align_sz)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = regid / 4,
|
||||
.state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_DIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = DIV_ROUND_UP(sizedwords, 4)
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR())
|
||||
.add(dwords, align_sz);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_const_user(struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, uint32_t regid,
|
||||
uint32_t size, const uint32_t *user_buffer)
|
||||
{
|
||||
fd_cs cs(ring);
|
||||
fd6_emit_const_user(cs, v, regid, size, user_buffer);
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_const_bo(struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, uint32_t regid,
|
||||
fd6_emit_const_bo(fd_cs &cs, const struct ir3_shader_variant *v, uint32_t regid,
|
||||
uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
|
||||
{
|
||||
uint32_t dst_off = regid / 4;
|
||||
|
|
@ -119,21 +116,23 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring,
|
|||
|
||||
emit_const_asserts(v, regid, sizedwords);
|
||||
|
||||
if (fd6_geom_stage(v->type)) {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
|
||||
CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_INDIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = num_unit, ),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
|
||||
} else {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
|
||||
CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_INDIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = num_unit, ),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
|
||||
}
|
||||
fd_pkt7(cs, fd6_stage2opcode(v->type), 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = dst_off, .state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_INDIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = num_unit,
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
|
||||
}
|
||||
|
||||
static void
|
||||
emit_const_bo(struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, uint32_t regid,
|
||||
uint32_t offset, uint32_t size, struct fd_bo *bo)
|
||||
{
|
||||
fd_cs cs(ring);
|
||||
fd6_emit_const_bo(cs, v, regid, offset, size, bo);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -158,7 +157,7 @@ wait_mem_writes(struct fd_context *ctx)
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
||||
emit_stage_tess_consts(fd_cs &cs, const struct ir3_shader_variant *v,
|
||||
struct fd_context *ctx, uint32_t *params, int num_params)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
|
@ -166,7 +165,7 @@ emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_varia
|
|||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
int base = const_state->primitive_param_ubo.idx;
|
||||
|
||||
fd6_upload_emit_driver_ubo(ctx, ring, v, base, num_params, params);
|
||||
fd6_upload_emit_driver_ubo(ctx, cs, v, base, num_params, params);
|
||||
} else if (ir3_const_can_upload(&const_state->allocs,
|
||||
IR3_CONST_ALLOC_PRIMITIVE_PARAM,
|
||||
v->constlen)) {
|
||||
|
|
@ -174,7 +173,7 @@ emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_varia
|
|||
const_state->allocs.consts[IR3_CONST_ALLOC_PRIMITIVE_PARAM].offset_vec4;
|
||||
int size = MIN2(1 + regid, v->constlen) - regid;
|
||||
if (size > 0)
|
||||
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
|
||||
fd6_emit_const_user(cs, v, regid * 4, num_params, params);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -183,8 +182,7 @@ struct fd_ringbuffer *
|
|||
fd6_build_tess_consts(struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
|
||||
fd_cs constobj(ctx->batch->submit, 0x1000);
|
||||
|
||||
/* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
|
||||
* size is dwords, since that's what LDG/STG use.
|
||||
|
|
@ -205,7 +203,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
int64_t tess_factor_iova = fd_bo_get_iova(tess_bo);
|
||||
int64_t tess_param_iova = tess_factor_iova + FD6_TESS_FACTOR_SIZE;
|
||||
|
||||
fd_ringbuffer_attach_bo(constobj, tess_bo);
|
||||
constobj.attach_bo(tess_bo);
|
||||
|
||||
uint32_t hs_params[8] = {
|
||||
emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
|
|
@ -258,12 +256,12 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
gs_params, ARRAY_SIZE(gs_params));
|
||||
}
|
||||
|
||||
return constobj;
|
||||
return constobj.ring();
|
||||
}
|
||||
FD_GENX(fd6_build_tess_consts);
|
||||
|
||||
static void
|
||||
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
||||
fd6_emit_ubos(const struct ir3_shader_variant *v, fd_cs &cs,
|
||||
struct fd_constbuf_stateobj *constbuf)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
|
@ -272,25 +270,26 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
|||
if (!num_ubos)
|
||||
return;
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
fd_pkt7 pkt(cs, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
|
||||
|
||||
pkt.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = 0,
|
||||
.state_type = ST6_UBO,
|
||||
.state_src = SS6_DIRECT,
|
||||
.state_block = fd6_stage2shadersb(v->type),
|
||||
.num_unit = num_ubos,
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR());
|
||||
|
||||
for (int i = 0; i < num_ubos; i++) {
|
||||
struct pipe_constant_buffer *cb = &constbuf->cb[i];
|
||||
|
||||
if (cb->buffer) {
|
||||
struct fd_bo *bo = fd_resource(cb->buffer)->bo;
|
||||
int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
|
||||
OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
|
||||
(uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
|
||||
pkt.add(A6XX_UBO_DESC(i, bo, cb->buffer_offset, size_vec4s));
|
||||
} else {
|
||||
OUT_RING(ring, 0xbad00000 | (i << 16));
|
||||
OUT_RING(ring, A6XX_UBO_1_SIZE(0));
|
||||
pkt.add(A6XX_UBO_DESC(i, NULL, 0, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -325,16 +324,15 @@ FD_GENX(fd6_user_consts_cmdstream_size);
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_user_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring,
|
||||
emit_user_consts(const struct ir3_shader_variant *v, fd_cs &cs,
|
||||
struct fd_constbuf_stateobj *constbuf)
|
||||
{
|
||||
fd6_emit_ubos(v, ring, constbuf);
|
||||
fd6_emit_ubos(v, cs, constbuf);
|
||||
|
||||
if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble)
|
||||
return;
|
||||
|
||||
ir3_emit_user_consts(v, ring, constbuf);
|
||||
ir3_emit_user_consts(v, cs.ring(), constbuf);
|
||||
}
|
||||
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
|
|
@ -344,8 +342,7 @@ fd6_build_user_consts(struct fd6_emit *emit)
|
|||
struct fd_context *ctx = emit->ctx;
|
||||
unsigned sz = emit->prog->user_consts_cmdstream_size;
|
||||
|
||||
struct fd_ringbuffer *constobj =
|
||||
fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
|
||||
fd_cs constobj(ctx->batch->submit, sz);
|
||||
|
||||
emit_user_consts<CHIP>(emit->vs, constobj, &ctx->constbuf[MESA_SHADER_VERTEX]);
|
||||
|
||||
|
|
@ -360,7 +357,7 @@ fd6_build_user_consts(struct fd6_emit *emit)
|
|||
}
|
||||
emit_user_consts<CHIP>(emit->fs, constobj, &ctx->constbuf[MESA_SHADER_FRAGMENT]);
|
||||
|
||||
return constobj;
|
||||
return constobj.ring();
|
||||
}
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
|
|
@ -369,7 +366,7 @@ template struct fd_ringbuffer * fd6_build_user_consts<A7XX, NO_TESS_GS>(struct f
|
|||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpconstobj,
|
||||
emit_driver_params(const struct ir3_shader_variant *v, fd_cs &dpconstobj,
|
||||
struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct ir3_driver_params_vs *vertex_params)
|
||||
|
|
@ -382,14 +379,13 @@ emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpc
|
|||
dword_sizeof(*vertex_params),
|
||||
vertex_params);
|
||||
} else {
|
||||
ir3_emit_driver_params(v, dpconstobj, ctx, info, indirect, vertex_params);
|
||||
ir3_emit_driver_params(v, dpconstobj.ring(), ctx, info, indirect, vertex_params);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
emit_hs_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *dpconstobj,
|
||||
emit_hs_driver_params(const struct ir3_shader_variant *v, fd_cs &dpconstobj,
|
||||
struct fd_context *ctx)
|
||||
{
|
||||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
|
|
@ -401,7 +397,7 @@ emit_hs_driver_params(const struct ir3_shader_variant *v,
|
|||
dword_sizeof(hs_params),
|
||||
&hs_params);
|
||||
} else {
|
||||
ir3_emit_hs_driver_params(v, dpconstobj, ctx);
|
||||
ir3_emit_hs_driver_params(v, dpconstobj.ring(), ctx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -442,12 +438,11 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
num_dp * (4 + dword_sizeof(p)) + /* 4dw PKT7 header */
|
||||
num_ubo_dp * 6; /* 6dw per UBO descriptor */
|
||||
|
||||
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);
|
||||
fd_cs dpconstobj(ctx->batch->submit, size_dwords * 4);
|
||||
|
||||
/* VS still works the old way */
|
||||
if (emit->vs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj.ring(), ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
|
|
@ -469,7 +464,7 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
|
||||
fd6_ctx->has_dp_state = true;
|
||||
|
||||
return dpconstobj;
|
||||
return dpconstobj.ring();
|
||||
}
|
||||
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
|
|
@ -479,8 +474,7 @@ template struct fd_ringbuffer * fd6_build_driver_params<A7XX, NO_TESS_GS>(struct
|
|||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
fd6_emit_cs_driver_params(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *v,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
|
|
@ -501,20 +495,20 @@ fd6_emit_cs_driver_params(struct fd_context *ctx,
|
|||
|
||||
if (info->indirect) {
|
||||
/* Copy indirect params into UBO: */
|
||||
ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
|
||||
ctx->screen->mem_to_mem(cs.ring(), buffer, buffer_offset, info->indirect,
|
||||
info->indirect_offset, 3);
|
||||
|
||||
wait_mem_writes(ctx);
|
||||
} else {
|
||||
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
|
||||
cs.attach_bo(fd_resource(buffer)->bo);
|
||||
}
|
||||
|
||||
fd6_emit_driver_ubo(ring, v, base, dword_sizeof(compute_params),
|
||||
fd6_emit_driver_ubo(cs, v, base, dword_sizeof(compute_params),
|
||||
buffer_offset, fd_resource(buffer)->bo);
|
||||
|
||||
pipe_resource_reference(&buffer, NULL);
|
||||
} else {
|
||||
ir3_emit_cs_driver_params(v, ring, ctx, info);
|
||||
ir3_emit_cs_driver_params(v, cs.ring(), ctx, info);
|
||||
if (info->indirect)
|
||||
wait_mem_writes(ctx);
|
||||
}
|
||||
|
|
@ -523,50 +517,47 @@ FD_GENX(fd6_emit_cs_driver_params);
|
|||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
fd6_emit_cs_user_consts(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *v)
|
||||
{
|
||||
emit_user_consts<CHIP>(v, ring, &ctx->constbuf[MESA_SHADER_COMPUTE]);
|
||||
emit_user_consts<CHIP>(v, cs, &ctx->constbuf[MESA_SHADER_COMPUTE]);
|
||||
}
|
||||
FD_GENX(fd6_emit_cs_user_consts);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_immediates(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring)
|
||||
fd6_emit_immediates(const struct ir3_shader_variant *v, fd_cs &cs)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
||||
if (const_state->consts_ubo.idx >= 0) {
|
||||
int sizedwords = DIV_ROUND_UP(v->constant_data_size, 4);
|
||||
|
||||
fd6_emit_driver_ubo(ring, v, const_state->consts_ubo.idx, sizedwords,
|
||||
fd6_emit_driver_ubo(cs, v, const_state->consts_ubo.idx, sizedwords,
|
||||
v->info.constant_data_offset, v->bo);
|
||||
}
|
||||
|
||||
if (CHIP == A7XX && v->compiler->load_inline_uniforms_via_preamble_ldgk)
|
||||
return;
|
||||
|
||||
ir3_emit_immediates(v, ring);
|
||||
ir3_emit_immediates(v, cs.ring());
|
||||
}
|
||||
FD_GENX(fd6_emit_immediates);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_link_map(struct fd_context *ctx,
|
||||
fd6_emit_link_map(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *producer,
|
||||
const struct ir3_shader_variant *consumer,
|
||||
struct fd_ringbuffer *ring)
|
||||
const struct ir3_shader_variant *consumer)
|
||||
{
|
||||
if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(consumer);
|
||||
int base = const_state->primitive_map_ubo.idx;
|
||||
uint32_t size = ALIGN(consumer->input_size, 4);
|
||||
|
||||
fd6_upload_emit_driver_ubo(ctx, ring, consumer, base, size, producer->output_loc);
|
||||
fd6_upload_emit_driver_ubo(ctx, cs, consumer, base, size, producer->output_loc);
|
||||
} else {
|
||||
ir3_emit_link_map(producer, consumer, ring);
|
||||
ir3_emit_link_map(producer, consumer, cs.ring());
|
||||
}
|
||||
}
|
||||
FD_GENX(fd6_emit_link_map);
|
||||
|
|
|
|||
|
|
@ -22,21 +22,17 @@ struct fd_ringbuffer *
|
|||
fd6_build_driver_params(struct fd6_emit *emit) assert_dt;
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
void fd6_emit_cs_driver_params(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *v,
|
||||
const struct pipe_grid_info *info) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
void fd6_emit_cs_user_consts(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *v) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_immediates(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring) assert_dt;
|
||||
void fd6_emit_immediates(const struct ir3_shader_variant *v, fd_cs &cs) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_link_map(struct fd_context *ctx,
|
||||
void fd6_emit_link_map(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *producer,
|
||||
const struct ir3_shader_variant *consumer,
|
||||
struct fd_ringbuffer *ring) assert_dt;
|
||||
const struct ir3_shader_variant *consumer) assert_dt;
|
||||
|
||||
#endif /* FD6_CONST_H */
|
||||
|
|
|
|||
|
|
@ -74,11 +74,9 @@ fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
|
|||
struct fd6_vertex_stateobj *state = CALLOC_STRUCT(fd6_vertex_stateobj);
|
||||
memcpy(state->base.pipe, elements, sizeof(*elements) * num_elements);
|
||||
state->base.num_elements = num_elements;
|
||||
state->stateobj =
|
||||
fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 4 + 1));
|
||||
struct fd_ringbuffer *ring = state->stateobj;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_FETCH_INSTR(0), 2 * num_elements);
|
||||
fd_crb crb(ctx->pipe, num_elements * 3);
|
||||
|
||||
for (int32_t i = 0; i < num_elements; i++) {
|
||||
const struct pipe_vertex_element *elem = &elements[i];
|
||||
enum pipe_format pfmt = (enum pipe_format)elem->src_format;
|
||||
|
|
@ -86,25 +84,26 @@ fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
|
|||
bool isint = util_format_is_pure_integer(pfmt);
|
||||
assert(fmt != FMT6_NONE);
|
||||
|
||||
OUT_RING(ring, A6XX_VFD_FETCH_INSTR_INSTR_IDX(elem->vertex_buffer_index) |
|
||||
A6XX_VFD_FETCH_INSTR_INSTR_OFFSET(elem->src_offset) |
|
||||
A6XX_VFD_FETCH_INSTR_INSTR_FORMAT(fmt) |
|
||||
COND(elem->instance_divisor,
|
||||
A6XX_VFD_FETCH_INSTR_INSTR_INSTANCED) |
|
||||
A6XX_VFD_FETCH_INSTR_INSTR_SWAP(fd6_vertex_swap(pfmt)) |
|
||||
A6XX_VFD_FETCH_INSTR_INSTR_UNK30 |
|
||||
COND(!isint, A6XX_VFD_FETCH_INSTR_INSTR_FLOAT));
|
||||
OUT_RING(ring,
|
||||
MAX2(1, elem->instance_divisor)); /* VFD_FETCH_INSTR[j].STEP_RATE */
|
||||
crb.add(A6XX_VFD_FETCH_INSTR_INSTR(i,
|
||||
.idx = elem->vertex_buffer_index,
|
||||
.offset = elem->src_offset,
|
||||
.instanced = elem->instance_divisor,
|
||||
.format = fmt,
|
||||
.swap = fd6_vertex_swap(pfmt),
|
||||
.unk30 = true,
|
||||
._float = !isint,
|
||||
))
|
||||
.add(A6XX_VFD_FETCH_INSTR_STEP_RATE(i, MAX2(1, elem->instance_divisor)));
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < num_elements; i++) {
|
||||
const struct pipe_vertex_element *elem = &elements[i];
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_VERTEX_BUFFER_STRIDE(elem->vertex_buffer_index), 1);
|
||||
OUT_RING(ring, elem->src_stride);
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_STRIDE(elem->vertex_buffer_index, elem->src_stride));
|
||||
}
|
||||
|
||||
state->stateobj = crb.ring();
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
|
|
@ -308,20 +307,19 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
|
|||
|
||||
fd6_blitter_init<CHIP>(pctx);
|
||||
|
||||
struct fd_ringbuffer *ring =
|
||||
fd_ringbuffer_new_object(fd6_ctx->base.pipe, 6 * 4);
|
||||
fd_crb crb(fd6_ctx->base.pipe, 3);
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL());
|
||||
OUT_REG(ring, A6XX_RB_MSAA_SAMPLE_POS_CNTL());
|
||||
OUT_REG(ring, A6XX_TPL1_MSAA_SAMPLE_POS_CNTL());
|
||||
crb.add(A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL())
|
||||
.add(A6XX_RB_MSAA_SAMPLE_POS_CNTL())
|
||||
.add(A6XX_TPL1_MSAA_SAMPLE_POS_CNTL());
|
||||
|
||||
fd6_ctx->sample_locations_disable_stateobj = ring;
|
||||
fd6_ctx->sample_locations_disable_stateobj = crb.ring();
|
||||
|
||||
fd6_ctx->preamble = fd6_build_preemption_preamble<CHIP>(&fd6_ctx->base);
|
||||
|
||||
ring = fd_ringbuffer_new_object(fd6_ctx->base.pipe, 0x1000);
|
||||
fd6_emit_static_regs<CHIP>(&fd6_ctx->base, ring);
|
||||
fd6_ctx->restore = ring;
|
||||
fd_cs restore(fd6_ctx->base.pipe, 0x1000);
|
||||
fd6_emit_static_regs<CHIP>(restore, &fd6_ctx->base);
|
||||
fd6_ctx->restore = restore.ring();
|
||||
|
||||
return fd_context_init_tc(pctx, flags);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
#include "ir3/ir3_descriptor.h"
|
||||
|
||||
#include "fd6_hw.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
struct fd6_lrz_state {
|
||||
union {
|
||||
|
|
@ -165,14 +166,13 @@ struct fd6_control {
|
|||
(fd6_ctx)->control_mem, offsetof(struct fd6_control, member)
|
||||
|
||||
static inline void
|
||||
emit_marker6(struct fd_ringbuffer *ring, int scratch_idx)
|
||||
emit_marker6(fd_cs &cs, int scratch_idx)
|
||||
{
|
||||
extern int32_t marker_cnt;
|
||||
unsigned reg = REG_A6XX_CP_SCRATCH_REG(scratch_idx);
|
||||
if (__EMIT_MARKER) {
|
||||
OUT_WFI5(ring);
|
||||
OUT_PKT4(ring, reg, 1);
|
||||
OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_CP_SCRATCH_REG(scratch_idx, p_atomic_inc_return(&marker_cnt)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ is_indexed(enum draw_type type)
|
|||
}
|
||||
|
||||
static void
|
||||
draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
draw_emit_xfb(fd_cs &cs, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect)
|
||||
{
|
||||
|
|
@ -68,14 +68,13 @@ draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
|||
fd_stream_output_target(indirect->count_from_stream_output);
|
||||
struct fd_resource *offset = fd_resource(target->offset_buf);
|
||||
|
||||
OUT_PKT7(ring, CP_DRAW_AUTO, 6);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring, info->instance_count);
|
||||
OUT_RELOC(ring, offset->bo, 0, 0, 0);
|
||||
OUT_RING(
|
||||
ring,
|
||||
0); /* byte counter offset subtracted from the value read from above */
|
||||
OUT_RING(ring, target->stride);
|
||||
fd_pkt7(cs, CP_DRAW_AUTO, 6)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(CP_DRAW_AUTO_1(info->instance_count))
|
||||
.add(CP_DRAW_AUTO_NUM_VERTICES_BASE(offset->bo, 0))
|
||||
/* byte counter offset subtracted from the value read from above: */
|
||||
.add(CP_DRAW_AUTO_4(0))
|
||||
.add(CP_DRAW_AUTO_5(target->stride));
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
|
|
@ -100,9 +99,7 @@ max_indices(const struct pipe_draw_info *info, unsigned index_offset)
|
|||
|
||||
template <draw_type DRAW>
|
||||
static void
|
||||
draw_emit_indirect(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
draw_emit_indirect(fd_cs &cs, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
unsigned index_offset, uint32_t driver_param)
|
||||
|
|
@ -110,59 +107,94 @@ draw_emit_indirect(struct fd_context *ctx,
|
|||
struct fd_resource *ind = fd_resource(indirect->buffer);
|
||||
|
||||
if (DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED) {
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 11);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT_INDEXED)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
|
||||
struct pipe_resource *idx = info->index.resource;
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
|
||||
OUT_RING(ring, max_indices(info, index_offset));
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
|
||||
fd_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 11)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_1(
|
||||
.opcode = INDIRECT_OP_INDIRECT_COUNT_INDEXED,
|
||||
.dst_off = driver_param,
|
||||
))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_DRAW_COUNT(indirect->draw_count))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_INDEXED_CP_DRAW_INDIRECT_MULTI_INDEX(
|
||||
fd_resource(idx)->bo, index_offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_INDEXED_CP_DRAW_INDIRECT_MULTI_MAX_INDICES(
|
||||
max_indices(info, index_offset)
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_INDEXED_CP_DRAW_INDIRECT_MULTI_INDIRECT(
|
||||
ind->bo, indirect->offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_INDEXED_CP_DRAW_INDIRECT_MULTI_INDIRECT_COUNT(
|
||||
count_buf->bo, indirect->indirect_draw_count_offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_INDEXED_CP_DRAW_INDIRECT_MULTI_STRIDE(
|
||||
indirect->stride
|
||||
));
|
||||
} else if (DRAW == DRAW_INDIRECT_OP_INDEXED) {
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 9);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
struct pipe_resource *idx = info->index.resource;
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
//index va
|
||||
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
|
||||
//max indices
|
||||
OUT_RING(ring, max_indices(info, index_offset));
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
|
||||
fd_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 9)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_1(
|
||||
.opcode = INDIRECT_OP_INDEXED,
|
||||
.dst_off = driver_param,
|
||||
))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_DRAW_COUNT(indirect->draw_count))
|
||||
//index va
|
||||
.add(INDIRECT_OP_INDEXED_CP_DRAW_INDIRECT_MULTI_INDEX(
|
||||
fd_resource(idx)->bo, index_offset
|
||||
))
|
||||
//max indices
|
||||
.add(INDIRECT_OP_INDEXED_CP_DRAW_INDIRECT_MULTI_MAX_INDICES(
|
||||
max_indices(info, index_offset)
|
||||
))
|
||||
.add(INDIRECT_OP_INDEXED_CP_DRAW_INDIRECT_MULTI_INDIRECT(
|
||||
ind->bo, indirect->offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDEXED_CP_DRAW_INDIRECT_MULTI_STRIDE(
|
||||
indirect->stride
|
||||
));
|
||||
} else if(DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT) {
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 8);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
|
||||
fd_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 8)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_1(
|
||||
.opcode = INDIRECT_OP_INDIRECT_COUNT,
|
||||
.dst_off = driver_param,
|
||||
))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_DRAW_COUNT(indirect->draw_count))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_CP_DRAW_INDIRECT_MULTI_INDIRECT(
|
||||
ind->bo, indirect->offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_CP_DRAW_INDIRECT_MULTI_INDIRECT_COUNT(
|
||||
count_buf->bo, indirect->indirect_draw_count_offset
|
||||
))
|
||||
.add(INDIRECT_OP_INDIRECT_COUNT_CP_DRAW_INDIRECT_MULTI_STRIDE(
|
||||
indirect->stride
|
||||
));
|
||||
} else if (DRAW == DRAW_INDIRECT_OP_NORMAL) {
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 6);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
fd_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 6)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_1(
|
||||
.opcode = INDIRECT_OP_NORMAL,
|
||||
.dst_off = driver_param,
|
||||
))
|
||||
.add(A6XX_CP_DRAW_INDIRECT_MULTI_DRAW_COUNT(indirect->draw_count))
|
||||
.add(INDIRECT_OP_NORMAL_CP_DRAW_INDIRECT_MULTI_INDIRECT(
|
||||
ind->bo, indirect->offset
|
||||
))
|
||||
.add(INDIRECT_OP_NORMAL_CP_DRAW_INDIRECT_MULTI_STRIDE(
|
||||
indirect->stride
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
template <draw_type DRAW>
|
||||
static void
|
||||
draw_emit(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
draw_emit(fd_cs &cs, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_start_count_bias *draw, unsigned index_offset)
|
||||
{
|
||||
|
|
@ -171,17 +203,21 @@ draw_emit(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
|||
|
||||
struct pipe_resource *idx_buffer = info->index.resource;
|
||||
|
||||
OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
|
||||
CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
|
||||
CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count),
|
||||
CP_DRAW_INDX_OFFSET_3(.first_indx = draw->start),
|
||||
A5XX_CP_DRAW_INDX_OFFSET_INDX_BASE(fd_resource(idx_buffer)->bo,
|
||||
index_offset),
|
||||
A5XX_CP_DRAW_INDX_OFFSET_6(.max_indices = max_indices(info, index_offset)));
|
||||
fd_pkt7(cs, CP_DRAW_INDX_OFFSET, 7)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count))
|
||||
.add(CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count))
|
||||
.add(CP_DRAW_INDX_OFFSET_3(.first_indx = draw->start))
|
||||
.add(A5XX_CP_DRAW_INDX_OFFSET_INDX_BASE(
|
||||
fd_resource(idx_buffer)->bo,
|
||||
index_offset
|
||||
))
|
||||
.add(A5XX_CP_DRAW_INDX_OFFSET_6(.max_indices = max_indices(info, index_offset)));
|
||||
} else if (DRAW == DRAW_DIRECT_OP_NORMAL) {
|
||||
OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
|
||||
CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
|
||||
CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count));
|
||||
fd_pkt7(cs, CP_DRAW_INDX_OFFSET, 3)
|
||||
.add(pack_CP_DRAW_INDX_OFFSET_0(*draw0))
|
||||
.add(CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count))
|
||||
.add(CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -258,18 +294,16 @@ get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info)
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
flush_streamout(struct fd_context *ctx, struct fd6_emit *emit)
|
||||
flush_streamout(struct fd_context *ctx, fd_cs &cs, struct fd6_emit *emit)
|
||||
assert_dt
|
||||
{
|
||||
if (!emit->streamout_mask)
|
||||
return;
|
||||
|
||||
struct fd_ringbuffer *ring = ctx->batch->draw;
|
||||
|
||||
for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
|
||||
if (emit->streamout_mask & (1 << i)) {
|
||||
enum fd_gpu_event evt = (enum fd_gpu_event)(FD_FLUSH_SO_0 + i);
|
||||
fd6_event_write<CHIP>(ctx, ring, evt);
|
||||
fd6_event_write<CHIP>(ctx, cs, evt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -360,7 +394,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
|
||||
}
|
||||
|
||||
struct fd_ringbuffer *ring = ctx->batch->draw;
|
||||
fd_cs cs(ctx->batch->draw);
|
||||
|
||||
struct CP_DRAW_INDX_OFFSET_0 draw0 = {
|
||||
.prim_type = ctx->screen->primtypes[info->mode],
|
||||
|
|
@ -400,35 +434,36 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
/* convert from # of patches to draw count */
|
||||
subdraw_size *= ctx->patch_vertices;
|
||||
|
||||
OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
|
||||
OUT_RING(ring, subdraw_size);
|
||||
fd_pkt7(cs, CP_SET_SUBDRAW_SIZE, 1)
|
||||
.add(subdraw_size);
|
||||
|
||||
ctx->batch->tessellation = true;
|
||||
}
|
||||
|
||||
uint32_t index_start = is_indexed(DRAW) ? draws[0].index_bias : draws[0].start;
|
||||
if (ctx->last.dirty || (ctx->last.index_start != index_start)) {
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
|
||||
OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
|
||||
ctx->last.index_start = index_start;
|
||||
}
|
||||
{
|
||||
fd_crb crb(cs, 3);
|
||||
|
||||
if (ctx->last.dirty || (ctx->last.instance_start != info->start_instance)) {
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_INSTANCE_START_OFFSET, 1);
|
||||
OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
|
||||
ctx->last.instance_start = info->start_instance;
|
||||
}
|
||||
uint32_t index_start = is_indexed(DRAW) ? draws[0].index_bias : draws[0].start;
|
||||
if (ctx->last.dirty || (ctx->last.index_start != index_start)) {
|
||||
crb.add(A6XX_VFD_INDEX_OFFSET(index_start));
|
||||
ctx->last.index_start = index_start;
|
||||
}
|
||||
|
||||
uint32_t restart_index =
|
||||
info->primitive_restart ? info->restart_index : 0xffffffff;
|
||||
if (ctx->last.dirty || (ctx->last.restart_index != restart_index)) {
|
||||
OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1);
|
||||
OUT_RING(ring, restart_index); /* PC_RESTART_INDEX */
|
||||
ctx->last.restart_index = restart_index;
|
||||
if (ctx->last.dirty || (ctx->last.instance_start != info->start_instance)) {
|
||||
crb.add(A6XX_VFD_INSTANCE_START_OFFSET(info->start_instance));
|
||||
ctx->last.instance_start = info->start_instance;
|
||||
}
|
||||
|
||||
uint32_t restart_index =
|
||||
info->primitive_restart ? info->restart_index : 0xffffffff;
|
||||
if (ctx->last.dirty || (ctx->last.restart_index != restart_index)) {
|
||||
crb.add(A6XX_PC_RESTART_INDEX(restart_index));
|
||||
ctx->last.restart_index = restart_index;
|
||||
}
|
||||
}
|
||||
|
||||
if (emit.dirty_groups)
|
||||
fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);
|
||||
fd6_emit_3d_state<CHIP, PIPELINE>(cs, &emit);
|
||||
|
||||
/* All known firmware versions do not wait for WFI's with CP_DRAW_AUTO.
|
||||
* Plus, for the common case where the counter buffer is written by
|
||||
|
|
@ -444,8 +479,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT)
|
||||
ctx->batch->barrier |= FD6_WAIT_FOR_ME;
|
||||
|
||||
if (ctx->batch->barrier)
|
||||
fd6_barrier_flush<CHIP>(ctx->batch);
|
||||
fd6_barrier_flush<CHIP>(cs, ctx->batch);
|
||||
|
||||
/* for debug after a lock up, write a unique counter value
|
||||
* to scratch7 for each draw, to make it easier to match up
|
||||
|
|
@ -453,12 +487,12 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
* (scratch6) and DRAW is enough to "triangulate" the
|
||||
* particular draw that caused lockup.
|
||||
*/
|
||||
emit_marker6(ring, 7);
|
||||
emit_marker6(cs, 7);
|
||||
|
||||
if (is_indirect(DRAW)) {
|
||||
assert(num_draws == 1); /* only >1 for direct draws */
|
||||
if (DRAW == DRAW_INDIRECT_OP_XFB) {
|
||||
draw_emit_xfb(ring, &draw0, info, indirect);
|
||||
draw_emit_xfb(cs, &draw0, info, indirect);
|
||||
} else {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(emit.vs);
|
||||
uint32_t dst_offset_dp =
|
||||
|
|
@ -470,10 +504,10 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
emit.vs->constlen))
|
||||
dst_offset_dp = 0;
|
||||
|
||||
draw_emit_indirect<DRAW>(ctx, ring, &draw0, info, indirect, index_offset, dst_offset_dp);
|
||||
draw_emit_indirect<DRAW>(cs, &draw0, info, indirect, index_offset, dst_offset_dp);
|
||||
}
|
||||
} else {
|
||||
draw_emit<DRAW>(ring, &draw0, info, &draws[0], index_offset);
|
||||
draw_emit<DRAW>(cs, &draw0, info, &draws[0], index_offset);
|
||||
|
||||
if (unlikely(num_draws > 1)) {
|
||||
|
||||
|
|
@ -492,14 +526,14 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
uint32_t last_index_start = ctx->last.index_start;
|
||||
|
||||
for (unsigned i = 1; i < num_draws; i++) {
|
||||
flush_streamout<CHIP>(ctx, &emit);
|
||||
flush_streamout<CHIP>(ctx, cs, &emit);
|
||||
|
||||
fd6_vsc_update_sizes(ctx->batch, info, &draws[i]);
|
||||
|
||||
uint32_t index_start = is_indexed(DRAW) ? draws[i].index_bias : draws[i].start;
|
||||
if (last_index_start != index_start) {
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
|
||||
OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_VFD_INDEX_OFFSET(index_start));
|
||||
last_index_start = index_start;
|
||||
}
|
||||
|
||||
|
|
@ -507,21 +541,21 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
emit.state.num_groups = 0;
|
||||
emit.draw = &draws[i];
|
||||
emit.draw_id = info->increment_draw_id ? i : 0;
|
||||
fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);
|
||||
fd6_emit_3d_state<CHIP, PIPELINE>(cs, &emit);
|
||||
}
|
||||
|
||||
assert(!index_offset); /* handled by util_draw_multi() */
|
||||
|
||||
draw_emit<DRAW>(ring, &draw0, info, &draws[i], 0);
|
||||
draw_emit<DRAW>(cs, &draw0, info, &draws[i], 0);
|
||||
}
|
||||
|
||||
ctx->last.index_start = last_index_start;
|
||||
}
|
||||
}
|
||||
|
||||
emit_marker6(ring, 7);
|
||||
emit_marker6(cs, 7);
|
||||
|
||||
flush_streamout<CHIP>(ctx, &emit);
|
||||
flush_streamout<CHIP>(ctx, cs, &emit);
|
||||
|
||||
fd_context_all_clean(ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,29 +52,26 @@ build_vbo_state(struct fd6_emit *emit) assert_dt
|
|||
const struct fd_vertex_state *vtx = &emit->ctx->vtx;
|
||||
|
||||
const unsigned cnt = vtx->vertexbuf.count;
|
||||
const unsigned dwords = cnt * 4; /* per vbo: reg64 + one reg32 + pkt hdr */
|
||||
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
emit->ctx->batch->submit, 4 * dwords, FD_RINGBUFFER_STREAMING);
|
||||
fd_crb crb(emit->ctx->batch->submit, 3 * cnt);
|
||||
|
||||
for (int32_t j = 0; j < cnt; j++) {
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_VERTEX_BUFFER(j), 3);
|
||||
|
||||
const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j];
|
||||
struct fd_resource *rsc = fd_resource(vb->buffer.resource);
|
||||
if (rsc == NULL) {
|
||||
OUT_RING(ring, 0);
|
||||
OUT_RING(ring, 0);
|
||||
OUT_RING(ring, 0);
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_BASE(j));
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_SIZE(j));
|
||||
} else {
|
||||
uint32_t off = vb->buffer_offset;
|
||||
uint32_t size = vb->buffer.resource->width0 - off;
|
||||
|
||||
OUT_RELOC(ring, rsc->bo, off, 0, 0);
|
||||
OUT_RING(ring, size); /* VFD_VERTEX_BUFFER[j].SIZE */
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_BASE(j, .bo = rsc->bo, .bo_offset = off));
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_SIZE(j, size));
|
||||
}
|
||||
}
|
||||
|
||||
return ring;
|
||||
return crb.ring();
|
||||
}
|
||||
|
||||
static enum a6xx_ztest_mode
|
||||
|
|
@ -227,46 +224,39 @@ build_lrz(struct fd6_emit *emit) assert_dt
|
|||
|
||||
fd6_ctx->last.lrz = lrz;
|
||||
|
||||
unsigned ndwords = (CHIP >= A7XX) ? 10 : 8;
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, ndwords * 4, FD_RINGBUFFER_STREAMING);
|
||||
unsigned nregs = (CHIP >= A7XX) ? 5 : 4;
|
||||
fd_crb crb(ctx->batch->submit, nregs);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_LRZ_CNTL(
|
||||
.enable = lrz.enable,
|
||||
.lrz_write = lrz.write,
|
||||
.greater = lrz.direction == FD_LRZ_GREATER,
|
||||
.z_write_enable = lrz.test,
|
||||
.z_bounds_enable = lrz.z_bounds_enable,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring,
|
||||
A7XX_GRAS_LRZ_CNTL2(
|
||||
.disable_on_wrong_dir = false,
|
||||
.fc_enable = false,
|
||||
)
|
||||
);
|
||||
crb.add(A6XX_GRAS_LRZ_CNTL(
|
||||
.enable = lrz.enable,
|
||||
.lrz_write = lrz.write,
|
||||
.greater = lrz.direction == FD_LRZ_GREATER,
|
||||
.z_write_enable = lrz.test,
|
||||
.z_bounds_enable = lrz.z_bounds_enable,
|
||||
))
|
||||
.add(A7XX_GRAS_LRZ_CNTL2(
|
||||
.disable_on_wrong_dir = false,
|
||||
.fc_enable = false,
|
||||
));
|
||||
} else {
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_LRZ_CNTL(
|
||||
.enable = lrz.enable,
|
||||
.lrz_write = lrz.write,
|
||||
.greater = lrz.direction == FD_LRZ_GREATER,
|
||||
.fc_enable = false,
|
||||
.z_write_enable = lrz.test,
|
||||
.z_bounds_enable = lrz.z_bounds_enable,
|
||||
.disable_on_wrong_dir = false,
|
||||
crb.add(A6XX_GRAS_LRZ_CNTL(
|
||||
.enable = lrz.enable,
|
||||
.lrz_write = lrz.write,
|
||||
.greater = lrz.direction == FD_LRZ_GREATER,
|
||||
.fc_enable = false,
|
||||
.z_write_enable = lrz.test,
|
||||
.z_bounds_enable = lrz.z_bounds_enable,
|
||||
.disable_on_wrong_dir = false,
|
||||
)
|
||||
);
|
||||
}
|
||||
OUT_REG(ring, A6XX_RB_LRZ_CNTL(.enable = lrz.enable, ));
|
||||
|
||||
OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
|
||||
crb.add(A6XX_RB_LRZ_CNTL(.enable = lrz.enable, ))
|
||||
.add(A6XX_RB_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ))
|
||||
.add(A6XX_GRAS_SU_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
|
||||
|
||||
return ring;
|
||||
return crb.ring();
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
|
|
@ -276,18 +266,14 @@ build_scissor(struct fd6_emit *emit) assert_dt
|
|||
struct pipe_scissor_state *scissors = fd_context_get_scissor(ctx);
|
||||
unsigned num_viewports = emit->prog->num_viewports;
|
||||
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
emit->ctx->batch->submit, (1 + (2 * num_viewports)) * 4, FD_RINGBUFFER_STREAMING);
|
||||
fd_crb crb(emit->ctx->batch->submit, 2 * num_viewports);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), 2 * num_viewports);
|
||||
for (unsigned i = 0; i < num_viewports; i++) {
|
||||
OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(scissors[i].minx) |
|
||||
A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(scissors[i].miny));
|
||||
OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(scissors[i].maxx) |
|
||||
A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(scissors[i].maxy));
|
||||
crb.add(A6XX_GRAS_SC_SCREEN_SCISSOR_TL(i, .x = scissors[i].minx, .y = scissors[i].miny))
|
||||
.add(A6XX_GRAS_SC_SCREEN_SCISSOR_BR(i, .x = scissors[i].maxx, .y = scissors[i].maxy));
|
||||
}
|
||||
|
||||
return ring;
|
||||
return crb.ring();
|
||||
}
|
||||
|
||||
/* Combination of FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
|
||||
|
|
@ -301,8 +287,7 @@ build_prog_fb_rast(struct fd6_emit *emit) assert_dt
|
|||
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
|
||||
const struct ir3_shader_variant *fs = emit->fs;
|
||||
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
|
||||
fd_crb crb(ctx->batch->submit, 5);
|
||||
|
||||
unsigned nr = pfb->nr_cbufs;
|
||||
|
||||
|
|
@ -314,18 +299,14 @@ build_prog_fb_rast(struct fd6_emit *emit) assert_dt
|
|||
if (blend->use_dual_src_blend)
|
||||
nr++;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_PS_OUTPUT_CNTL, 2);
|
||||
OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_PS_OUTPUT_CNTL_FRAG_WRITES_Z) |
|
||||
COND(fs->writes_smask && pfb->samples > 1,
|
||||
A6XX_RB_PS_OUTPUT_CNTL_FRAG_WRITES_SAMPMASK) |
|
||||
COND(fs->writes_stencilref,
|
||||
A6XX_RB_PS_OUTPUT_CNTL_FRAG_WRITES_STENCILREF) |
|
||||
COND(blend->use_dual_src_blend,
|
||||
A6XX_RB_PS_OUTPUT_CNTL_DUAL_COLOR_IN_ENABLE));
|
||||
OUT_RING(ring, A6XX_RB_PS_MRT_CNTL_MRT(nr));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_PS_MRT_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_SP_PS_MRT_CNTL_MRT(nr));
|
||||
crb.add(A6XX_RB_PS_OUTPUT_CNTL(
|
||||
.dual_color_in_enable = blend->use_dual_src_blend,
|
||||
.frag_writes_z = fs->writes_pos,
|
||||
.frag_writes_sampmask = fs->writes_smask && pfb->samples > 1,
|
||||
.frag_writes_stencilref = fs->writes_stencilref,
|
||||
));
|
||||
crb.add(A6XX_RB_PS_MRT_CNTL(.mrt = nr));
|
||||
crb.add(A6XX_SP_PS_MRT_CNTL(.mrt = nr));
|
||||
|
||||
unsigned mrt_components = 0;
|
||||
for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
|
||||
|
|
@ -340,10 +321,10 @@ build_prog_fb_rast(struct fd6_emit *emit) assert_dt
|
|||
|
||||
mrt_components &= prog->mrt_components;
|
||||
|
||||
OUT_REG(ring, A6XX_SP_PS_OUTPUT_MASK(.dword = mrt_components));
|
||||
OUT_REG(ring, A6XX_RB_PS_OUTPUT_MASK(.dword = mrt_components));
|
||||
crb.add(A6XX_SP_PS_OUTPUT_MASK(.dword = mrt_components))
|
||||
.add(A6XX_RB_PS_OUTPUT_MASK(.dword = mrt_components));
|
||||
|
||||
return ring;
|
||||
return crb.ring();
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
|
|
@ -351,15 +332,13 @@ build_blend_color(struct fd6_emit *emit) assert_dt
|
|||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct pipe_blend_color *bcolor = &ctx->blend_color;
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
OUT_REG(ring, A6XX_RB_BLEND_CONSTANT_RED_FP32(bcolor->color[0]),
|
||||
A6XX_RB_BLEND_CONSTANT_GREEN_FP32(bcolor->color[1]),
|
||||
A6XX_RB_BLEND_CONSTANT_BLUE_FP32(bcolor->color[2]),
|
||||
A6XX_RB_BLEND_CONSTANT_ALPHA_FP32(bcolor->color[3]));
|
||||
|
||||
return ring;
|
||||
return fd_crb(ctx->batch->submit, 4)
|
||||
.add(A6XX_RB_BLEND_CONSTANT_RED_FP32(bcolor->color[0]))
|
||||
.add(A6XX_RB_BLEND_CONSTANT_GREEN_FP32(bcolor->color[1]))
|
||||
.add(A6XX_RB_BLEND_CONSTANT_BLUE_FP32(bcolor->color[2]))
|
||||
.add(A6XX_RB_BLEND_CONSTANT_ALPHA_FP32(bcolor->color[3]))
|
||||
.ring();
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
|
|
@ -373,9 +352,6 @@ build_sample_locations(struct fd6_emit *emit)
|
|||
return fd_ringbuffer_ref(fd6_ctx->sample_locations_disable_stateobj);
|
||||
}
|
||||
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
uint32_t sample_locations = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
float x = (ctx->sample_locations[i] & 0xf) / 16.0f;
|
||||
|
|
@ -389,21 +365,19 @@ build_sample_locations(struct fd6_emit *emit)
|
|||
A6XX_RB_PROGRAMMABLE_MSAA_POS_0_SAMPLE_0_Y(y)) << i*8;
|
||||
}
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL(.location_enable = true),
|
||||
A6XX_GRAS_SC_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations));
|
||||
|
||||
OUT_REG(ring, A6XX_RB_MSAA_SAMPLE_POS_CNTL(.location_enable = true),
|
||||
A6XX_RB_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations));
|
||||
|
||||
OUT_REG(ring, A6XX_TPL1_MSAA_SAMPLE_POS_CNTL(.location_enable = true),
|
||||
A6XX_TPL1_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations));
|
||||
|
||||
return ring;
|
||||
return fd_crb(ctx->batch->submit, 6)
|
||||
.add(A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL(.location_enable = true))
|
||||
.add(A6XX_GRAS_SC_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations))
|
||||
.add(A6XX_RB_MSAA_SAMPLE_POS_CNTL(.location_enable = true))
|
||||
.add(A6XX_RB_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations))
|
||||
.add(A6XX_TPL1_MSAA_SAMPLE_POS_CNTL(.location_enable = true))
|
||||
.add(A6XX_TPL1_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations))
|
||||
.ring();
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
||||
fd6_emit_streamout(fd_cs &cs, struct fd6_emit *emit) assert_dt
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
|
||||
|
|
@ -423,34 +397,34 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
|||
|
||||
target->stride = info->stride[i];
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE(i), 3);
|
||||
/* VPC_SO[i].BUFFER_BASE_LO: */
|
||||
OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
|
||||
OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
|
||||
fd_pkt4(cs, 3)
|
||||
.add(A6XX_VPC_SO_BUFFER_BASE(i, fd_resource(target->base.buffer)->bo))
|
||||
.add(A6XX_VPC_SO_BUFFER_SIZE(i, target->base.buffer_size + target->base.buffer_offset));
|
||||
|
||||
struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
|
||||
|
||||
if (so->reset & (1 << i)) {
|
||||
assert(so->offsets[i] == 0);
|
||||
|
||||
OUT_PKT7(ring, CP_MEM_WRITE, 3);
|
||||
OUT_RELOC(ring, offset_bo, 0, 0, 0);
|
||||
OUT_RING(ring, target->base.buffer_offset);
|
||||
fd_pkt7(cs, CP_MEM_WRITE, 3)
|
||||
.add(CP_MEM_WRITE_ADDR(offset_bo))
|
||||
.add(target->base.buffer_offset);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
|
||||
OUT_RING(ring, target->base.buffer_offset);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_VPC_SO_BUFFER_OFFSET(i,target->base.buffer_offset));
|
||||
} else {
|
||||
OUT_PKT7(ring, CP_MEM_TO_REG, 3);
|
||||
OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
|
||||
COND(CHIP == A6XX, CP_MEM_TO_REG_0_SHIFT_BY_2) |
|
||||
CP_MEM_TO_REG_0_UNK31 |
|
||||
CP_MEM_TO_REG_0_CNT(0));
|
||||
OUT_RELOC(ring, offset_bo, 0, 0, 0);
|
||||
fd_pkt7(cs, CP_MEM_TO_REG, 3)
|
||||
.add(CP_MEM_TO_REG_0(
|
||||
.reg = REG_A6XX_VPC_SO_BUFFER_OFFSET(i),
|
||||
.shift_by_2 = CHIP == A6XX,
|
||||
.unk31 = true,
|
||||
))
|
||||
.add(CP_MEM_TO_REG_SRC(offset_bo));
|
||||
}
|
||||
|
||||
// After a draw HW would write the new offset to offset_bo
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
|
||||
OUT_RELOC(ring, offset_bo, 0, 0, 0);
|
||||
fd_pkt4(cs, 2)
|
||||
.add(A6XX_VPC_SO_FLUSH_BASE(i, offset_bo));
|
||||
|
||||
so->reset &= ~(1 << i);
|
||||
|
||||
|
|
@ -482,7 +456,7 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
|||
* themselves.
|
||||
*/
|
||||
if (ctx->dirty & FD_DIRTY_STREAMOUT)
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
ctx->last.streamout_mask = streamout_mask;
|
||||
emit->streamout_mask = streamout_mask;
|
||||
|
|
@ -492,18 +466,18 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
|||
* Stuff that less frequently changes and isn't (yet) moved into stategroups
|
||||
*/
|
||||
static void
|
||||
fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
||||
fd6_emit_non_group(fd_cs &cs, struct fd6_emit *emit) assert_dt
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
const enum fd_dirty_3d_state dirty = ctx->dirty;
|
||||
unsigned num_viewports = emit->prog->num_viewports;
|
||||
|
||||
fd_crb crb(cs, 324);
|
||||
|
||||
if (dirty & FD_DIRTY_STENCIL_REF) {
|
||||
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_STENCIL_REF_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_RB_STENCIL_REF_CNTL_REF(sr->ref_value[0]) |
|
||||
A6XX_RB_STENCIL_REF_CNTL_BFREF(sr->ref_value[1]));
|
||||
crb.add(A6XX_RB_STENCIL_REF_CNTL(.ref = sr->ref_value[0], .bfref = sr->ref_value[1]));
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_PROG)) {
|
||||
|
|
@ -511,25 +485,18 @@ fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
|||
struct pipe_scissor_state *scissor = &ctx->viewport_scissor[i];
|
||||
struct pipe_viewport_state *vp = & ctx->viewport[i];
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_CL_VIEWPORT_XOFFSET(i, vp->translate[0]),
|
||||
A6XX_GRAS_CL_VIEWPORT_XSCALE(i, vp->scale[0]),
|
||||
A6XX_GRAS_CL_VIEWPORT_YOFFSET(i, vp->translate[1]),
|
||||
A6XX_GRAS_CL_VIEWPORT_YSCALE(i, vp->scale[1]),
|
||||
A6XX_GRAS_CL_VIEWPORT_ZOFFSET(i, vp->translate[2]),
|
||||
A6XX_GRAS_CL_VIEWPORT_ZSCALE(i, vp->scale[2]));
|
||||
|
||||
OUT_REG(
|
||||
ring,
|
||||
A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(i,
|
||||
.x = scissor->minx,
|
||||
.y = scissor->miny),
|
||||
A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(i,
|
||||
.x = scissor->maxx,
|
||||
.y = scissor->maxy));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_XOFFSET(i, vp->translate[0]));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_XSCALE(i, vp->scale[0]));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_YOFFSET(i, vp->translate[1]));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_YSCALE(i, vp->scale[1]));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZOFFSET(i, vp->translate[2]));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZSCALE(i, vp->scale[2]));
|
||||
crb.add(A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(i, .x = scissor->minx, .y = scissor->miny));
|
||||
crb.add(A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(i, .x = scissor->maxx, .y = scissor->maxy));
|
||||
}
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = ctx->guardband.x,
|
||||
.vert = ctx->guardband.y));
|
||||
crb.add(A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = ctx->guardband.x,
|
||||
.vert = ctx->guardband.y));
|
||||
}
|
||||
|
||||
/* The clamp ranges are only used when the rasterizer wants depth
|
||||
|
|
@ -544,12 +511,14 @@ fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
|
|||
util_viewport_zmin_zmax(vp, ctx->rasterizer->clip_halfz,
|
||||
&zmin, &zmax);
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MIN(i, zmin),
|
||||
A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MAX(i, zmax));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MIN(i, zmin));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MAX(i, zmax));
|
||||
|
||||
/* TODO: what to do about this and multi viewport ? */
|
||||
if (i == 0)
|
||||
OUT_REG(ring, A6XX_RB_VIEWPORT_ZCLAMP_MIN(zmin), A6XX_RB_VIEWPORT_ZCLAMP_MAX(zmax));
|
||||
if (i == 0) {
|
||||
crb.add(A6XX_RB_VIEWPORT_ZCLAMP_MIN(zmin));
|
||||
crb.add(A6XX_RB_VIEWPORT_ZCLAMP_MAX(zmax));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -558,8 +527,6 @@ static struct fd_ringbuffer*
|
|||
build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring =
|
||||
fd_submit_new_ringbuffer(emit->ctx->batch->submit, 2 * 4, FD_RINGBUFFER_STREAMING);
|
||||
uint32_t prim_mode = NO_FLUSH;
|
||||
if (emit->fs->fs.uses_fbfetch_output) {
|
||||
if (gmem) {
|
||||
|
|
@ -571,21 +538,25 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem)
|
|||
} else {
|
||||
prim_mode = NO_FLUSH;
|
||||
}
|
||||
OUT_REG(ring, A6XX_GRAS_SC_CNTL(.ccusinglecachelinesize = 2,
|
||||
.single_prim_mode = (enum a6xx_single_prim_mode)prim_mode));
|
||||
return ring;
|
||||
|
||||
return fd_crb(ctx->batch->submit, 1)
|
||||
.add(A6XX_GRAS_SC_CNTL(
|
||||
.ccusinglecachelinesize = 2,
|
||||
.single_prim_mode = (enum a6xx_single_prim_mode)prim_mode)
|
||||
)
|
||||
.ring();
|
||||
}
|
||||
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
void
|
||||
fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||
fd6_emit_3d_state(fd_cs &cs, struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
||||
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
|
||||
const struct ir3_shader_variant *fs = emit->fs;
|
||||
|
||||
emit_marker6(ring, 5);
|
||||
emit_marker6(cs, 5);
|
||||
|
||||
/* Special case, we need to re-emit bindless FS state w/ the
|
||||
* fb-read state appended:
|
||||
|
|
@ -713,7 +684,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_TEX);
|
||||
break;
|
||||
case FD6_GROUP_SO:
|
||||
fd6_emit_streamout<CHIP>(ring, emit);
|
||||
fd6_emit_streamout<CHIP>(cs, emit);
|
||||
break;
|
||||
case FD6_GROUP_PRIM_MODE_SYSMEM:
|
||||
state = build_prim_mode(emit, ctx, false);
|
||||
|
|
@ -724,25 +695,24 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIM_MODE_GMEM);
|
||||
break;
|
||||
case FD6_GROUP_NON_GROUP:
|
||||
fd6_emit_non_ring(ring, emit);
|
||||
fd6_emit_non_group(cs, emit);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fd6_state_emit(&emit->state, ring);
|
||||
fd6_state_emit(&emit->state, cs);
|
||||
}
|
||||
|
||||
template void fd6_emit_3d_state<A6XX, NO_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A7XX, NO_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A6XX, HAS_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A7XX, HAS_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A6XX, NO_TESS_GS>(fd_cs &cs, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A7XX, NO_TESS_GS>(fd_cs &cs, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A6XX, HAS_TESS_GS>(fd_cs &cs, struct fd6_emit *emit);
|
||||
template void fd6_emit_3d_state<A7XX, HAS_TESS_GS>(fd_cs &cs, struct fd6_emit *emit);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cp)
|
||||
fd6_emit_cs_state(struct fd_context *ctx, fd_cs &cs, struct fd6_compute_state *cp)
|
||||
{
|
||||
struct fd6_state state = {};
|
||||
|
||||
|
|
@ -754,8 +724,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
* const state, so it must execute before we start loading consts, rather
|
||||
* than be deferred until CP_EXEC_CS.
|
||||
*/
|
||||
OUT_PKT7(ring, CP_SET_MODE, 1);
|
||||
OUT_RING(ring, 1);
|
||||
fd_pkt7(cs, CP_SET_MODE, 1)
|
||||
.add(1);
|
||||
|
||||
uint32_t gen_dirty = ctx->gen_dirty &
|
||||
(BIT(FD6_GROUP_PROG) | BIT(FD6_GROUP_CS_TEX) | BIT(FD6_GROUP_CS_BINDLESS));
|
||||
|
|
@ -785,13 +755,13 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
}
|
||||
}
|
||||
|
||||
fd6_state_emit(&state, ring);
|
||||
fd6_state_emit(&state, cs);
|
||||
}
|
||||
FD_GENX(fd6_emit_cs_state);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem)
|
||||
fd6_emit_ccu_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem)
|
||||
{
|
||||
const struct fd6_gmem_config *cfg = gmem ? &screen->config_gmem : &screen->config_sysmem;
|
||||
enum a6xx_ccu_cache_size color_cache_size = !gmem ? CCU_CACHE_SIZE_FULL :
|
||||
|
|
@ -803,8 +773,8 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme
|
|||
uint32_t depth_offset_hi = cfg->depth_ccu_offset >> 21;
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
OUT_REG(ring,
|
||||
A7XX_RB_CCU_CACHE_CNTL(
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A7XX_RB_CCU_CACHE_CNTL(
|
||||
.depth_offset_hi = depth_offset_hi,
|
||||
.color_offset_hi = color_offset_hi,
|
||||
.depth_cache_size = CCU_CACHE_SIZE_FULL,
|
||||
|
|
@ -815,20 +785,16 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme
|
|||
);
|
||||
|
||||
if (screen->info->a7xx.has_gmem_vpc_attr_buf) {
|
||||
OUT_REG(ring,
|
||||
A7XX_VPC_ATTR_BUF_GMEM_SIZE(.size_gmem = cfg->vpc_attr_buf_size),
|
||||
A7XX_VPC_ATTR_BUF_GMEM_BASE(.base_gmem = cfg->vpc_attr_buf_offset)
|
||||
);
|
||||
OUT_REG(ring,
|
||||
A7XX_PC_ATTR_BUF_GMEM_SIZE(.size_gmem = cfg->vpc_attr_buf_size)
|
||||
);
|
||||
fd_crb(cs, 3)
|
||||
.add(A7XX_VPC_ATTR_BUF_GMEM_SIZE(.size_gmem = cfg->vpc_attr_buf_size))
|
||||
.add(A7XX_VPC_ATTR_BUF_GMEM_BASE(.base_gmem = cfg->vpc_attr_buf_offset))
|
||||
.add(A7XX_PC_ATTR_BUF_GMEM_SIZE(.size_gmem = cfg->vpc_attr_buf_size));
|
||||
}
|
||||
} else {
|
||||
OUT_WFI5(ring); /* early a6xx (a630?) needed this */
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
OUT_REG(ring,
|
||||
RB_CCU_CNTL(
|
||||
CHIP,
|
||||
fd_pkt4(cs, 1)
|
||||
.add(RB_CCU_CNTL(CHIP,
|
||||
.gmem_fast_clear_disable =
|
||||
!screen->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve =
|
||||
|
|
@ -847,21 +813,22 @@ FD_GENX(fd6_emit_ccu_cntl);
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
fd6_emit_stomp(struct fd_ringbuffer *ring, const uint16_t *regs, size_t count)
|
||||
fd6_emit_stomp(fd_cs &cs, const uint16_t *regs, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (fd_reg_stomp_allowed(CHIP, regs[i])) {
|
||||
WRITE(regs[i], 0xffffffff);
|
||||
fd_pkt4(cs, 1).add({regs[i], 0xffffffff});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
static void
|
||||
fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
|
||||
fd_ncrb<CHIP> ncrb(cs, 25 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
/* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has
|
||||
* static properties that can be set once, this requires a WFI to take effect.
|
||||
|
|
@ -869,13 +836,10 @@ fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
|||
* change per-RP and don't require a WFI to take effect, only CCU inval/flush
|
||||
* events are required.
|
||||
*/
|
||||
OUT_REG(ring,
|
||||
RB_CCU_CNTL(
|
||||
CHIP,
|
||||
.gmem_fast_clear_disable = true, // !screen->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve = screen->info->a6xx.concurrent_resolve,
|
||||
)
|
||||
);
|
||||
ncrb.add(RB_CCU_CNTL(CHIP,
|
||||
.gmem_fast_clear_disable = true, // !screen->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve = screen->info->a6xx.concurrent_resolve,
|
||||
));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(screen->info->a6xx.magic_raw); i++) {
|
||||
|
|
@ -893,176 +857,163 @@ fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
|||
break;
|
||||
}
|
||||
|
||||
WRITE(magic_reg.reg, value);
|
||||
ncrb.add({ .reg = magic_reg.reg, .value = value });
|
||||
}
|
||||
|
||||
WRITE(REG_A6XX_RB_DBG_ECO_CNTL, screen->info->a6xx.magic.RB_DBG_ECO_CNTL);
|
||||
WRITE(REG_A6XX_SP_NC_MODE_CNTL_2, A6XX_SP_NC_MODE_CNTL_2_F16_NO_INF);
|
||||
WRITE(REG_A6XX_SP_DBG_ECO_CNTL, screen->info->a6xx.magic.SP_DBG_ECO_CNTL);
|
||||
WRITE(REG_A6XX_SP_PERFCTR_SHADER_MASK, 0x3f);
|
||||
ncrb.add(A6XX_RB_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.RB_DBG_ECO_CNTL));
|
||||
ncrb.add(A6XX_SP_NC_MODE_CNTL_2(.f16_no_inf = true));
|
||||
|
||||
ncrb.add(A6XX_SP_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.SP_DBG_ECO_CNTL));
|
||||
ncrb.add(A6XX_SP_PERFCTR_SHADER_MASK(.dword = 0x3f));
|
||||
if (CHIP == A6XX && !screen->info->a6xx.is_a702)
|
||||
WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
|
||||
WRITE(REG_A6XX_TPL1_DBG_ECO_CNTL, screen->info->a6xx.magic.TPL1_DBG_ECO_CNTL);
|
||||
ncrb.add(A6XX_TPL1_UNKNOWN_B605(.dword = 0x44));
|
||||
ncrb.add(A6XX_TPL1_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.TPL1_DBG_ECO_CNTL));
|
||||
if (CHIP == A6XX) {
|
||||
WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
|
||||
WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
|
||||
ncrb.add(A6XX_HLSQ_UNKNOWN_BE00(.dword = 0x80));
|
||||
ncrb.add(A6XX_HLSQ_UNKNOWN_BE01());
|
||||
}
|
||||
|
||||
WRITE(REG_A6XX_VPC_DBG_ECO_CNTL, screen->info->a6xx.magic.VPC_DBG_ECO_CNTL);
|
||||
WRITE(REG_A6XX_GRAS_DBG_ECO_CNTL, screen->info->a6xx.magic.GRAS_DBG_ECO_CNTL);
|
||||
ncrb.add(A6XX_VPC_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.VPC_DBG_ECO_CNTL));
|
||||
ncrb.add(A6XX_GRAS_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.GRAS_DBG_ECO_CNTL));
|
||||
if (CHIP == A6XX)
|
||||
WRITE(REG_A6XX_HLSQ_DBG_ECO_CNTL, screen->info->a6xx.magic.HLSQ_DBG_ECO_CNTL);
|
||||
WRITE(REG_A6XX_SP_CHICKEN_BITS, screen->info->a6xx.magic.SP_CHICKEN_BITS);
|
||||
WRITE(REG_A6XX_SP_GFX_USIZE, 0);
|
||||
WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
|
||||
if (CHIP == A6XX)
|
||||
WRITE(REG_A6XX_HLSQ_SHARED_CONSTS, 0);
|
||||
WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, screen->info->a6xx.magic.UCHE_UNKNOWN_0E12);
|
||||
WRITE(REG_A6XX_UCHE_CLIENT_PF, screen->info->a6xx.magic.UCHE_CLIENT_PF);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_8E01, screen->info->a6xx.magic.RB_UNKNOWN_8E01);
|
||||
WRITE(REG_A6XX_SP_UNKNOWN_A9A8, 0);
|
||||
OUT_REG(ring,
|
||||
A6XX_SP_MODE_CNTL(
|
||||
ncrb.add(A6XX_HLSQ_DBG_ECO_CNTL(.dword = screen->info->a6xx.magic.HLSQ_DBG_ECO_CNTL));
|
||||
ncrb.add(A6XX_SP_CHICKEN_BITS(.dword = screen->info->a6xx.magic.SP_CHICKEN_BITS));
|
||||
|
||||
ncrb.add(A6XX_UCHE_UNKNOWN_0E12(.dword = screen->info->a6xx.magic.UCHE_UNKNOWN_0E12));
|
||||
ncrb.add(A6XX_UCHE_CLIENT_PF(.dword = screen->info->a6xx.magic.UCHE_CLIENT_PF));
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
ncrb.add(A6XX_HLSQ_SHARED_CONSTS());
|
||||
ncrb.add(A6XX_VPC_UNKNOWN_9211());
|
||||
}
|
||||
|
||||
ncrb.add(A6XX_GRAS_UNKNOWN_80AF());
|
||||
ncrb.add(A6XX_VPC_UNKNOWN_9602());
|
||||
|
||||
/* These regs are blocked (CP_PROTECT) on a6xx: */
|
||||
if (CHIP >= A7XX) {
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_0(CHIP, 0));
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_1(CHIP, 0x3fe05ff4));
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_2(CHIP, 0x3fa0ebee));
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed));
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note, CP_CONTEXT_REG_BUNCH can only write context regs, some of the static
|
||||
* regs are non-context regs, attempting to write them with CRB will trigger
|
||||
* CP_PROTECT errors.
|
||||
*/
|
||||
template <chip CHIP>
|
||||
static void
|
||||
fd6_emit_static_context_regs(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
|
||||
fd_crb crb(cs, 80);
|
||||
|
||||
crb.add(A6XX_SP_GFX_USIZE());
|
||||
crb.add(A6XX_SP_UNKNOWN_B182());
|
||||
|
||||
crb.add(A6XX_RB_UNKNOWN_8E01(.dword = screen->info->a6xx.magic.RB_UNKNOWN_8E01));
|
||||
crb.add(A6XX_SP_UNKNOWN_A9A8());
|
||||
|
||||
crb.add(A6XX_SP_MODE_CNTL(
|
||||
.constant_demotion_enable = true,
|
||||
.isammode = ISAMMODE_GL,
|
||||
.shared_consts_enable = false,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring, A6XX_VFD_MODE_CNTL(.vertex = true, .instance = true));
|
||||
WRITE(REG_A6XX_VPC_UNKNOWN_9107, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
|
||||
WRITE(REG_A6XX_PC_MODE_CNTL, screen->info->a6xx.magic.PC_MODE_CNTL);
|
||||
|
||||
WRITE(REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 0);
|
||||
WRITE(REG_A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL, 0);
|
||||
WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);
|
||||
crb.add(A6XX_VFD_MODE_CNTL(.vertex = true, .instance = true));
|
||||
crb.add(A6XX_VPC_UNKNOWN_9107());
|
||||
crb.add(A6XX_RB_UNKNOWN_8811(.dword = 0x00000010));
|
||||
crb.add(A6XX_PC_MODE_CNTL(.dword=screen->info->a6xx.magic.PC_MODE_CNTL));
|
||||
crb.add(A6XX_GRAS_LRZ_PS_INPUT_CNTL());
|
||||
crb.add(A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL());
|
||||
crb.add(A6XX_GRAS_UNKNOWN_8110(.dword = 0x2));
|
||||
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
|
||||
crb.add(A6XX_RB_UNKNOWN_8818());
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
|
||||
crb.add(A6XX_RB_UNKNOWN_8819());
|
||||
crb.add(A6XX_RB_UNKNOWN_881A());
|
||||
crb.add(A6XX_RB_UNKNOWN_881B());
|
||||
crb.add(A6XX_RB_UNKNOWN_881C());
|
||||
crb.add(A6XX_RB_UNKNOWN_881D());
|
||||
crb.add(A6XX_RB_UNKNOWN_881E());
|
||||
}
|
||||
|
||||
WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);
|
||||
crb.add(A6XX_RB_UNKNOWN_88F0());
|
||||
crb.add(A6XX_VPC_REPLACE_MODE_CNTL());
|
||||
crb.add(A6XX_VPC_UNKNOWN_9300());
|
||||
crb.add(A6XX_VPC_SO_OVERRIDE(true));
|
||||
|
||||
WRITE(REG_A6XX_VPC_REPLACE_MODE_CNTL, A6XX_VPC_REPLACE_MODE_CNTL(0).value);
|
||||
WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);
|
||||
|
||||
WRITE(REG_A6XX_VPC_SO_OVERRIDE, A6XX_VPC_SO_OVERRIDE(true).value);
|
||||
|
||||
OUT_REG(ring, VPC_RAST_STREAM_CNTL(CHIP));
|
||||
crb.add(VPC_RAST_STREAM_CNTL(CHIP));
|
||||
|
||||
if (CHIP == A7XX)
|
||||
OUT_REG(ring, A7XX_VPC_RAST_STREAM_CNTL_V2());
|
||||
crb.add(A7XX_VPC_RAST_STREAM_CNTL_V2());
|
||||
|
||||
WRITE(REG_A6XX_PC_STEREO_RENDERING_CNTL, 0);
|
||||
crb.add(A6XX_PC_STEREO_RENDERING_CNTL());
|
||||
crb.add(A6XX_SP_UNKNOWN_B183());
|
||||
crb.add(A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL());
|
||||
crb.add(A6XX_GRAS_SU_VS_SIV_CNTL());
|
||||
crb.add(A6XX_GRAS_SC_CNTL(.ccusinglecachelinesize = 2));
|
||||
|
||||
WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);
|
||||
|
||||
WRITE(REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0);
|
||||
WRITE(REG_A6XX_GRAS_SU_VS_SIV_CNTL, 0);
|
||||
WRITE(REG_A6XX_GRAS_SC_CNTL, A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
|
||||
WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
|
||||
if (CHIP == A6XX) {
|
||||
WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
|
||||
WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
|
||||
crb.add(A6XX_VPC_UNKNOWN_9210());
|
||||
}
|
||||
WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
|
||||
WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
|
||||
/* NOTE blob seems to (mostly?) use 0xb2 for TPL1_MODE_CNTL
|
||||
* but this seems to kill texture gather offsets.
|
||||
*/
|
||||
OUT_REG(ring,
|
||||
A6XX_TPL1_MODE_CNTL(
|
||||
|
||||
crb.add(A6XX_PC_UNKNOWN_9E72());
|
||||
|
||||
crb.add(A6XX_TPL1_MODE_CNTL(
|
||||
.isammode = ISAMMODE_GL,
|
||||
.texcoordroundmode = COORD_TRUNCATE,
|
||||
.nearestmipsnap = CLAMP_ROUND_TRUNCATE,
|
||||
.destdatatypeoverride = true));
|
||||
.destdatatypeoverride = true,
|
||||
));
|
||||
|
||||
OUT_REG(ring, SP_REG_PROG_ID_3(
|
||||
crb.add(SP_REG_PROG_ID_3(
|
||||
CHIP,
|
||||
.linelengthregid = INVALID_REG,
|
||||
.foveationqualityregid = INVALID_REG,
|
||||
));
|
||||
|
||||
emit_marker6(ring, 7);
|
||||
crb.add(A6XX_VFD_RENDER_MODE(RENDERING_PASS));
|
||||
crb.add(A6XX_VFD_STEREO_RENDERING_CNTL());
|
||||
crb.add(A6XX_VPC_SO_CNTL());
|
||||
|
||||
OUT_REG(ring, A6XX_VFD_RENDER_MODE(RENDERING_PASS));
|
||||
crb.add(A6XX_GRAS_LRZ_CNTL());
|
||||
if (CHIP >= A7XX)
|
||||
crb.add(A7XX_GRAS_LRZ_CNTL2());
|
||||
|
||||
WRITE(REG_A6XX_VFD_STEREO_RENDERING_CNTL, 0);
|
||||
|
||||
/* Clear any potential pending state groups to be safe: */
|
||||
OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
|
||||
OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
|
||||
CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
|
||||
CP_SET_DRAW_STATE__0_GROUP_ID(0));
|
||||
OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
|
||||
OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000); /* VPC_SO_CNTL */
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
OUT_REG(ring, A6XX_GRAS_LRZ_CNTL());
|
||||
OUT_REG(ring, A7XX_GRAS_LRZ_CNTL2());
|
||||
} else {
|
||||
OUT_REG(ring, A6XX_GRAS_LRZ_CNTL());
|
||||
}
|
||||
|
||||
OUT_REG(ring, A6XX_RB_LRZ_CNTL());
|
||||
OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL());
|
||||
OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
crb.add(A6XX_RB_LRZ_CNTL());
|
||||
crb.add(A6XX_RB_DEPTH_PLANE_CNTL());
|
||||
crb.add(A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
|
||||
|
||||
/* Initialize VFD_VERTEX_BUFFER[n].SIZE to zero to avoid iova faults trying
|
||||
* to fetch from a VFD_VERTEX_BUFFER[n].BASE which we've potentially inherited
|
||||
* from another process:
|
||||
*/
|
||||
for (int32_t i = 0; i < 32; i++) {
|
||||
OUT_PKT4(ring, REG_A6XX_VFD_VERTEX_BUFFER_SIZE(i), 1);
|
||||
OUT_RING(ring, 0);
|
||||
}
|
||||
for (int32_t i = 0; i < 32; i++)
|
||||
crb.add(A6XX_VFD_VERTEX_BUFFER_SIZE(i, 0));
|
||||
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
struct fd_bo *bcolor_mem = fd6_ctx->bcolor_mem;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_TPL1_GFX_BORDER_COLOR_BASE, 2);
|
||||
OUT_RELOC(ring, bcolor_mem, 0, 0, 0);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_TPL1_CS_BORDER_COLOR_BASE, 2);
|
||||
OUT_RELOC(ring, bcolor_mem, 0, 0, 0);
|
||||
|
||||
OUT_REG(ring, A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL());
|
||||
|
||||
/* These regs are blocked (CP_PROTECT) on a6xx: */
|
||||
if (CHIP >= A7XX) {
|
||||
OUT_REG(ring,
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_0(CHIP, 0),
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_1(CHIP, 0x3fe05ff4),
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_2(CHIP, 0x3fa0ebee),
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed),
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0),
|
||||
);
|
||||
}
|
||||
crb.add(A6XX_TPL1_GFX_BORDER_COLOR_BASE(.bo = bcolor_mem));
|
||||
crb.add(A6XX_TPL1_CS_BORDER_COLOR_BASE(.bo = bcolor_mem));
|
||||
crb.add(A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL());
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
/* Blob sets these two per draw. */
|
||||
OUT_REG(ring, A7XX_PC_HS_BUFFER_SIZE(FD6_TESS_PARAM_SIZE));
|
||||
crb.add(A7XX_PC_HS_BUFFER_SIZE(FD6_TESS_PARAM_SIZE));
|
||||
/* Blob adds a bit more space ({0x10, 0x20, 0x30, 0x40} bytes)
|
||||
* but the meaning of this additional space is not known,
|
||||
* so we play safe and don't add it.
|
||||
*/
|
||||
OUT_REG(ring, A7XX_PC_TF_BUFFER_SIZE(FD6_TESS_FACTOR_SIZE));
|
||||
crb.add(A7XX_PC_TF_BUFFER_SIZE(FD6_TESS_FACTOR_SIZE));
|
||||
}
|
||||
|
||||
/* There is an optimization to skip executing draw states for draws with no
|
||||
|
|
@ -1081,9 +1032,21 @@ fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
|||
* seem to be affected.
|
||||
*/
|
||||
if (screen->info->a6xx.has_early_preamble) {
|
||||
WRITE(REG_A6XX_SP_PS_CNTL_0, 0);
|
||||
crb.add(A6XX_SP_PS_CNTL_0());
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_static_regs(fd_cs &cs, struct fd_context *ctx)
|
||||
{
|
||||
fd6_emit_static_non_context_regs<CHIP>(ctx, cs);
|
||||
fd6_emit_static_context_regs<CHIP>(ctx, cs);
|
||||
|
||||
fd_pkt7(cs, CP_SET_DRAW_STATE, 3)
|
||||
.add(CP_SET_DRAW_STATE__0(0, .disable_all_groups = true))
|
||||
.add(CP_SET_DRAW_STATE__ADDR(0));
|
||||
}
|
||||
FD_GENX(fd6_emit_static_regs);
|
||||
|
||||
/* emit setup at begin of new cmdstream buffer (don't rely on previous
|
||||
|
|
@ -1091,42 +1054,44 @@ FD_GENX(fd6_emit_static_regs);
|
|||
*/
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
|
||||
fd6_emit_restore(fd_cs &cs, struct fd_batch *batch)
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
|
||||
if (!batch->nondraw) {
|
||||
trace_start_state_restore(&batch->trace, ring);
|
||||
trace_start_state_restore(&batch->trace, cs.ring());
|
||||
}
|
||||
|
||||
if (FD_DBG(STOMP)) {
|
||||
fd6_emit_stomp<CHIP>(ring, &RP_BLIT_REGS<CHIP>[0], ARRAY_SIZE(RP_BLIT_REGS<CHIP>));
|
||||
fd6_emit_stomp<CHIP>(ring, &CMD_REGS<CHIP>[0], ARRAY_SIZE(CMD_REGS<CHIP>));
|
||||
fd6_emit_stomp<CHIP>(cs, &RP_BLIT_REGS<CHIP>[0], ARRAY_SIZE(RP_BLIT_REGS<CHIP>));
|
||||
fd6_emit_stomp<CHIP>(cs, &CMD_REGS<CHIP>[0], ARRAY_SIZE(CMD_REGS<CHIP>));
|
||||
}
|
||||
|
||||
OUT_PKT7(ring, CP_SET_MODE, 1);
|
||||
OUT_RING(ring, 0);
|
||||
fd_pkt7(cs, CP_SET_MODE, 1)
|
||||
.add(0x0);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
fd6_cache_inv<CHIP>(ctx, ring);
|
||||
fd6_cache_inv<CHIP>(ctx, cs);
|
||||
} else {
|
||||
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
|
||||
OUT_RING(ring, CP_THREAD_CONTROL_0_THREAD(CP_SET_THREAD_BR) |
|
||||
CP_THREAD_CONTROL_0_CONCURRENT_BIN_DISABLE);
|
||||
fd_pkt7(cs, CP_THREAD_CONTROL, 1)
|
||||
.add(CP_THREAD_CONTROL_0(
|
||||
.thread = CP_SET_THREAD_BR,
|
||||
.concurrent_bin_disable = true,
|
||||
));
|
||||
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
|
||||
OUT_PKT7(ring, CP_EVENT_WRITE, 1);
|
||||
OUT_RING(ring, UNK_40);
|
||||
fd_pkt7(cs, CP_EVENT_WRITE, 1)
|
||||
.add(UNK_40);
|
||||
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);
|
||||
OUT_WFI5(ring);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
}
|
||||
|
||||
OUT_REG(ring,
|
||||
SP_UPDATE_CNTL(CHIP,
|
||||
fd_pkt4(cs, 1)
|
||||
.add(SP_UPDATE_CNTL(CHIP,
|
||||
.vs_state = true, .hs_state = true,
|
||||
.ds_state = true, .gs_state = true,
|
||||
.fs_state = true, .cs_state = true,
|
||||
|
|
@ -1135,31 +1100,29 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
|
|||
.gfx_shared_const = true,
|
||||
.cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
.gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
)
|
||||
);
|
||||
));
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
fd6_emit_ib(ring, fd6_context(ctx)->restore);
|
||||
fd6_emit_ccu_cntl<CHIP>(ring, screen, false);
|
||||
fd6_emit_ib(cs, fd6_context(ctx)->restore);
|
||||
fd6_emit_ccu_cntl<CHIP>(cs, screen, false);
|
||||
|
||||
OUT_PKT7(ring, CP_SET_AMBLE, 3);
|
||||
uint32_t dwords = fd_ringbuffer_emit_reloc_ring_full(ring, fd6_context(ctx)->preamble, 0) / 4;
|
||||
OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(dwords) |
|
||||
CP_SET_AMBLE_2_TYPE(BIN_PREAMBLE_AMBLE_TYPE));
|
||||
uint32_t dwords;
|
||||
|
||||
OUT_PKT7(ring, CP_SET_AMBLE, 3);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, CP_SET_AMBLE_2_TYPE(PREAMBLE_AMBLE_TYPE));
|
||||
fd_pkt7(cs, CP_SET_AMBLE, 3)
|
||||
.add(fd6_context(ctx)->preamble, 0, &dwords)
|
||||
.add(CP_SET_AMBLE_2(.dwords = dwords, .type = BIN_PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
OUT_PKT7(ring, CP_SET_AMBLE, 3);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, CP_SET_AMBLE_2_TYPE(POSTAMBLE_AMBLE_TYPE));
|
||||
fd_pkt7(cs, CP_SET_AMBLE, 3)
|
||||
.add(CP_SET_AMBLE_ADDR())
|
||||
.add(CP_SET_AMBLE_2(.type = PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
fd_pkt7(cs, CP_SET_AMBLE, 3)
|
||||
.add(CP_SET_AMBLE_ADDR())
|
||||
.add(CP_SET_AMBLE_2(.type = POSTAMBLE_AMBLE_TYPE));
|
||||
|
||||
if (!batch->nondraw) {
|
||||
trace_end_state_restore(&batch->trace, ring);
|
||||
trace_end_state_restore(&batch->trace, cs.ring());
|
||||
}
|
||||
}
|
||||
FD_GENX(fd6_emit_restore);
|
||||
|
|
@ -1171,16 +1134,17 @@ fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
|
|||
{
|
||||
struct fd_bo *src_bo = fd_resource(src)->bo;
|
||||
struct fd_bo *dst_bo = fd_resource(dst)->bo;
|
||||
fd_cs cs(ring);
|
||||
unsigned i;
|
||||
|
||||
fd_ringbuffer_attach_bo(ring, dst_bo);
|
||||
fd_ringbuffer_attach_bo(ring, src_bo);
|
||||
cs.attach_bo(dst_bo);
|
||||
cs.attach_bo(src_bo);
|
||||
|
||||
for (i = 0; i < sizedwords; i++) {
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
|
||||
OUT_RELOC(ring, src_bo, src_off, 0, 0);
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 5)
|
||||
.add(CP_MEM_TO_MEM_0())
|
||||
.add(CP_MEM_TO_MEM_DST(dst_bo, dst_off))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(src_bo, src_off));
|
||||
|
||||
dst_off += 4;
|
||||
src_off += 4;
|
||||
|
|
|
|||
|
|
@ -104,28 +104,34 @@ struct fd6_state {
|
|||
};
|
||||
|
||||
static inline void
|
||||
fd6_state_emit(struct fd6_state *state, struct fd_ringbuffer *ring)
|
||||
fd6_state_emit(struct fd6_state *state, fd_cs &cs)
|
||||
{
|
||||
if (!state->num_groups)
|
||||
return;
|
||||
|
||||
OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups);
|
||||
fd_pkt7 pkt(cs, CP_SET_DRAW_STATE, 3 * state->num_groups);
|
||||
|
||||
for (unsigned i = 0; i < state->num_groups; i++) {
|
||||
struct fd6_state_group *g = &state->groups[i];
|
||||
unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;
|
||||
|
||||
assert((g->enable_mask & ~ENABLE_ALL) == 0);
|
||||
|
||||
if (n == 0) {
|
||||
OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
|
||||
CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
|
||||
CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
if (g->stateobj) {
|
||||
unsigned n = fd_ringbuffer_size(g->stateobj) / 4;
|
||||
|
||||
pkt.add(CP_SET_DRAW_STATE__0(i,
|
||||
.count = n,
|
||||
.group_id = g->group_id,
|
||||
.dword = g->enable_mask,
|
||||
));
|
||||
pkt.add(g->stateobj, 0, NULL);
|
||||
} else {
|
||||
OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
|
||||
CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
|
||||
OUT_RB(ring, g->stateobj);
|
||||
pkt.add(CP_SET_DRAW_STATE__0(i,
|
||||
.disable = true,
|
||||
.group_id = g->group_id,
|
||||
.dword = g->enable_mask,
|
||||
));
|
||||
pkt.add(CP_SET_DRAW_STATE__ADDR(i));
|
||||
}
|
||||
|
||||
if (g->stateobj)
|
||||
|
|
@ -201,51 +207,55 @@ fd6_emit_get_prog(struct fd6_emit *emit)
|
|||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
__event_write(struct fd_ringbuffer *ring, enum fd_gpu_event event,
|
||||
__event_write(fd_cs &cs, enum fd_gpu_event event,
|
||||
enum event_write_src esrc, enum event_write_dst edst,
|
||||
uint32_t val, struct fd_bo *bo, uint32_t offset)
|
||||
{
|
||||
struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
|
||||
unsigned len = info.needs_seqno ? 4 : 1;
|
||||
|
||||
if ((CHIP == A7XX) && (event == FD_RB_DONE))
|
||||
len--;
|
||||
|
||||
fd_pkt7 pkt(cs, CP_EVENT_WRITE, len);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
OUT_PKT7(ring, CP_EVENT_WRITE, len);
|
||||
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(info.raw_event) |
|
||||
pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) |
|
||||
COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP));
|
||||
} else if (CHIP == A7XX) {
|
||||
if (event == FD_RB_DONE)
|
||||
len--;
|
||||
OUT_PKT7(ring, CP_EVENT_WRITE, len);
|
||||
OUT_RING(ring, CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
|
||||
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
|
||||
CP_EVENT_WRITE7_0_WRITE_DST(edst) |
|
||||
COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED));
|
||||
pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
|
||||
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
|
||||
CP_EVENT_WRITE7_0_WRITE_DST(edst) |
|
||||
COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED));
|
||||
}
|
||||
|
||||
if (info.needs_seqno) {
|
||||
OUT_RELOC(ring, bo, offset); /* ADDR_LO/HI */
|
||||
pkt.add(CP_EVENT_WRITE_ADDR(
|
||||
.bo = bo,
|
||||
.bo_offset = offset,
|
||||
)); /* ADDR_LO/HI */
|
||||
if (len == 4)
|
||||
OUT_RING(ring, val);
|
||||
pkt.add(val);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
fd6_record_ts(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset)
|
||||
fd6_record_ts(fd_cs &cs, struct fd_bo *bo, uint32_t offset)
|
||||
{
|
||||
__event_write<CHIP>(ring, FD_RB_DONE, EV_WRITE_ALWAYSON, EV_DST_RAM, 0, bo, offset);
|
||||
__event_write<CHIP>(cs, FD_RB_DONE, EV_WRITE_ALWAYSON, EV_DST_RAM, 0, bo, offset);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
fd6_fence_write(struct fd_ringbuffer *ring, uint32_t val, struct fd_bo *bo, uint32_t offset)
|
||||
fd6_fence_write(fd_cs &cs, uint32_t val, struct fd_bo *bo, uint32_t offset)
|
||||
{
|
||||
__event_write<CHIP>(ring, FD_CACHE_CLEAN, EV_WRITE_USER_32B, EV_DST_RAM, val, bo, offset);
|
||||
__event_write<CHIP>(cs, FD_CACHE_CLEAN, EV_WRITE_USER_32B, EV_DST_RAM, val, bo, offset);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline unsigned
|
||||
fd6_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_gpu_event event)
|
||||
fd6_event_write(struct fd_context *ctx, fd_cs &cs, enum fd_gpu_event event)
|
||||
{
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
|
||||
|
|
@ -256,7 +266,7 @@ fd6_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_gpu_
|
|||
seqno = ++fd6_ctx->seqno;
|
||||
}
|
||||
|
||||
__event_write<CHIP>(ring, event, EV_WRITE_USER_32B, EV_DST_RAM, seqno,
|
||||
__event_write<CHIP>(cs, event, EV_WRITE_USER_32B, EV_DST_RAM, seqno,
|
||||
control_ptr(fd6_ctx, seqno));
|
||||
|
||||
return seqno;
|
||||
|
|
@ -264,45 +274,20 @@ fd6_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_gpu_
|
|||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
fd6_cache_inv(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
fd6_cache_inv(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
fd6_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
fd6_emit_blit(struct fd_context *ctx, fd_cs &cs)
|
||||
{
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
unsigned seqno;
|
||||
|
||||
seqno = fd6_event_write<CHIP>(ctx, ring, FD_RB_DONE);
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
|
||||
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
|
||||
CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
|
||||
OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
|
||||
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
|
||||
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
|
||||
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
|
||||
|
||||
seqno = fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
|
||||
OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
|
||||
OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
|
||||
OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
fd6_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
emit_marker6(ring, 7);
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_BLIT);
|
||||
emit_marker6(ring, 7);
|
||||
emit_marker6(cs, 7);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_BLIT);
|
||||
emit_marker6(cs, 7);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
@ -323,7 +308,7 @@ fd6_geom_stage(mesa_shader_stage type)
|
|||
}
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
static inline enum adreno_pm4_type3_packets
|
||||
fd6_stage2opcode(mesa_shader_stage type)
|
||||
{
|
||||
return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
|
||||
|
|
@ -369,37 +354,45 @@ fd6_gl2spacing(enum gl_tess_spacing spacing)
|
|||
}
|
||||
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
|
||||
struct fd6_emit *emit) assert_dt;
|
||||
void fd6_emit_3d_state(fd_cs &cs, struct fd6_emit *emit) assert_dt;
|
||||
|
||||
struct fd6_compute_state;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs) assert_dt;
|
||||
void fd6_emit_cs_state(struct fd_context *ctx, fd_cs &cs,
|
||||
struct fd6_compute_state *cp) assert_dt;
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);
|
||||
void fd6_emit_ccu_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem);
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring);
|
||||
void fd6_emit_static_regs(fd_cs &cs, struct fd_context *ctx);
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
|
||||
void fd6_emit_restore(fd_cs &cs, struct fd_batch *batch);
|
||||
|
||||
void fd6_emit_init_screen(struct pipe_screen *pscreen);
|
||||
|
||||
static inline void
|
||||
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
|
||||
fd6_emit_ib(fd_cs &cs, struct fd_ringbuffer *target)
|
||||
{
|
||||
emit_marker6(ring, 6);
|
||||
__OUT_IB5(ring, target);
|
||||
emit_marker6(ring, 6);
|
||||
if (target->cur == target->start)
|
||||
return;
|
||||
|
||||
unsigned count = fd_ringbuffer_cmd_count(target);
|
||||
|
||||
emit_marker6(cs, 6);
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
uint32_t dwords;
|
||||
|
||||
fd_pkt7(cs, CP_INDIRECT_BUFFER, 3)
|
||||
.add(target, i, &dwords)
|
||||
.add(A5XX_CP_INDIRECT_BUFFER_2(.ib_size = dwords));
|
||||
|
||||
assert(dwords > 0);
|
||||
}
|
||||
|
||||
emit_marker6(cs, 6);
|
||||
}
|
||||
|
||||
#define WRITE(reg, val) \
|
||||
do { \
|
||||
OUT_PKT4(ring, reg, 1); \
|
||||
OUT_RING(ring, val); \
|
||||
} while (0)
|
||||
|
||||
#endif /* FD6_EMIT_H */
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -191,8 +191,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
|
|||
struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
|
||||
struct fd6_descriptor_set *set = descriptor_set(ctx, shader);
|
||||
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING);
|
||||
fd_cs cs(ctx->batch->submit, 19 * 4);
|
||||
|
||||
/* Don't re-use a previous descriptor set if appending the
|
||||
* fb-read descriptor, as that can change across batches.
|
||||
|
|
@ -267,110 +266,104 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
|
|||
|
||||
unsigned idx = ir3_shader_descriptor_set(shader);
|
||||
|
||||
fd_ringbuffer_attach_bo(ring, set->bo);
|
||||
cs.attach_bo(set->bo);
|
||||
|
||||
if (shader == MESA_SHADER_COMPUTE) {
|
||||
OUT_REG(ring,
|
||||
SP_UPDATE_CNTL(
|
||||
CHIP,
|
||||
with_crb (cs, 5) {
|
||||
crb.add(SP_UPDATE_CNTL(CHIP,
|
||||
.cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
crb.add(SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
crb.add(A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if (bufso->enabled_mask) {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
|
||||
CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
|
||||
.state_type = ST6_UAV,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_CS_SHADER,
|
||||
.num_unit = util_last_bit(bufso->enabled_mask),
|
||||
),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
),
|
||||
);
|
||||
fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
|
||||
.state_type = ST6_UAV,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_CS_SHADER,
|
||||
.num_unit = util_last_bit(bufso->enabled_mask),
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
));
|
||||
}
|
||||
|
||||
if (imgso->enabled_mask) {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
|
||||
CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
|
||||
.state_type = ST6_UAV,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_CS_SHADER,
|
||||
.num_unit = util_last_bit(imgso->enabled_mask),
|
||||
),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
),
|
||||
);
|
||||
fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
|
||||
.state_type = ST6_UAV,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_CS_SHADER,
|
||||
.num_unit = util_last_bit(imgso->enabled_mask),
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
));
|
||||
}
|
||||
} else {
|
||||
OUT_REG(ring,
|
||||
SP_UPDATE_CNTL(
|
||||
CHIP,
|
||||
with_crb (cs, 5) {
|
||||
crb.add(SP_UPDATE_CNTL(CHIP,
|
||||
.gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring, SP_GFX_BINDLESS_BASE_DESCRIPTOR(CHIP,
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
if (CHIP == A6XX) {
|
||||
OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
crb.add(SP_GFX_BINDLESS_BASE_DESCRIPTOR(CHIP,
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
if (CHIP == A6XX) {
|
||||
crb.add(A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
|
||||
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if (bufso->enabled_mask) {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6,
|
||||
CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
|
||||
.state_type = ST6_SHADER,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_UAV,
|
||||
.num_unit = util_last_bit(bufso->enabled_mask),
|
||||
),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
),
|
||||
);
|
||||
fd_pkt7(cs, CP_LOAD_STATE6, 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
|
||||
.state_type = ST6_SHADER,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_UAV,
|
||||
.num_unit = util_last_bit(bufso->enabled_mask),
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
));
|
||||
}
|
||||
|
||||
if (imgso->enabled_mask) {
|
||||
OUT_PKT(ring, CP_LOAD_STATE6,
|
||||
CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
|
||||
.state_type = ST6_SHADER,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_UAV,
|
||||
.num_unit = util_last_bit(imgso->enabled_mask),
|
||||
),
|
||||
CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
),
|
||||
);
|
||||
fd_pkt7(cs, CP_LOAD_STATE6, 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
|
||||
.state_type = ST6_SHADER,
|
||||
.state_src = SS6_BINDLESS,
|
||||
.state_block = SB6_UAV,
|
||||
.num_unit = util_last_bit(imgso->enabled_mask),
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(
|
||||
/* This isn't actually an address: */
|
||||
.qword = (idx << 28) |
|
||||
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
return ring;
|
||||
return cs.ring();
|
||||
}
|
||||
FD_GENX(fd6_build_bindless_state);
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -16,6 +16,8 @@
|
|||
#include "ir3/ir3_shader.h"
|
||||
#include "ir3_cache.h"
|
||||
|
||||
class fd_cs;
|
||||
class fd_crb;
|
||||
struct fd6_emit;
|
||||
|
||||
struct fd6_program_state {
|
||||
|
|
@ -87,7 +89,7 @@ fd6_last_shader(const struct fd6_program_state *state)
|
|||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
void fd6_emit_shader(struct fd_context *ctx, fd_cs &cs,
|
||||
const struct ir3_shader_variant *so) assert_dt;
|
||||
|
||||
struct fd_ringbuffer *fd6_program_interp_state(struct fd6_emit *emit) assert_dt;
|
||||
|
|
|
|||
|
|
@ -59,40 +59,39 @@ static void
|
|||
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
ASSERT_ALIGNED(struct fd6_query_sample, start, 16);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNTER_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_RB_SAMPLE_COUNTER_CNTL_COPY);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_RB_SAMPLE_COUNTER_CNTL(.copy = true));
|
||||
|
||||
if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
|
||||
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNTER_BASE, 2);
|
||||
OUT_RELOC(ring, query_sample(aq, start));
|
||||
fd_pkt4(cs, 2)
|
||||
.add(A6XX_RB_SAMPLE_COUNTER_BASE(query_sample(aq, start)));
|
||||
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_ZPASS_DONE);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_ZPASS_DONE);
|
||||
|
||||
/* Copied from blob's cmdstream, not sure why it is done. */
|
||||
if (CHIP == A7XX) {
|
||||
fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
|
||||
fd6_event_write<CHIP>(ctx, cs, FD_CCU_CLEAN_DEPTH);
|
||||
}
|
||||
} else {
|
||||
OUT_PKT(ring, CP_EVENT_WRITE7,
|
||||
CP_EVENT_WRITE7_0(
|
||||
fd_pkt7(cs, CP_EVENT_WRITE7, 3)
|
||||
.add(CP_EVENT_WRITE7_0(
|
||||
.event = ZPASS_DONE,
|
||||
.write_sample_count = true,
|
||||
),
|
||||
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
|
||||
);
|
||||
OUT_PKT(ring, CP_EVENT_WRITE7,
|
||||
CP_EVENT_WRITE7_0(
|
||||
))
|
||||
.add(EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)));
|
||||
|
||||
fd_pkt7(cs, CP_EVENT_WRITE7, 3)
|
||||
.add(CP_EVENT_WRITE7_0(
|
||||
.event = ZPASS_DONE,
|
||||
.write_sample_count = true,
|
||||
.sample_count_end_offset = true,
|
||||
.write_accum_sample_count_diff = true,
|
||||
),
|
||||
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
|
||||
);
|
||||
))
|
||||
.add(EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)));
|
||||
}
|
||||
|
||||
ctx->occlusion_queries_active++;
|
||||
|
|
@ -108,63 +107,62 @@ static void
|
|||
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
|
||||
OUT_PKT7(ring, CP_MEM_WRITE, 4);
|
||||
OUT_RELOC(ring, query_sample(aq, stop));
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
fd_pkt7(cs, CP_MEM_WRITE, 4)
|
||||
.add(CP_MEM_WRITE_ADDR(query_sample(aq, stop)))
|
||||
.add(0xffffffff)
|
||||
.add(0xffffffff);
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
|
||||
fd_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNTER_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_RB_SAMPLE_COUNTER_CNTL_COPY);
|
||||
fd_pkt4(cs, 1)
|
||||
.add(A6XX_RB_SAMPLE_COUNTER_CNTL(.copy = true));
|
||||
|
||||
ASSERT_ALIGNED(struct fd6_query_sample, stop, 16);
|
||||
|
||||
if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
|
||||
OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNTER_BASE, 2);
|
||||
OUT_RELOC(ring, query_sample(aq, stop));
|
||||
fd_pkt4(cs, 2)
|
||||
.add(A6XX_RB_SAMPLE_COUNTER_BASE(query_sample(aq, stop)));
|
||||
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, FD_ZPASS_DONE);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, FD_ZPASS_DONE);
|
||||
|
||||
/* To avoid stalling in the draw buffer, emit the code to compute the
|
||||
* counter delta in the epilogue ring.
|
||||
*/
|
||||
struct fd_ringbuffer *epilogue = fd_batch_get_tile_epilogue(batch);
|
||||
fd_cs epilogue(fd_batch_get_tile_epilogue(batch));
|
||||
|
||||
OUT_PKT7(epilogue, CP_WAIT_REG_MEM, 6);
|
||||
OUT_RING(epilogue, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_NE) |
|
||||
CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
|
||||
OUT_RELOC(epilogue, query_sample(aq, stop));
|
||||
OUT_RING(epilogue, CP_WAIT_REG_MEM_3_REF(0xffffffff));
|
||||
OUT_RING(epilogue, CP_WAIT_REG_MEM_4_MASK(0xffffffff));
|
||||
OUT_RING(epilogue, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
|
||||
fd_pkt7(epilogue, CP_WAIT_REG_MEM, 6)
|
||||
.add(CP_WAIT_REG_MEM_0(.function = WRITE_NE, .poll = POLL_MEMORY))
|
||||
.add(CP_WAIT_REG_MEM_POLL_ADDR(query_sample(aq, stop)))
|
||||
.add(CP_WAIT_REG_MEM_3(.ref = 0xffffffff))
|
||||
.add(CP_WAIT_REG_MEM_4(.mask = 0xffffffff))
|
||||
.add(CP_WAIT_REG_MEM_5(.delay_loop_cycles = 16));
|
||||
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
|
||||
OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
|
||||
OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
|
||||
OUT_RELOC(epilogue, query_sample(aq, stop)); /* srcB */
|
||||
OUT_RELOC(epilogue, query_sample(aq, start)); /* srcC */
|
||||
fd_pkt7(epilogue, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(.neg_c = true, ._double = true))
|
||||
.add(CP_MEM_TO_MEM_DST(query_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(query_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(query_sample(aq, stop)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(query_sample(aq, start)));
|
||||
} else {
|
||||
OUT_PKT(ring, CP_EVENT_WRITE7,
|
||||
CP_EVENT_WRITE7_0(
|
||||
fd_pkt7(cs, CP_EVENT_WRITE7, 3)
|
||||
.add(CP_EVENT_WRITE7_0(
|
||||
.event = ZPASS_DONE,
|
||||
.write_sample_count = true,
|
||||
),
|
||||
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, stop)),
|
||||
);
|
||||
OUT_PKT(ring, CP_EVENT_WRITE7,
|
||||
CP_EVENT_WRITE7_0(
|
||||
))
|
||||
.add(EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, stop)));
|
||||
|
||||
fd_pkt7(cs, CP_EVENT_WRITE7, 3)
|
||||
.add(CP_EVENT_WRITE7_0(
|
||||
.event = ZPASS_DONE,
|
||||
.write_sample_count = true,
|
||||
.sample_count_end_offset = true,
|
||||
.write_accum_sample_count_diff = true,
|
||||
),
|
||||
))
|
||||
/* Note: SQE is adding offsets to the iova, SAMPLE_COUNT_END_OFFSET causes
|
||||
* the result to be written to iova+16, and WRITE_ACCUM_SAMP_COUNT_DIFF
|
||||
* does *(iova + 8) += *(iova + 16) - *iova
|
||||
|
|
@ -172,8 +170,7 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
* It just so happens this is the layout we already use for start/result/stop
|
||||
* So we just give the start address in all cases.
|
||||
*/
|
||||
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
|
||||
);
|
||||
.add(EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)));
|
||||
}
|
||||
|
||||
assert(ctx->occlusion_queries_active > 0);
|
||||
|
|
@ -219,23 +216,27 @@ occlusion_predicate_result_resource(struct fd_acc_query *aq, struct fd_ringbuffe
|
|||
int index, struct fd_resource *dst,
|
||||
unsigned offset)
|
||||
{
|
||||
fd_cs cs(ring);
|
||||
|
||||
/* This is a bit annoying but we need to turn the result into a one or
|
||||
* zero.. to do this use a CP_COND_WRITE to overwrite the result with
|
||||
* a one if it is non-zero. This doesn't change the results if the
|
||||
* query is also read on the CPU (ie. occlusion_predicate_result()).
|
||||
*/
|
||||
OUT_PKT7(ring, CP_COND_WRITE5, 9);
|
||||
OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
|
||||
CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY) |
|
||||
CP_COND_WRITE5_0_WRITE_MEMORY);
|
||||
OUT_RELOC(ring, query_sample(aq, result)); /* POLL_ADDR_LO/HI */
|
||||
OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
|
||||
OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
|
||||
OUT_RELOC(ring, query_sample(aq, result)); /* WRITE_ADDR_LO/HI */
|
||||
OUT_RING(ring, 1);
|
||||
OUT_RING(ring, 0);
|
||||
fd_pkt7(cs, CP_COND_WRITE5, 9)
|
||||
.add(CP_COND_WRITE5_0(
|
||||
.function = WRITE_NE,
|
||||
.poll = POLL_MEMORY,
|
||||
.write_memory = true
|
||||
))
|
||||
.add(CP_COND_WRITE5_POLL_ADDR(query_sample(aq, result)))
|
||||
.add(CP_COND_WRITE5_3(.ref = 0))
|
||||
.add(CP_COND_WRITE5_4(.mask = ~0))
|
||||
.add(CP_COND_WRITE5_WRITE_ADDR(query_sample(aq, result)))
|
||||
.add(1)
|
||||
.add(0);
|
||||
|
||||
copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
|
||||
copy_result(cs.ring(), result_type, dst, offset, fd_resource(aq->prsc),
|
||||
offsetof(struct fd6_query_sample, result));
|
||||
}
|
||||
|
||||
|
|
@ -277,28 +278,28 @@ template <chip CHIP>
|
|||
static void
|
||||
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
fd6_record_ts<CHIP>(ring, query_sample(aq, start));
|
||||
fd6_record_ts<CHIP>(cs, query_sample(aq, start));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
fd6_record_ts<CHIP>(ring, query_sample(aq, stop));
|
||||
fd6_record_ts<CHIP>(cs, query_sample(aq, stop));
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
|
||||
OUT_RELOC(ring, query_sample(aq, result)); /* dst */
|
||||
OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
|
||||
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
|
||||
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(.neg_c = true, ._double = true))
|
||||
.add(CP_MEM_TO_MEM_DST(query_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(query_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(query_sample(aq, stop)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(query_sample(aq, start)));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -312,8 +313,10 @@ template <chip CHIP>
|
|||
static void
|
||||
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
|
||||
{
|
||||
fd_ringbuffer_attach_bo(ring, bo);
|
||||
fd6_record_ts<CHIP>(ring, bo, offset);
|
||||
fd_cs cs(ring);
|
||||
|
||||
cs.attach_bo(bo);
|
||||
fd6_record_ts<CHIP>(cs, bo, offset);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -392,9 +395,8 @@ struct PACKED fd6_pipeline_stats_sample {
|
|||
};
|
||||
FD_DEFINE_CAST(fd_acc_query_sample, fd6_pipeline_stats_sample);
|
||||
|
||||
#define stats_reloc(ring, aq, field) \
|
||||
OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, \
|
||||
offsetof(struct fd6_pipeline_stats_sample, field), 0, 0);
|
||||
#define stats_sample(aq, field) \
|
||||
fd_resource((aq)->prsc)->bo, offsetof(struct fd6_pipeline_stats_sample, field)
|
||||
|
||||
/* Mapping of counters to pipeline stats:
|
||||
*
|
||||
|
|
@ -493,23 +495,22 @@ static void
|
|||
pipeline_stats_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
enum stats_type type = get_stats_type(aq);
|
||||
unsigned idx = stats_counter_index(aq);
|
||||
unsigned reg = REG_A6XX_RBBM_PIPESTAT_IAVERTICES + (2 * idx);
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_REG(reg));
|
||||
stats_reloc(ring, aq, start);
|
||||
/* snapshot the start value: */
|
||||
fd_pkt7(cs, CP_REG_TO_MEM, 3)
|
||||
.add(CP_REG_TO_MEM_0(.reg = reg, .cnt = 2, ._64b = true))
|
||||
.add(CP_REG_TO_MEM_DEST(stats_sample(aq, start)));
|
||||
|
||||
assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
|
||||
|
||||
if (!batch->pipeline_stats_queries_active[type])
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].start);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, stats_counter_events[type].start);
|
||||
batch->pipeline_stats_queries_active[type]++;
|
||||
}
|
||||
|
||||
|
|
@ -518,34 +519,36 @@ static void
|
|||
pipeline_stats_pause(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
enum stats_type type = get_stats_type(aq);
|
||||
unsigned idx = stats_counter_index(aq);
|
||||
unsigned reg = REG_A6XX_RBBM_PIPESTAT_IAVERTICES + (2 * idx);
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* snapshot the end values: */
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_REG(reg));
|
||||
stats_reloc(ring, aq, stop);
|
||||
fd_pkt7(cs, CP_REG_TO_MEM, 3)
|
||||
.add(CP_REG_TO_MEM_0(.reg = reg, .cnt = 2, ._64b = true))
|
||||
.add(CP_REG_TO_MEM_DEST(stats_sample(aq, stop)));
|
||||
|
||||
assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
|
||||
assert(batch->pipeline_stats_queries_active[type] > 0);
|
||||
|
||||
batch->pipeline_stats_queries_active[type]--;
|
||||
if (batch->pipeline_stats_queries_active[type])
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].stop);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, stats_counter_events[type].stop);
|
||||
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
|
||||
stats_reloc(ring, aq, result);
|
||||
stats_reloc(ring, aq, result);
|
||||
stats_reloc(ring, aq, stop)
|
||||
stats_reloc(ring, aq, start);
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(
|
||||
.neg_c = true,
|
||||
._double = true,
|
||||
.wait_for_mem_writes = true
|
||||
))
|
||||
.add(CP_MEM_TO_MEM_DST(stats_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(stats_sample(aq, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(stats_sample(aq, stop)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(stats_sample(aq, start)));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -603,9 +606,8 @@ struct PACKED fd6_primitives_sample {
|
|||
};
|
||||
FD_DEFINE_CAST(fd_acc_query_sample, fd6_primitives_sample);
|
||||
|
||||
#define primitives_reloc(ring, aq, field) \
|
||||
OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, \
|
||||
__offsetof(struct fd6_primitives_sample, field), 0, 0);
|
||||
#define primitives_sample(aq, field) \
|
||||
fd_resource((aq)->prsc)->bo, __offsetof(struct fd6_primitives_sample, field)
|
||||
|
||||
static void
|
||||
log_primitives_sample(struct fd6_primitives_sample *ps)
|
||||
|
|
@ -633,44 +635,40 @@ static void
|
|||
primitives_emitted_resume(struct fd_acc_query *aq,
|
||||
struct fd_batch *batch) assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
ASSERT_ALIGNED(struct fd6_primitives_sample, start[0], 32);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_QUERY_BASE, 2);
|
||||
primitives_reloc(ring, aq, start[0]);
|
||||
fd_pkt4(cs, 2)
|
||||
.add(A6XX_VPC_SO_QUERY_BASE(primitives_sample(aq, start[0])));
|
||||
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, FD_WRITE_PRIMITIVE_COUNTS);
|
||||
}
|
||||
|
||||
static void
|
||||
accumultate_primitives_emitted(struct fd_acc_query *aq,
|
||||
struct fd_ringbuffer *ring,
|
||||
int idx)
|
||||
accumultate_primitives_emitted(struct fd_acc_query *aq, fd_cs &cs, int idx)
|
||||
{
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
|
||||
primitives_reloc(ring, aq, result.emitted);
|
||||
primitives_reloc(ring, aq, result.emitted);
|
||||
primitives_reloc(ring, aq, stop[idx].emitted);
|
||||
primitives_reloc(ring, aq, start[idx].emitted);
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(.neg_c = true, ._double = true, .unk31 = true))
|
||||
.add(CP_MEM_TO_MEM_DST(primitives_sample(aq, result.emitted)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(primitives_sample(aq, result.emitted)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(primitives_sample(aq, stop[idx].emitted)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(primitives_sample(aq, start[idx].emitted)));
|
||||
}
|
||||
|
||||
static void
|
||||
accumultate_primitives_generated(struct fd_acc_query *aq,
|
||||
struct fd_ringbuffer *ring,
|
||||
int idx)
|
||||
accumultate_primitives_generated(struct fd_acc_query *aq, fd_cs &cs, int idx)
|
||||
{
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
|
||||
primitives_reloc(ring, aq, result.generated);
|
||||
primitives_reloc(ring, aq, result.generated);
|
||||
primitives_reloc(ring, aq, stop[idx].generated);
|
||||
primitives_reloc(ring, aq, start[idx].generated);
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(.neg_c = true, ._double = true, .unk31 = true))
|
||||
.add(CP_MEM_TO_MEM_DST(primitives_sample(aq, result.generated)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(primitives_sample(aq, result.generated)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(primitives_sample(aq, stop[idx].generated)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(primitives_sample(aq, start[idx].generated)));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -678,29 +676,29 @@ static void
|
|||
primitives_emitted_pause(struct fd_acc_query *aq,
|
||||
struct fd_batch *batch) assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
ASSERT_ALIGNED(struct fd6_primitives_sample, stop[0], 32);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_SO_QUERY_BASE, 2);
|
||||
primitives_reloc(ring, aq, stop[0]);
|
||||
fd_pkt4(cs, 2)
|
||||
.add(A6XX_VPC_SO_QUERY_BASE(primitives_sample(aq, stop[0])));
|
||||
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
|
||||
fd6_event_write<CHIP>(batch->ctx, ring, FD_CACHE_CLEAN);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, FD_WRITE_PRIMITIVE_COUNTS);
|
||||
fd6_event_write<CHIP>(batch->ctx, cs, FD_CACHE_CLEAN);
|
||||
|
||||
if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
|
||||
/* Need results from all channels: */
|
||||
for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
|
||||
accumultate_primitives_emitted(aq, ring, i);
|
||||
accumultate_primitives_generated(aq, ring, i);
|
||||
accumultate_primitives_emitted(aq, cs, i);
|
||||
accumultate_primitives_generated(aq, cs, i);
|
||||
}
|
||||
} else {
|
||||
accumultate_primitives_emitted(aq, ring, aq->base.index);
|
||||
accumultate_primitives_emitted(aq, cs, aq->base.index);
|
||||
/* Only need primitives generated counts for the overflow queries: */
|
||||
if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE)
|
||||
accumultate_primitives_generated(aq, ring, aq->base.index);
|
||||
accumultate_primitives_generated(aq, cs, aq->base.index);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -746,30 +744,36 @@ so_overflow_predicate_result_resource(struct fd_acc_query *aq,
|
|||
int index, struct fd_resource *dst,
|
||||
unsigned offset)
|
||||
{
|
||||
fd_ringbuffer_attach_bo(ring, dst->bo);
|
||||
fd_ringbuffer_attach_bo(ring, fd_resource(aq->prsc)->bo);
|
||||
fd_cs cs(ring);
|
||||
|
||||
cs.attach_bo(dst->bo);
|
||||
cs.attach_bo(fd_resource(aq->prsc)->bo);
|
||||
|
||||
/* result = generated - emitted: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 7);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_NEG_B |
|
||||
COND(result_type >= PIPE_QUERY_TYPE_I64, CP_MEM_TO_MEM_0_DOUBLE));
|
||||
OUT_RELOC(ring, dst->bo, offset, 0, 0);
|
||||
primitives_reloc(ring, aq, result.generated);
|
||||
primitives_reloc(ring, aq, result.emitted);
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 7)
|
||||
.add(CP_MEM_TO_MEM_0(
|
||||
.neg_b = true,
|
||||
._double = result_type >= PIPE_QUERY_TYPE_I64,
|
||||
))
|
||||
.add(CP_MEM_TO_MEM_DST(dst->bo, offset))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(primitives_sample(aq, result.generated)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(primitives_sample(aq, result.emitted)));
|
||||
|
||||
/* This is a bit awkward, but glcts expects the result to be 1 or 0
|
||||
* rather than non-zero vs zero:
|
||||
*/
|
||||
OUT_PKT7(ring, CP_COND_WRITE5, 9);
|
||||
OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
|
||||
CP_COND_WRITE5_0_POLL(POLL_MEMORY) |
|
||||
CP_COND_WRITE5_0_WRITE_MEMORY);
|
||||
OUT_RELOC(ring, dst->bo, offset, 0, 0); /* POLL_ADDR_LO/HI */
|
||||
OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
|
||||
OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
|
||||
OUT_RELOC(ring, dst->bo, offset, 0, 0); /* WRITE_ADDR_LO/HI */
|
||||
OUT_RING(ring, 1);
|
||||
OUT_RING(ring, 0);
|
||||
fd_pkt7(cs, CP_COND_WRITE5, 9)
|
||||
.add(CP_COND_WRITE5_0(
|
||||
.function = WRITE_NE,
|
||||
.poll = POLL_MEMORY,
|
||||
.write_memory = true
|
||||
))
|
||||
.add(CP_COND_WRITE5_POLL_ADDR(dst->bo, offset))
|
||||
.add(CP_COND_WRITE5_3(.ref = 0))
|
||||
.add(CP_COND_WRITE5_4(.mask = ~0))
|
||||
.add(CP_COND_WRITE5_WRITE_ADDR(dst->bo, offset))
|
||||
.add(1)
|
||||
.add(0);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -827,12 +831,12 @@ perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
{
|
||||
struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
|
||||
struct fd_screen *screen = data->screen;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
unsigned counters_per_group[screen->num_perfcntr_groups];
|
||||
memset(counters_per_group, 0, sizeof(counters_per_group));
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* configure performance counters for the requested queries: */
|
||||
for (unsigned i = 0; i < data->num_query_entries; i++) {
|
||||
|
|
@ -842,8 +846,10 @@ perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
|
||||
assert(counter_idx < g->num_counters);
|
||||
|
||||
OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
|
||||
OUT_RING(ring, g->countables[entry->cid].selector);
|
||||
fd_pkt4(cs, 1).add((fd_reg_pair){
|
||||
.reg = g->counters[counter_idx].select_reg,
|
||||
.value = g->countables[entry->cid].selector,
|
||||
});
|
||||
}
|
||||
|
||||
memset(counters_per_group, 0, sizeof(counters_per_group));
|
||||
|
|
@ -855,10 +861,9 @@ perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
unsigned counter_idx = counters_per_group[entry->gid]++;
|
||||
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, start));
|
||||
fd_pkt7(cs, CP_REG_TO_MEM, 3)
|
||||
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
|
||||
.add(CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, start)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -867,12 +872,12 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
{
|
||||
struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
|
||||
struct fd_screen *screen = data->screen;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
fd_cs cs(batch->draw);
|
||||
|
||||
unsigned counters_per_group[screen->num_perfcntr_groups];
|
||||
memset(counters_per_group, 0, sizeof(counters_per_group));
|
||||
|
||||
OUT_WFI5(ring);
|
||||
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* TODO do we need to bother to turn anything off? */
|
||||
|
||||
|
|
@ -883,21 +888,20 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
|||
unsigned counter_idx = counters_per_group[entry->gid]++;
|
||||
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, stop));
|
||||
fd_pkt7(cs, CP_REG_TO_MEM, 3)
|
||||
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
|
||||
.add(CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, stop)));
|
||||
}
|
||||
|
||||
/* and compute the result: */
|
||||
for (unsigned i = 0; i < data->num_query_entries; i++) {
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
|
||||
OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
|
||||
fd_pkt7(cs, CP_MEM_TO_MEM, 9)
|
||||
.add(CP_MEM_TO_MEM_0(.neg_c = true, ._double = true))
|
||||
.add(CP_MEM_TO_MEM_DST(query_sample_idx(aq, i, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_A(query_sample_idx(aq, i, result)))
|
||||
.add(CP_MEM_TO_MEM_SRC_B(query_sample_idx(aq, i, stop)))
|
||||
.add(CP_MEM_TO_MEM_SRC_C(query_sample_idx(aq, i, start)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
|
|||
const struct pipe_rasterizer_state *cso,
|
||||
bool primitive_restart)
|
||||
{
|
||||
unsigned ndwords = (CHIP >= A7XX) ? 66 : 26;
|
||||
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, ndwords * 4);
|
||||
float psize_min, psize_max;
|
||||
|
||||
if (cso->point_size_per_vertex) {
|
||||
|
|
@ -38,48 +36,45 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
|
|||
psize_max = cso->point_size;
|
||||
}
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_CL_CNTL(
|
||||
unsigned nreg = (CHIP >= A7XX) ? 46 : 15;
|
||||
fd_crb crb(ctx->pipe, nreg);
|
||||
|
||||
crb.add(A6XX_GRAS_CL_CNTL(
|
||||
.znear_clip_disable = !cso->depth_clip_near,
|
||||
.zfar_clip_disable = !cso->depth_clip_far,
|
||||
.z_clamp_enable = cso->depth_clamp || CHIP >= A7XX,
|
||||
.zero_gb_scale_z = cso->clip_halfz,
|
||||
.vp_clip_code_ignore = 1,
|
||||
),
|
||||
)
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_SU_CNTL(
|
||||
crb.add(A6XX_GRAS_SU_CNTL(
|
||||
.cull_front = cso->cull_face & PIPE_FACE_FRONT,
|
||||
.cull_back = cso->cull_face & PIPE_FACE_BACK,
|
||||
.front_cw = !cso->front_ccw,
|
||||
.linehalfwidth = cso->line_width / 2.0f,
|
||||
.poly_offset = cso->offset_tri,
|
||||
.line_mode = cso->multisample ? RECTANGULAR : BRESENHAM,
|
||||
),
|
||||
)
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_GRAS_SU_POINT_MINMAX(.min = psize_min, .max = psize_max, ),
|
||||
A6XX_GRAS_SU_POINT_SIZE(cso->point_size));
|
||||
crb.add(A6XX_GRAS_SU_POINT_MINMAX(.min = psize_min, .max = psize_max, ));
|
||||
crb.add(A6XX_GRAS_SU_POINT_SIZE(cso->point_size));
|
||||
crb.add(A6XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale));
|
||||
crb.add(A6XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units));
|
||||
crb.add(A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp));
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale),
|
||||
A6XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units),
|
||||
A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp));
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_PC_CNTL(
|
||||
crb.add(A6XX_PC_CNTL(
|
||||
.primitive_restart = primitive_restart,
|
||||
.provoking_vtx_last = !cso->flatshade_first,
|
||||
),
|
||||
)
|
||||
);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
OUT_REG(ring,
|
||||
A7XX_VPC_PC_CNTL(
|
||||
crb.add(A7XX_VPC_PC_CNTL(
|
||||
.primitive_restart = primitive_restart,
|
||||
.provoking_vtx_last = !cso->flatshade_first,
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -96,12 +91,12 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
|
|||
break;
|
||||
}
|
||||
|
||||
OUT_REG(ring, A6XX_VPC_RAST_CNTL(mode));
|
||||
OUT_REG(ring, PC_DGEN_RAST_CNTL(CHIP, mode));
|
||||
crb.add(A6XX_VPC_RAST_CNTL(mode));
|
||||
crb.add(PC_DGEN_RAST_CNTL(CHIP, mode));
|
||||
|
||||
if (CHIP == A7XX ||
|
||||
(CHIP == A6XX && ctx->screen->info->a6xx.is_a702)) {
|
||||
OUT_REG(ring, A6XX_VPC_PS_RAST_CNTL(mode));
|
||||
crb.add(A6XX_VPC_PS_RAST_CNTL(mode));
|
||||
}
|
||||
|
||||
/* With a7xx the hw doesn't do the clamping for us. When depth clamp
|
||||
|
|
@ -115,26 +110,23 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
|
|||
/* We must assume the max: */
|
||||
const unsigned num_viewports = 16;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_CL_VIEWPORT_ZCLAMP(0), num_viewports * 2);
|
||||
for (unsigned i = 0; i < num_viewports; i++) {
|
||||
OUT_RING(ring, fui(0.0f));
|
||||
OUT_RING(ring, fui(1.0f));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MIN(i, 0.0f));
|
||||
crb.add(A6XX_GRAS_CL_VIEWPORT_ZCLAMP_MAX(i, 1.0f));
|
||||
}
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_VIEWPORT_ZCLAMP_MIN(0.0f),
|
||||
A6XX_RB_VIEWPORT_ZCLAMP_MAX(1.0),
|
||||
);
|
||||
crb.add(A6XX_RB_VIEWPORT_ZCLAMP_MIN(0.0f));
|
||||
crb.add(A6XX_RB_VIEWPORT_ZCLAMP_MAX(1.0f));
|
||||
}
|
||||
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_legacy_pipeline_shading_rate) {
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A00());
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A10());
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A20());
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A30());
|
||||
crb.add(A6XX_RB_UNKNOWN_8A00());
|
||||
crb.add(A6XX_RB_UNKNOWN_8A10());
|
||||
crb.add(A6XX_RB_UNKNOWN_8A20());
|
||||
crb.add(A6XX_RB_UNKNOWN_8A30());
|
||||
}
|
||||
|
||||
return ring;
|
||||
return crb.ring();
|
||||
}
|
||||
FD_GENX(__fd6_setup_rasterizer_stateobj);
|
||||
|
||||
|
|
|
|||
|
|
@ -29,8 +29,6 @@ fd6_assert_valid_format(struct fd_resource *rsc, enum pipe_format format)
|
|||
assert(fd6_check_valid_format(rsc, format) == FORMAT_OK);
|
||||
}
|
||||
|
||||
void fd6_emit_flag_reference(struct fd_ringbuffer *ring,
|
||||
struct fd_resource *rsc, int level, int layer);
|
||||
template <chip CHIP>
|
||||
void fd6_resource_screen_init(struct pipe_screen *pscreen);
|
||||
|
||||
|
|
|
|||
|
|
@ -582,62 +582,28 @@ tex_key_equals(const void *_a, const void *_b)
|
|||
return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0;
|
||||
}
|
||||
|
||||
static enum a6xx_state_block
|
||||
stage2sb(mesa_shader_stage type)
|
||||
{
|
||||
switch (type) {
|
||||
case MESA_SHADER_VERTEX: return SB6_VS_TEX;
|
||||
case MESA_SHADER_TESS_CTRL: return SB6_HS_TEX;
|
||||
case MESA_SHADER_TESS_EVAL: return SB6_DS_TEX;
|
||||
case MESA_SHADER_GEOMETRY: return SB6_GS_TEX;
|
||||
case MESA_SHADER_FRAGMENT: return SB6_FS_TEX;
|
||||
case MESA_SHADER_COMPUTE: return SB6_CS_TEX;
|
||||
default:
|
||||
UNREACHABLE("bad state block");
|
||||
}
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
build_texture_state(struct fd_context *ctx, mesa_shader_stage type,
|
||||
struct fd_texture_stateobj *tex)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 32 * 4);
|
||||
unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
|
||||
struct fd_bo *tex_desc = NULL, *samp_desc = NULL;
|
||||
enum a6xx_state_block sb;
|
||||
|
||||
switch (type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
sb = SB6_VS_TEX;
|
||||
opcode = CP_LOAD_STATE6_GEOM;
|
||||
tex_samp_reg = REG_A6XX_SP_VS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_VS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_VS_TSIZE;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
sb = SB6_HS_TEX;
|
||||
opcode = CP_LOAD_STATE6_GEOM;
|
||||
tex_samp_reg = REG_A6XX_SP_HS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_HS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_HS_TSIZE;
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
sb = SB6_DS_TEX;
|
||||
opcode = CP_LOAD_STATE6_GEOM;
|
||||
tex_samp_reg = REG_A6XX_SP_DS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_DS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_DS_TSIZE;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
sb = SB6_GS_TEX;
|
||||
opcode = CP_LOAD_STATE6_GEOM;
|
||||
tex_samp_reg = REG_A6XX_SP_GS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_GS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_GS_TSIZE;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
sb = SB6_FS_TEX;
|
||||
opcode = CP_LOAD_STATE6_FRAG;
|
||||
tex_samp_reg = REG_A6XX_SP_PS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_PS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_PS_TSIZE;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
sb = SB6_CS_TEX;
|
||||
opcode = CP_LOAD_STATE6_FRAG;
|
||||
tex_samp_reg = REG_A6XX_SP_CS_SAMPLER_BASE;
|
||||
tex_const_reg = REG_A6XX_SP_CS_TEXMEMOBJ_BASE;
|
||||
tex_count_reg = REG_A6XX_SP_CS_TSIZE;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("bad state block");
|
||||
}
|
||||
fd_cs cs(ctx->pipe, 32 * 4);
|
||||
|
||||
if (tex->num_samplers > 0) {
|
||||
samp_desc = fd_bo_new(ctx->dev, tex->num_samplers * 4 * 4,
|
||||
|
|
@ -654,21 +620,7 @@ build_texture_state(struct fd_context *ctx, mesa_shader_stage type,
|
|||
buf += 4;
|
||||
}
|
||||
|
||||
fd_ringbuffer_attach_bo(ring, samp_desc);
|
||||
|
||||
/* output sampler state: */
|
||||
OUT_PKT7(ring, opcode, 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
|
||||
OUT_RELOC(ring, samp_desc, 0); /* SRC_ADDR_LO/HI */
|
||||
|
||||
OUT_PKT4(ring, tex_samp_reg, 2);
|
||||
OUT_RELOC(ring, samp_desc, 0); /* SRC_ADDR_LO/HI */
|
||||
|
||||
fd_bo_del(samp_desc);
|
||||
cs.attach_bo(samp_desc);
|
||||
}
|
||||
|
||||
if (tex->num_textures > 0) {
|
||||
|
|
@ -694,27 +646,73 @@ build_texture_state(struct fd_context *ctx, mesa_shader_stage type,
|
|||
buf += 16;
|
||||
}
|
||||
|
||||
fd_ringbuffer_attach_bo(ring, tex_desc);
|
||||
|
||||
/* emit texture state: */
|
||||
OUT_PKT7(ring, opcode, 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(tex->num_textures));
|
||||
OUT_RELOC(ring, tex_desc, 0); /* SRC_ADDR_LO/HI */
|
||||
|
||||
OUT_PKT4(ring, tex_const_reg, 2);
|
||||
OUT_RELOC(ring, tex_desc, 0); /* SRC_ADDR_LO/HI */
|
||||
|
||||
fd_bo_del(tex_desc);
|
||||
cs.attach_bo(tex_desc);
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, tex_count_reg, 1);
|
||||
OUT_RING(ring, tex->num_textures);
|
||||
with_crb (cs, 5) {
|
||||
switch (type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
crb.add(A6XX_SP_VS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_VS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_VS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
crb.add(A6XX_SP_HS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_HS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_HS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
crb.add(A6XX_SP_DS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_DS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_DS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
crb.add(A6XX_SP_GS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_GS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_GS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
crb.add(A6XX_SP_PS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_PS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_PS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
crb.add(A6XX_SP_CS_SAMPLER_BASE(samp_desc));
|
||||
crb.add(A6XX_SP_CS_TEXMEMOBJ_BASE(tex_desc));
|
||||
crb.add(A6XX_SP_CS_TSIZE(tex->num_textures));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("bad state block");
|
||||
}
|
||||
}
|
||||
|
||||
return ring;
|
||||
if (samp_desc) {
|
||||
fd_pkt7(cs, fd6_stage2opcode(type), 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.state_type = ST6_SHADER,
|
||||
.state_src = SS6_INDIRECT,
|
||||
.state_block = stage2sb(type),
|
||||
.num_unit = tex->num_samplers,
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(samp_desc));
|
||||
|
||||
fd_bo_del(samp_desc);
|
||||
}
|
||||
|
||||
if (tex_desc) {
|
||||
fd_pkt7(cs, fd6_stage2opcode(type), 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
.state_type = ST6_CONSTANTS,
|
||||
.state_src = SS6_INDIRECT,
|
||||
.state_block = stage2sb(type),
|
||||
.num_unit = tex->num_textures,
|
||||
))
|
||||
.add(CP_LOAD_STATE6_EXT_SRC_ADDR(tex_desc));
|
||||
|
||||
fd_bo_del(tex_desc);
|
||||
}
|
||||
|
||||
return cs.ring();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
|
|||
|
||||
enum adreno_compare_func depth_func =
|
||||
(enum adreno_compare_func)cso->depth_func; /* maps 1:1 */
|
||||
bool force_z_test_enable = false;
|
||||
|
||||
/* On some GPUs it is necessary to enable z test for depth bounds test
|
||||
* when UBWC is enabled. Otherwise, the GPU would hang. FUNC_ALWAYS is
|
||||
|
|
@ -100,16 +101,11 @@ fd6_zsa_state_create(struct pipe_context *pctx,
|
|||
*/
|
||||
if (cso->depth_bounds_test && !cso->depth_enabled &&
|
||||
ctx->screen->info->a6xx.depth_bounds_require_depth_test_quirk) {
|
||||
so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
|
||||
force_z_test_enable = true;
|
||||
depth_func = FUNC_ALWAYS;
|
||||
}
|
||||
|
||||
so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_ZFUNC(depth_func);
|
||||
|
||||
if (cso->depth_enabled) {
|
||||
so->rb_depth_cntl |=
|
||||
A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
|
||||
|
||||
so->lrz.test = true;
|
||||
|
||||
if (cso->depth_writemask) {
|
||||
|
|
@ -155,9 +151,6 @@ fd6_zsa_state_create(struct pipe_context *pctx,
|
|||
}
|
||||
}
|
||||
|
||||
if (cso->depth_writemask)
|
||||
so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
|
||||
|
||||
if (cso->stencil[0].enabled) {
|
||||
const struct pipe_stencil_state *s = &cso->stencil[0];
|
||||
|
||||
|
|
@ -167,97 +160,79 @@ fd6_zsa_state_create(struct pipe_context *pctx,
|
|||
*/
|
||||
update_lrz_stencil(so, (enum pipe_compare_func)s->func, util_writes_stencil(s));
|
||||
|
||||
so->rb_stencil_control |=
|
||||
A6XX_RB_STENCIL_CNTL_STENCIL_READ |
|
||||
A6XX_RB_STENCIL_CNTL_STENCIL_ENABLE |
|
||||
A6XX_RB_STENCIL_CNTL_FUNC((enum adreno_compare_func)s->func) | /* maps 1:1 */
|
||||
A6XX_RB_STENCIL_CNTL_FAIL(fd_stencil_op(s->fail_op)) |
|
||||
A6XX_RB_STENCIL_CNTL_ZPASS(fd_stencil_op(s->zpass_op)) |
|
||||
A6XX_RB_STENCIL_CNTL_ZFAIL(fd_stencil_op(s->zfail_op));
|
||||
|
||||
so->rb_stencilmask = A6XX_RB_STENCIL_MASK_MASK(s->valuemask);
|
||||
so->rb_stencilwrmask = A6XX_RB_STENCIL_WRITE_MASK_WRMASK(s->writemask);
|
||||
|
||||
if (cso->stencil[1].enabled) {
|
||||
const struct pipe_stencil_state *bs = &cso->stencil[1];
|
||||
|
||||
update_lrz_stencil(so, (enum pipe_compare_func)bs->func, util_writes_stencil(bs));
|
||||
|
||||
so->rb_stencil_control |=
|
||||
A6XX_RB_STENCIL_CNTL_STENCIL_ENABLE_BF |
|
||||
A6XX_RB_STENCIL_CNTL_FUNC_BF((enum adreno_compare_func)bs->func) | /* maps 1:1 */
|
||||
A6XX_RB_STENCIL_CNTL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
|
||||
A6XX_RB_STENCIL_CNTL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
|
||||
A6XX_RB_STENCIL_CNTL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
|
||||
|
||||
so->rb_stencilmask |= A6XX_RB_STENCIL_MASK_BFMASK(bs->valuemask);
|
||||
so->rb_stencilwrmask |= A6XX_RB_STENCIL_WRITE_MASK_BFWRMASK(bs->writemask);
|
||||
}
|
||||
}
|
||||
|
||||
if (cso->alpha_enabled) {
|
||||
/* Alpha test is functionally a conditional discard, so we can't
|
||||
* write LRZ before seeing if we end up discarding or not
|
||||
*/
|
||||
if (cso->alpha_func != PIPE_FUNC_ALWAYS) {
|
||||
so->lrz.write = false;
|
||||
so->alpha_test = true;
|
||||
}
|
||||
|
||||
uint32_t ref = cso->alpha_ref_value * 255.0f;
|
||||
so->rb_alpha_control =
|
||||
A6XX_RB_ALPHA_TEST_CNTL_ALPHA_TEST |
|
||||
A6XX_RB_ALPHA_TEST_CNTL_ALPHA_REF(ref) |
|
||||
A6XX_RB_ALPHA_TEST_CNTL_ALPHA_TEST_FUNC(
|
||||
(enum adreno_compare_func)cso->alpha_func);
|
||||
/* Alpha test is functionally a conditional discard, so we can't
|
||||
* write LRZ before seeing if we end up discarding or not
|
||||
*/
|
||||
if (cso->alpha_enabled && (cso->alpha_func != PIPE_FUNC_ALWAYS)) {
|
||||
so->lrz.write = false;
|
||||
so->alpha_test = true;
|
||||
}
|
||||
|
||||
if (cso->depth_bounds_test) {
|
||||
so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE |
|
||||
A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
|
||||
so->lrz.z_bounds_enable = true;
|
||||
}
|
||||
|
||||
const struct pipe_stencil_state *fs = &cso->stencil[0];
|
||||
const struct pipe_stencil_state *bs = &cso->stencil[1];
|
||||
|
||||
/* Build the four state permutations (with/without alpha/depth-clamp)*/
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 16 * 4);
|
||||
bool depth_clamp_enable = (i & FD6_ZSA_DEPTH_CLAMP);
|
||||
bool no_alpha = (i & FD6_ZSA_NO_ALPHA);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_ALPHA_TEST_CNTL, 1);
|
||||
OUT_RING(ring,
|
||||
(i & FD6_ZSA_NO_ALPHA)
|
||||
? so->rb_alpha_control & ~A6XX_RB_ALPHA_TEST_CNTL_ALPHA_TEST
|
||||
: so->rb_alpha_control);
|
||||
fd_crb crb(ctx->pipe, 9);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CNTL, 1);
|
||||
OUT_RING(ring, so->rb_stencil_control);
|
||||
crb.add(A6XX_RB_ALPHA_TEST_CNTL(
|
||||
.alpha_ref = (uint32_t)(cso->alpha_ref_value * 255.0f) & 0xff,
|
||||
.alpha_test = cso->alpha_enabled && !no_alpha,
|
||||
.alpha_test_func = (enum adreno_compare_func)cso->alpha_func,
|
||||
));
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SU_STENCIL_CNTL(cso->stencil[0].enabled));
|
||||
crb.add(A6XX_RB_STENCIL_CNTL(
|
||||
.stencil_enable = fs->enabled,
|
||||
.stencil_enable_bf = bs->enabled,
|
||||
.stencil_read = fs->enabled,
|
||||
.func = (enum adreno_compare_func)fs->func, /* maps 1:1 */
|
||||
.fail = fd_stencil_op(fs->fail_op),
|
||||
.zpass = fd_stencil_op(fs->zpass_op),
|
||||
.zfail = fd_stencil_op(fs->zfail_op),
|
||||
.func_bf = (enum adreno_compare_func)bs->func, /* maps 1:1 */
|
||||
.fail_bf = fd_stencil_op(bs->fail_op),
|
||||
.zpass_bf = fd_stencil_op(bs->zpass_op),
|
||||
.zfail_bf = fd_stencil_op(bs->zfail_op),
|
||||
));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1);
|
||||
OUT_RING(ring,
|
||||
so->rb_depth_cntl | COND(depth_clamp_enable || CHIP >= A7XX,
|
||||
A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE));
|
||||
crb.add(A6XX_GRAS_SU_STENCIL_CNTL(cso->stencil[0].enabled));
|
||||
crb.add(A6XX_RB_STENCIL_MASK(.mask = fs->valuemask, .bfmask = bs->valuemask));
|
||||
crb.add(A6XX_RB_STENCIL_WRITE_MASK(.wrmask = fs->writemask, .bfwrmask = bs->writemask));
|
||||
|
||||
OUT_REG(ring, A6XX_GRAS_SU_DEPTH_CNTL(cso->depth_enabled));
|
||||
crb.add(A6XX_RB_DEPTH_CNTL(
|
||||
.z_test_enable = cso->depth_enabled || force_z_test_enable,
|
||||
.z_write_enable = cso->depth_writemask,
|
||||
.zfunc = depth_func,
|
||||
.z_clamp_enable = depth_clamp_enable || CHIP >= A7XX,
|
||||
.z_read_enable = cso->depth_enabled || cso->depth_bounds_test,
|
||||
.z_bounds_enable = cso->depth_bounds_test,
|
||||
));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_RB_STENCIL_MASK, 2);
|
||||
OUT_RING(ring, so->rb_stencilmask);
|
||||
OUT_RING(ring, so->rb_stencilwrmask);
|
||||
crb.add(A6XX_GRAS_SU_DEPTH_CNTL(cso->depth_enabled));
|
||||
|
||||
if (CHIP >= A7XX && !depth_clamp_enable) {
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_DEPTH_BOUND_MIN(0.0f),
|
||||
A6XX_RB_DEPTH_BOUND_MAX(1.0f),
|
||||
);
|
||||
crb.add(A6XX_RB_DEPTH_BOUND_MIN(0.0f));
|
||||
crb.add(A6XX_RB_DEPTH_BOUND_MAX(1.0f));
|
||||
} else {
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_DEPTH_BOUND_MIN(cso->depth_bounds_min),
|
||||
A6XX_RB_DEPTH_BOUND_MAX(cso->depth_bounds_max),
|
||||
);
|
||||
crb.add(A6XX_RB_DEPTH_BOUND_MIN(cso->depth_bounds_min));
|
||||
crb.add(A6XX_RB_DEPTH_BOUND_MAX(cso->depth_bounds_max));
|
||||
}
|
||||
|
||||
so->stateobj[i] = ring;
|
||||
so->stateobj[i] = crb.ring();
|
||||
}
|
||||
|
||||
return so;
|
||||
|
|
|
|||
|
|
@ -23,12 +23,6 @@
|
|||
struct fd6_zsa_stateobj {
|
||||
struct pipe_depth_stencil_alpha_state base;
|
||||
|
||||
uint32_t rb_alpha_control;
|
||||
uint32_t rb_depth_cntl;
|
||||
uint32_t rb_stencil_control;
|
||||
uint32_t rb_stencilmask;
|
||||
uint32_t rb_stencilwrmask;
|
||||
|
||||
struct fd6_lrz_state lrz;
|
||||
bool writes_zs : 1; /* writes depth and/or stencil */
|
||||
bool writes_z : 1; /* writes depth */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue