freedreno/a6xx+: Use template to handle a6xx vs a7xx differences

This doesn't enable support for a7xx yet, but uses the new register pack
builders for registers that differ between a7xx and a6xx.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22837>
This commit is contained in:
Rob Clark 2023-03-03 13:45:34 -08:00 committed by Marge Bot
parent 526831ee2e
commit 6dc8afc19b
20 changed files with 442 additions and 283 deletions

View file

@ -262,6 +262,7 @@ emit_setup(struct fd_batch *batch)
fd6_emit_ccu_cntl(ring, screen, false);
}
template <chip CHIP>
static void
emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
bool scissor_enable, union pipe_color_union *color,
@ -296,14 +297,14 @@ emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
* controlling the internal/accumulator format or something like
* that. It's certainly not tied to only the src format.
*/
OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
OUT_RING(
ring,
A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) |
COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) |
COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) |
COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) |
A6XX_SP_2D_DST_FORMAT_MASK(0xf));
OUT_REG(ring, SP_2D_DST_FORMAT(
CHIP,
.sint = util_format_is_pure_sint(pfmt),
.uint = util_format_is_pure_uint(pfmt),
.color_format = fmt,
.srgb = is_srgb,
.mask = 0xf,
));
OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
OUT_RING(ring, unknown_8c01);
@ -330,6 +331,7 @@ emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
/* buffers need to be handled specially since x/width can exceed the bounds
* supported by hw.. if necessary decompose into (potentially) two 2D blits
*/
template <chip CHIP>
static void
emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_blit_info *info)
@ -379,7 +381,7 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
sshift = sbox->x & 0x3f;
dshift = dbox->x & 0x3f;
emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
unsigned soff, doff, w, p;
@ -397,22 +399,26 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
* Emit source:
*/
OUT_REG(ring,
A6XX_SP_PS_2D_SRC_INFO(
SP_PS_2D_SRC_INFO(
CHIP,
.color_format = FMT6_8_UNORM,
.tile_mode = TILE6_LINEAR,
.color_swap = WZYX,
.unk20 = true,
.unk22 = true,
),
A6XX_SP_PS_2D_SRC_SIZE(
SP_PS_2D_SRC_SIZE(
CHIP,
.width = sshift + w,
.height = 1,
),
A6XX_SP_PS_2D_SRC(
SP_PS_2D_SRC(
CHIP,
.bo = src->bo,
.bo_offset = soff,
),
A6XX_SP_PS_2D_SRC_PITCH(
SP_PS_2D_SRC_PITCH(
CHIP,
.pitch = p,
),
);
@ -454,19 +460,20 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
}
template <chip CHIP>
static void
fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
{
struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
union pipe_color_union color = {};
emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
OUT_REG(ring,
A6XX_SP_PS_2D_SRC_INFO(),
A6XX_SP_PS_2D_SRC_SIZE(),
A6XX_SP_PS_2D_SRC(),
A6XX_SP_PS_2D_SRC_PITCH(),
SP_PS_2D_SRC_INFO(CHIP),
SP_PS_2D_SRC_SIZE(CHIP),
SP_PS_2D_SRC(CHIP),
SP_PS_2D_SRC_PITCH(CHIP),
);
OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
@ -583,6 +590,7 @@ emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
}
}
template <chip CHIP>
static void
emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
unsigned layer, unsigned nr_samples, bool sample_0)
@ -605,7 +613,8 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
sfmt = FMT6_A8_UNORM;
OUT_REG(ring,
A6XX_SP_PS_2D_SRC_INFO(
SP_PS_2D_SRC_INFO(
CHIP,
.color_format = sfmt,
.tile_mode = stile,
.color_swap = sswap,
@ -617,28 +626,36 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
.unk20 = true,
.unk22 = true,
),
A6XX_SP_PS_2D_SRC_SIZE(
SP_PS_2D_SRC_SIZE(
CHIP,
.width = width,
.height = height,
),
A6XX_SP_PS_2D_SRC(
SP_PS_2D_SRC(
CHIP,
.bo = src->bo,
.bo_offset = soff,
),
A6XX_SP_PS_2D_SRC_PITCH(
SP_PS_2D_SRC_PITCH(
CHIP,
.pitch = pitch,
),
);
if (subwc_enabled) {
OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6);
fd6_emit_flag_reference(ring, src, info->src.level, layer);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
OUT_REG(ring,
SP_PS_2D_SRC_FLAGS(
CHIP,
.bo = src->bo,
.bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
),
SP_PS_2D_SRC_FLAGS_PITCH(
CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
);
}
}
template <chip CHIP>
static void
emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_blit_info *info, bool sample_0)
@ -699,11 +716,11 @@ emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
}
emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
for (unsigned i = 0; i < info->dst.box.depth; i++) {
emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0);
emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples, sample_0);
emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
dbox->z + i);
@ -809,6 +826,7 @@ convert_color(enum pipe_format format, union pipe_color_union *pcolor)
return color;
}
template <chip CHIP>
void
fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct pipe_surface *psurf, const struct pipe_box *box2d,
@ -830,7 +848,7 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
union pipe_color_union clear_color = convert_color(psurf->format, color);
emit_clear_color(ring, psurf->format, &clear_color);
emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
i++) {
@ -856,6 +874,14 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
}
template void fd6_clear_surface<A6XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct pipe_surface *psurf, const struct pipe_box *box2d,
union pipe_color_union *color, uint32_t unknown_8c01);
template void fd6_clear_surface<A7XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct pipe_surface *psurf, const struct pipe_box *box2d,
union pipe_color_union *color, uint32_t unknown_8c01);
template <chip CHIP>
static void
fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
unsigned level, const struct pipe_box *box, const void *data)
@ -890,7 +916,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
util_format_unpack_s_8uint(prsc->format, &stencil, data, 1);
if (rsc->stencil)
fd6_clear_texture(pctx, &rsc->stencil->b.b, level, box, &stencil);
fd6_clear_texture<CHIP>(pctx, &rsc->stencil->b.b, level, box, &stencil);
color.f[0] = depth;
color.ui[1] = stencil;
@ -928,7 +954,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
},
};
fd6_clear_surface(ctx, batch->draw, &surf, box, &color, 0);
fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
@ -945,6 +971,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
fd_context_dirty(ctx, FD_DIRTY_QUERY);
}
template <chip CHIP>
void
fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
@ -969,7 +996,7 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
/* Enable scissor bit, which will take into account the window scissor
* which is set per-tile
*/
emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
/* We shouldn't be using GMEM in the layered rendering case: */
assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@ -980,24 +1007,32 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
OUT_RING(ring,
A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) |
A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22);
OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) |
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height));
OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */
OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */
OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_REG(ring,
SP_PS_2D_SRC_INFO(
CHIP,
.color_format = sfmt,
.tile_mode = TILE6_2,
.color_swap = WZYX,
.srgb = util_format_is_srgb(psurf->format),
.samples = samples,
.samples_average = samples > MSAA_ONE,
.unk20 = true,
.unk22 = true,
),
SP_PS_2D_SRC_SIZE(
CHIP,
.width = psurf->width,
.height = psurf->height,
),
SP_PS_2D_SRC(
CHIP,
.qword = gmem_base,
),
SP_PS_2D_SRC_PITCH(
CHIP,
.pitch = gmem_pitch,
),
);
/* sync GMEM writes with CACHE. */
fd6_cache_inv(batch, ring);
@ -1018,6 +1053,12 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
fd_wfi(batch, ring);
}
template void fd6_resolve_tile<A6XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
template void fd6_resolve_tile<A7XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
template <chip CHIP>
static bool
handle_rgba_blit(struct fd_context *ctx,
const struct pipe_blit_info *info, bool sample_0) assert_dt
@ -1065,12 +1106,12 @@ handle_rgba_blit(struct fd_context *ctx,
(info->dst.resource->target == PIPE_BUFFER)) {
assert(src->layout.tile_mode == TILE6_LINEAR);
assert(dst->layout.tile_mode == TILE6_LINEAR);
emit_blit_buffer(ctx, batch->draw, info);
emit_blit_buffer<CHIP>(ctx, batch->draw, info);
} else {
/* I don't *think* we need to handle blits between buffer <-> !buffer */
assert(info->src.resource->target != PIPE_BUFFER);
assert(info->dst.resource->target != PIPE_BUFFER);
emit_blit_texture(ctx, batch->draw, info, sample_0);
emit_blit_texture<CHIP>(ctx, batch->draw, info, sample_0);
}
trace_end_blit(&batch->trace, batch->draw);
@ -1098,11 +1139,12 @@ handle_rgba_blit(struct fd_context *ctx,
* in particular as u_blitter cannot blit stencil. So handle the fallback
* ourselves and never "fail".
*/
template <chip CHIP>
static bool
do_rewritten_blit(struct fd_context *ctx,
const struct pipe_blit_info *info, bool sample_0) assert_dt
{
bool success = handle_rgba_blit(ctx, info, sample_0);
bool success = handle_rgba_blit<CHIP>(ctx, info, sample_0);
if (!success) {
if (sample_0 && !util_format_is_pure_integer(info->src.format))
mesa_logw("sample averaging on fallback blit when we shouldn't.");
@ -1116,6 +1158,7 @@ do_rewritten_blit(struct fd_context *ctx,
* Handle depth/stencil blits either via u_blitter and/or re-writing the
* blit into an equivalent format that we can handle
*/
template <chip CHIP>
static bool
handle_zs_blit(struct fd_context *ctx,
const struct pipe_blit_info *info) assert_dt
@ -1139,14 +1182,14 @@ handle_zs_blit(struct fd_context *ctx,
blit.mask = PIPE_MASK_R;
blit.src.format = PIPE_FORMAT_R8_UINT;
blit.dst.format = PIPE_FORMAT_R8_UINT;
return do_rewritten_blit(ctx, &blit, true);
return do_rewritten_blit<CHIP>(ctx, &blit, true);
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
if (info->mask & PIPE_MASK_Z) {
blit.mask = PIPE_MASK_R;
blit.src.format = PIPE_FORMAT_R32_FLOAT;
blit.dst.format = PIPE_FORMAT_R32_FLOAT;
do_rewritten_blit(ctx, &blit, true);
do_rewritten_blit<CHIP>(ctx, &blit, true);
}
if (info->mask & PIPE_MASK_S) {
@ -1155,7 +1198,7 @@ handle_zs_blit(struct fd_context *ctx,
blit.dst.format = PIPE_FORMAT_R8_UINT;
blit.src.resource = &src->stencil->b.b;
blit.dst.resource = &dst->stencil->b.b;
do_rewritten_blit(ctx, &blit, true);
do_rewritten_blit<CHIP>(ctx, &blit, true);
}
return true;
@ -1164,7 +1207,7 @@ handle_zs_blit(struct fd_context *ctx,
blit.mask = PIPE_MASK_R;
blit.src.format = PIPE_FORMAT_R16_UNORM;
blit.dst.format = PIPE_FORMAT_R16_UNORM;
return do_rewritten_blit(ctx, &blit, true);
return do_rewritten_blit<CHIP>(ctx, &blit, true);
case PIPE_FORMAT_Z32_UNORM:
case PIPE_FORMAT_Z32_FLOAT:
@ -1172,7 +1215,7 @@ handle_zs_blit(struct fd_context *ctx,
blit.mask = PIPE_MASK_R;
blit.src.format = PIPE_FORMAT_R32_UINT;
blit.dst.format = PIPE_FORMAT_R32_UINT;
return do_rewritten_blit(ctx, &blit, true);
return do_rewritten_blit<CHIP>(ctx, &blit, true);
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
@ -1206,6 +1249,7 @@ handle_zs_blit(struct fd_context *ctx,
}
}
template <chip CHIP>
static bool
handle_compressed_blit(struct fd_context *ctx,
const struct pipe_blit_info *info) assert_dt
@ -1251,7 +1295,7 @@ handle_compressed_blit(struct fd_context *ctx,
blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
return do_rewritten_blit(ctx, &blit, false);
return do_rewritten_blit<CHIP>(ctx, &blit, false);
}
/**
@ -1260,6 +1304,7 @@ handle_compressed_blit(struct fd_context *ctx,
* (also -1.0), when we're supposed to be memcpying the bits. See
* https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
*/
template <chip CHIP>
static bool
handle_snorm_copy_blit(struct fd_context *ctx,
const struct pipe_blit_info *info)
@ -1273,41 +1318,48 @@ handle_snorm_copy_blit(struct fd_context *ctx,
blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
return do_rewritten_blit(ctx, &blit, false);
return do_rewritten_blit<CHIP>(ctx, &blit, false);
}
template <chip CHIP>
static bool
fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
{
if (info->mask & PIPE_MASK_ZS)
return handle_zs_blit(ctx, info);
return handle_zs_blit<CHIP>(ctx, info);
if (util_format_is_compressed(info->src.format) ||
util_format_is_compressed(info->dst.format))
return handle_compressed_blit(ctx, info);
return handle_compressed_blit<CHIP>(ctx, info);
if ((info->src.format == info->dst.format) &&
util_format_is_snorm(info->src.format))
return handle_snorm_copy_blit(ctx, info);
return handle_snorm_copy_blit<CHIP>(ctx, info);
return handle_rgba_blit(ctx, info, false);
return handle_rgba_blit<CHIP>(ctx, info, false);
}
template <chip CHIP>
void
fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis
fd6_blitter_init(struct pipe_context *pctx)
disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->clear_ubwc = fd6_clear_ubwc;
ctx->clear_ubwc = fd6_clear_ubwc<CHIP>;
ctx->validate_format = fd6_validate_format;
if (FD_DBG(NOBLIT))
return;
pctx->clear_texture = fd6_clear_texture;
ctx->blit = fd6_blit;
pctx->clear_texture = fd6_clear_texture<CHIP>;
ctx->blit = fd6_blit<CHIP>;
}
/* Teach the compiler about needed variants: */
template void fd6_blitter_init<A6XX>(struct pipe_context *pctx);
template void fd6_blitter_init<A7XX>(struct pipe_context *pctx);
unsigned
fd6_tile_mode(const struct pipe_resource *tmpl)
{

View file

@ -32,8 +32,8 @@
#include "freedreno_context.h"
BEGINC;
template <chip CHIP>
void fd6_blitter_init(struct pipe_context *pctx);
unsigned fd6_tile_mode(const struct pipe_resource *tmpl);
@ -42,12 +42,12 @@ unsigned fd6_tile_mode(const struct pipe_resource *tmpl);
* instead of CP_EVENT_WRITE::BLITs
*/
template <chip CHIP>
void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct pipe_surface *psurf, const struct pipe_box *box2d,
union pipe_color_union *color, uint32_t unknown_8c01) assert_dt;
template <chip CHIP>
void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) assert_dt;
ENDC;
#endif /* FD6_BLIT_H_ */

View file

@ -41,6 +41,7 @@
#include "fd6_pack.h"
/* maybe move to fd6_program? */
template <chip CHIP>
static void
cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct ir3_shader_variant *v)
@ -49,14 +50,16 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct ir3_info *i = &v->info;
enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
.fs_state = true, .cs_state = true,
.cs_ibo = true, .gfx_ibo = true, ));
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
A6XX_HLSQ_CS_CNTL_ENABLED);
OUT_REG(ring, HLSQ_CS_CNTL(
CHIP,
.constlen = v->constlen,
.enabled = true,
));
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 1);
OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
@ -103,6 +106,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd6_emit_immediates(ctx->screen, v, ring);
}
template <chip CHIP>
static void
fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
{
@ -119,7 +123,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
return;
cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
cs_program_emit(ctx, cs->stateobj, cs->v);
cs_program_emit<CHIP>(ctx, cs->stateobj, cs->v);
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v);
}
@ -156,7 +160,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
}
if (ctx->gen_dirty)
fd6_emit_cs_state(ctx, ring, cs);
fd6_emit_cs_state<CHIP>(ctx, ring, cs);
if (ctx->gen_dirty & BIT(FD6_GROUP_CONST))
fd6_emit_cs_user_consts(ctx, ring, cs);
@ -201,25 +205,37 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
const unsigned *num_groups = info->grid;
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
const unsigned work_dim = info->work_dim ? info->work_dim : 3;
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
OUT_RING(ring,
A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
OUT_RING(ring,
A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
OUT_RING(ring,
A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
OUT_REG(ring,
HLSQ_CS_NDRANGE_0(
CHIP,
.kerneldim = work_dim,
.localsizex = local_size[0] - 1,
.localsizey = local_size[1] - 1,
.localsizez = local_size[2] - 1,
),
HLSQ_CS_NDRANGE_1(
CHIP,
.globalsize_x = local_size[0] * num_groups[0],
),
HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0),
HLSQ_CS_NDRANGE_3(
CHIP,
.globalsize_y = local_size[1] * num_groups[1],
),
HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0),
HLSQ_CS_NDRANGE_5(
CHIP,
.globalsize_z = local_size[2] * num_groups[2],
),
HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0),
);
OUT_REG(ring,
HLSQ_CS_KERNEL_GROUP_X(CHIP, 1),
HLSQ_CS_KERNEL_GROUP_Y(CHIP, 1),
HLSQ_CS_KERNEL_GROUP_Z(CHIP, 1),
);
if (info->indirect) {
struct fd_resource *rsc = fd_resource(info->indirect);
@ -264,11 +280,18 @@ fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso)
free(hwcso);
}
template <chip CHIP>
void
fd6_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
fd6_compute_init(struct pipe_context *pctx)
disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->launch_grid = fd6_launch_grid;
ctx->launch_grid = fd6_launch_grid<CHIP>;
pctx->create_compute_state = fd6_compute_state_create;
pctx->delete_compute_state = fd6_compute_state_delete;
}
/* Teach the compiler about needed variants: */
template void fd6_compute_init<A6XX>(struct pipe_context *pctx);
template void fd6_compute_init<A7XX>(struct pipe_context *pctx);

View file

@ -36,7 +36,7 @@ struct fd6_compute_state {
uint32_t user_consts_cmdstream_size;
};
EXTERNC
template <chip CHIP>
void fd6_compute_init(struct pipe_context *pctx);
#endif /* FD6_COMPUTE_H_ */

View file

@ -228,6 +228,7 @@ setup_state_map(struct fd_context *ctx)
BIT(FD6_GROUP_NON_GROUP));
}
template <chip CHIP>
struct pipe_context *
fd6_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags) disable_thread_safety_analysis
@ -253,11 +254,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create;
pctx->create_vertex_elements_state = fd6_vertex_state_create;
fd6_draw_init(pctx);
fd6_compute_init(pctx);
fd6_gmem_init(pctx);
fd6_draw_init<CHIP>(pctx);
fd6_compute_init<CHIP>(pctx);
fd6_gmem_init<CHIP>(pctx);
fd6_texture_init(pctx);
fd6_prog_init(pctx);
fd6_prog_init<CHIP>(pctx);
fd6_query_context_init(pctx);
setup_state_map(&fd6_ctx->base);
@ -297,7 +298,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
fd_context_setup_common_vbos(&fd6_ctx->base);
fd6_blitter_init(pctx);
fd6_blitter_init<CHIP>(pctx);
return fd_context_init_tc(pctx, flags);
}
/* Teach the compiler about needed variants: */
template struct pipe_context *fd6_context_create<A6XX>(struct pipe_screen *pscreen, void *priv, unsigned flags);
template struct pipe_context *fd6_context_create<A7XX>(struct pipe_screen *pscreen, void *priv, unsigned flags);

View file

@ -38,8 +38,6 @@
#include "a6xx.xml.h"
BEGINC;
struct fd6_lrz_state {
union {
struct {
@ -154,6 +152,7 @@ fd6_context(struct fd_context *ctx)
return (struct fd6_context *)ctx;
}
template <chip CHIP>
struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv,
unsigned flags);
@ -197,6 +196,4 @@ fd6_vertex_stateobj(void *p)
return (struct fd6_vertex_stateobj *)p;
}
ENDC;
#endif /* FD6_CONTEXT_H_ */

View file

@ -202,6 +202,7 @@ flush_streamout(struct fd_context *ctx, struct fd6_emit *emit)
}
}
template <chip CHIP>
static void
fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
@ -344,7 +345,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
}
if (emit.dirty_groups)
fd6_emit_3d_state(ring, &emit);
fd6_emit_3d_state<CHIP>(ring, &emit);
if (ctx->batch->barrier)
fd6_barrier_flush(ctx->batch);
@ -398,7 +399,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
if (emit.dirty_groups) {
emit.state.num_groups = 0;
emit.draw = &draws[i];
fd6_emit_3d_state(ring, &emit);
fd6_emit_3d_state<CHIP>(ring, &emit);
}
assert(!index_offset); /* handled by util_draw_multi() */
@ -418,6 +419,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
fd_context_all_clean(ctx);
}
template <chip CHIP>
static void
fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) assert_dt
{
@ -436,7 +438,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a
fd6_emit_ccu_cntl(ring, screen, false);
OUT_REG(ring,
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
.fs_state = true, .cs_state = true,
.cs_ibo = true, .gfx_ibo = true,
@ -451,23 +453,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a
OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
OUT_RING(ring, 0x0);
OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_REG(ring,
SP_PS_2D_SRC_INFO(CHIP),
SP_PS_2D_SRC_SIZE(CHIP),
SP_PS_2D_SRC(CHIP),
SP_PS_2D_SRC_PITCH(CHIP),
);
OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
OUT_RING(ring, 0x0000f410);
OUT_REG(ring, SP_2D_DST_FORMAT(
CHIP,
// TODO probably FMT6_16_UNORM, but this matches what we used to emit:
.color_format = FMT6_32_32_32_32_FLOAT,
.mask = 0xf,
));
OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
OUT_RING(ring,
@ -545,6 +543,7 @@ is_z32(enum pipe_format format)
}
}
template <chip CHIP>
static bool
fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
const union pipe_color_union *color, double depth,
@ -566,7 +565,7 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
zsbuf->lrz_valid = true;
zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
fd6_clear_lrz(ctx->batch, zsbuf, depth);
fd6_clear_lrz<CHIP>(ctx->batch, zsbuf, depth);
}
}
@ -586,10 +585,16 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
return true;
}
template <chip CHIP>
void
fd6_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
fd6_draw_init(struct pipe_context *pctx)
disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbos = fd6_draw_vbos;
ctx->clear = fd6_clear;
ctx->clear = fd6_clear<CHIP>;
ctx->draw_vbos = fd6_draw_vbos<CHIP>;
}
/* Teach the compiler about needed variants: */
template void fd6_draw_init<A6XX>(struct pipe_context *pctx);
template void fd6_draw_init<A7XX>(struct pipe_context *pctx);

View file

@ -34,7 +34,7 @@
#include "fd6_context.h"
EXTERNC
template <chip CHIP>
void fd6_draw_init(struct pipe_context *pctx);
#endif /* FD6_DRAW_H_ */

View file

@ -535,6 +535,7 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem)
return ring;
}
template <chip CHIP>
void
fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
@ -596,7 +597,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
FD6_GROUP_PROG_INTERP);
break;
case FD6_GROUP_RASTERIZER:
state = fd6_rasterizer_state(ctx, emit->primitive_restart);
state = fd6_rasterizer_state<CHIP>(ctx, emit->primitive_restart);
fd6_state_add_group(&emit->state, state, FD6_GROUP_RASTERIZER);
break;
case FD6_GROUP_PROG_FB_RAST:
@ -613,23 +614,23 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR);
break;
case FD6_GROUP_VS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false);
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_VERTEX, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS);
break;
case FD6_GROUP_HS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false);
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_TESS_CTRL, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_HS_BINDLESS);
break;
case FD6_GROUP_DS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false);
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_TESS_EVAL, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_DS_BINDLESS);
break;
case FD6_GROUP_GS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false);
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_GEOMETRY, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_GS_BINDLESS);
break;
case FD6_GROUP_FS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read);
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read);
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS);
break;
case FD6_GROUP_CONST:
@ -686,6 +687,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_state_emit(&emit->state, ring);
}
template void fd6_emit_3d_state<A6XX>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A7XX>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template <chip CHIP>
void
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd6_compute_state *cs)
@ -722,7 +727,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
case FD6_GROUP_CS_BINDLESS:
fd6_state_take_group(
&state,
fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false),
fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_COMPUTE, false),
FD6_GROUP_CS_BINDLESS);
break;
default:
@ -749,9 +754,13 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme
));
}
template void fd6_emit_cs_state<A6XX>(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs);
template void fd6_emit_cs_state<A7XX>(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs);
/* emit setup at begin of new cmdstream buffer (don't rely on previous
* state, there could have been a context switch between ioctls):
*/
template <chip CHIP>
void
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
@ -767,7 +776,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
fd6_cache_inv(batch, ring);
OUT_REG(ring,
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
.fs_state = true, .cs_state = true,
.cs_ibo = true, .gfx_ibo = true,
@ -820,7 +829,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value);
WRITE(REG_A6XX_PC_RASTER_CNTL, 0);
OUT_REG(ring, PC_RASTER_CNTL(CHIP));
WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);
@ -844,7 +853,11 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
OUT_REG(ring, HLSQ_CONTROL_5_REG(
CHIP,
.linelengthregid = INVALID_REG,
.foveationqualityregid = INVALID_REG,
));
emit_marker6(ring, 7);
@ -904,6 +917,9 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
}
}
template void fd6_emit_restore<A6XX>(struct fd_batch *batch, struct fd_ringbuffer *ring);
template void fd6_emit_restore<A7XX>(struct fd_batch *batch, struct fd_ringbuffer *ring);
static void
fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
unsigned dst_off, struct pipe_resource *src, unsigned src_off,

View file

@ -332,22 +332,22 @@ fd6_gl2spacing(enum gl_tess_spacing spacing)
}
}
/* Emit entry points.  Declarations preceded by `template <chip CHIP>` are
 * parameterized on the chip generation; fd6_emit_ccu_cntl and
 * fd6_emit_init_screen are not templated.
 */
BEGINC;
template <chip CHIP>
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
struct fd6_emit *emit) assert_dt;
/* Forward declaration; only used through a pointer in the prototype below. */
struct fd6_compute_state;
template <chip CHIP>
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd6_compute_state *cs) assert_dt;
void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);
template <chip CHIP>
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
void fd6_emit_init_screen(struct pipe_screen *pscreen);
ENDC;
static inline void
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{

View file

@ -73,6 +73,7 @@ fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
}
}
template <chip CHIP>
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
const struct fd_gmem_stateobj *gmem)
@ -127,7 +128,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
OUT_REG(
ring,
A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
RB_MRT_BUF_INFO(CHIP, i, .color_format = format,
.color_tile_mode = tile_mode, .color_swap = swap),
A6XX_RB_MRT_PITCH(i, stride),
A6XX_RB_MRT_ARRAY_PITCH(i, array_stride),
@ -155,6 +156,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}
template <chip CHIP>
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
const struct fd_gmem_stateobj *gmem)
@ -169,7 +171,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
OUT_REG(
ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
ring, RB_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt),
A6XX_RB_DEPTH_BUFFER_PITCH(stride),
A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(array_stride),
A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
@ -209,22 +211,25 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
uint32_t offset =
fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
OUT_REG(ring, RB_STENCIL_INFO(CHIP, .separate_stencil = true),
A6XX_RB_STENCIL_BUFFER_PITCH(stride),
A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(array_stride),
A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo, .bo_offset = offset),
A6XX_RB_STENCIL_BUFFER_BASE_GMEM(base));
} else {
OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
}
} else {
OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
OUT_REG(ring,
RB_DEPTH_BUFFER_INFO(
CHIP,
.depth_format = DEPTH6_NONE,
),
A6XX_RB_DEPTH_BUFFER_PITCH(),
A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(),
A6XX_RB_DEPTH_BUFFER_BASE(),
A6XX_RB_DEPTH_BUFFER_BASE_GMEM(),
);
OUT_REG(ring,
A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));
@ -236,7 +241,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
}
}
@ -359,13 +364,13 @@ patch_fb_read_sysmem(struct fd_batch *batch)
util_dynarray_clear(&batch->fb_read_patches);
}
template <chip CHIP>
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
bool binning)
{
struct fd_ringbuffer *ring = batch->gmem;
struct fd_screen *screen = batch->ctx->screen;
uint32_t cntl = 0;
bool depth_ubwc_enable = false;
uint32_t mrts_ubwc_enable = 0;
int i;
@ -387,20 +392,23 @@ update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
mrts_ubwc_enable |= 1 << i;
}
cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2);
if (binning)
cntl |= A6XX_RB_RENDER_CNTL_BINNING;
struct fd_reg_pair rb_render_cntl = RB_RENDER_CNTL(
CHIP,
.ccusinglecachelinesize = 2,
.binning = binning,
.flag_depth = depth_ubwc_enable,
.flag_mrts = mrts_ubwc_enable,
);
if (screen->info->a6xx.has_cp_reg_write) {
OUT_PKT7(ring, CP_REG_WRITE, 3);
OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
OUT_PKT(ring, CP_REG_WRITE,
CP_REG_WRITE_0(TRACK_RENDER_CNTL),
CP_REG_WRITE_1(rb_render_cntl.reg),
CP_REG_WRITE_2(rb_render_cntl.value),
);
} else {
OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
OUT_REG(ring, rb_render_cntl);
}
OUT_RING(ring, cntl |
COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}
/* extra size to store VSC_DRAW_STRM_SIZE: */
@ -693,12 +701,14 @@ struct bin_size_params {
unsigned lrz_feedback_zmode_mask;
};
template <chip CHIP>
static void
set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem,
struct bin_size_params p)
{
unsigned w = gmem ? gmem->bin_w : 0;
unsigned h = gmem ? gmem->bin_h : 0;
OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(
.binw = w, .binh = h,
.render_mode = p.render_mode,
@ -706,7 +716,8 @@ set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem,
.buffers_location = p.buffers_location,
.lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
));
OUT_REG(ring, A6XX_RB_BIN_CONTROL(
OUT_REG(ring, RB_BIN_CONTROL(
CHIP,
.binw = w, .binh = h,
.render_mode = p.render_mode,
.force_lrz_write_dis = p.force_lrz_write_dis,
@ -827,9 +838,11 @@ emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
}
/* Forward declarations: both helpers are called from fd6_emit_tile_init()
 * below.  prepare_tile_fini_ib is templated on the chip generation.
 */
static void prepare_tile_setup_ib(struct fd_batch *batch);
template <chip CHIP>
static void prepare_tile_fini_ib(struct fd_batch *batch);
/* before first tile */
template <chip CHIP>
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
@ -838,7 +851,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
struct fd_screen *screen = batch->ctx->screen;
fd6_emit_restore(batch, ring);
fd6_emit_restore<CHIP>(batch, ring);
fd6_emit_lrz_flush(ring);
@ -851,7 +864,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
fd6_cache_inv(batch, ring);
prepare_tile_setup_ib(batch);
prepare_tile_fini_ib(batch);
prepare_tile_fini_ib<CHIP>(batch);
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x0);
@ -863,8 +876,8 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
fd_wfi(batch, ring);
fd6_emit_ccu_cntl(ring, screen, true);
emit_zs(ring, pfb->zsbuf, batch->gmem_state);
emit_mrt(ring, pfb, batch->gmem_state);
emit_zs<CHIP>(ring, pfb->zsbuf, batch->gmem_state);
emit_mrt<CHIP>(ring, pfb, batch->gmem_state);
emit_msaa(ring, pfb->samples);
patch_fb_read_gmem(batch);
@ -872,12 +885,12 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
/* enable stream-out during binning pass: */
OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
set_bin_size(ring, gmem, {
set_bin_size<CHIP>(ring, gmem, {
.render_mode = BINNING_PASS,
.buffers_location = BUFFERS_IN_GMEM,
.lrz_feedback_zmode_mask = 0x6,
});
update_render_cntl(batch, pfb, true);
update_render_cntl<CHIP>(batch, pfb, true);
emit_binning_pass(batch);
/* and disable stream-out for draw pass: */
@ -890,7 +903,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
*/
// NOTE a618 not setting .FORCE_LRZ_WRITE_DIS ..
set_bin_size(ring, gmem, {
set_bin_size<CHIP>(ring, gmem, {
.render_mode = RENDERING_PASS,
.force_lrz_write_dis = true,
.buffers_location = BUFFERS_IN_GMEM,
@ -912,18 +925,19 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
/* no binning pass, so enable stream-out for draw pass:: */
OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
set_bin_size(ring, gmem, {
set_bin_size<CHIP>(ring, gmem, {
.render_mode = RENDERING_PASS,
.buffers_location = BUFFERS_IN_GMEM,
.lrz_feedback_zmode_mask = 0x6,
});
}
update_render_cntl(batch, pfb, false);
update_render_cntl<CHIP>(batch, pfb, false);
emit_common_init(batch);
}
template <chip CHIP>
static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
@ -933,8 +947,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
OUT_REG(ring, SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1));
OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
OUT_RING(ring,
@ -942,6 +955,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
}
/* before mem2gmem */
template <chip CHIP>
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
@ -984,10 +998,10 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
OUT_RING(ring, 0x0);
set_window_offset(ring, x1, y1);
set_window_offset<CHIP>(ring, x1, y1);
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
set_bin_size(ring, gmem, {
set_bin_size<CHIP>(ring, gmem, {
.render_mode = RENDERING_PASS,
.buffers_location = BUFFERS_IN_GMEM,
.lrz_feedback_zmode_mask = 0x6,
@ -996,7 +1010,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
OUT_PKT7(ring, CP_SET_MODE, 1);
OUT_RING(ring, 0x0);
} else {
set_window_offset(ring, x1, y1);
set_window_offset<CHIP>(ring, x1, y1);
OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
OUT_RING(ring, 0x1);
@ -1388,6 +1402,7 @@ fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
return 0;
}
template <chip CHIP>
static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
uint32_t base, struct pipe_surface *psurf,
@ -1412,7 +1427,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
* !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
* just do a restore of the other channel for partial packed z/s writes.
*/
fd6_resolve_tile(batch, ring, base, psurf, 0);
fd6_resolve_tile<CHIP>(batch, ring, base, psurf, 0);
return;
}
@ -1442,6 +1457,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
* transfer from gmem to system memory (ie. normal RAM)
*/
template <chip CHIP>
static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
@ -1459,12 +1475,12 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
FD_BUFFER_DEPTH);
emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[0],
pfb->zsbuf, FD_BUFFER_DEPTH);
}
if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
FD_BUFFER_STENCIL);
emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[1],
pfb->zsbuf, FD_BUFFER_STENCIL);
}
}
@ -1475,8 +1491,8 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
continue;
if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
continue;
emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
FD_BUFFER_COLOR);
emit_resolve_blit<CHIP>(batch, ring, gmem->cbuf_base[i],
pfb->cbufs[i], FD_BUFFER_COLOR);
}
}
}
@ -1546,6 +1562,7 @@ fd6_emit_tile_fini(struct fd_batch *batch)
}
}
template <chip CHIP>
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
@ -1572,7 +1589,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
continue;
fd6_clear_surface(ctx, ring, pfb->cbufs[i], &box2d, &color, 0);
fd6_clear_surface<CHIP>(ctx, ring, pfb->cbufs[i], &box2d, &color, 0);
}
}
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
@ -1587,8 +1604,8 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
value.f[0] = batch->clear_depth;
value.ui[1] = batch->clear_stencil;
fd6_clear_surface(ctx, ring, pfb->zsbuf, &box2d,
&value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
fd6_clear_surface<CHIP>(ctx, ring, pfb->zsbuf, &box2d,
&value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
}
if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
@ -1598,7 +1615,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
stencil_surf.format = PIPE_FORMAT_S8_UINT;
stencil_surf.texture = separate_stencil;
fd6_clear_surface(ctx, ring, &stencil_surf, &box2d, &value, 0);
fd6_clear_surface<CHIP>(ctx, ring, &stencil_surf, &box2d, &value, 0);
}
}
@ -1608,13 +1625,14 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
trace_end_clear_restore(&batch->trace, ring);
}
template <chip CHIP>
static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
struct fd_ringbuffer *ring = batch->gmem;
struct fd_screen *screen = batch->ctx->screen;
fd6_emit_restore(batch, ring);
fd6_emit_restore<CHIP>(batch, ring);
fd6_emit_lrz_flush(ring);
if (batch->prologue) {
@ -1638,14 +1656,14 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
else
set_scissor(ring, 0, 0, 0, 0);
set_window_offset(ring, 0, 0);
set_window_offset<CHIP>(ring, 0, 0);
set_bin_size(ring, NULL, {
set_bin_size<CHIP>(ring, NULL, {
.render_mode = RENDERING_PASS,
.buffers_location = BUFFERS_IN_SYSMEM,
});
emit_sysmem_clears(batch, ring);
emit_sysmem_clears<CHIP>(batch, ring);
emit_marker6(ring, 7);
OUT_PKT7(ring, CP_SET_MARKER, 1);
@ -1671,12 +1689,12 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
OUT_RING(ring, 0x1);
emit_zs(ring, pfb->zsbuf, NULL);
emit_mrt(ring, pfb, NULL);
emit_zs<CHIP>(ring, pfb->zsbuf, NULL);
emit_mrt<CHIP>(ring, pfb, NULL);
emit_msaa(ring, pfb->samples);
patch_fb_read_sysmem(batch);
update_render_cntl(batch, pfb, false);
update_render_cntl<CHIP>(batch, pfb, false);
emit_common_init(batch);
}
@ -1704,18 +1722,24 @@ fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
fd_wfi(batch, ring);
}
/* Install the tile (gmem) and sysmem emit callbacks on the context.
 *
 * The emit_tile_init, emit_tile_prep and emit_sysmem_prep hooks are bound to
 * the <CHIP> instantiation of their templated implementation; the remaining
 * hooks are assigned non-templated functions.
 */
template <chip CHIP>
void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
fd6_gmem_init(struct pipe_context *pctx)
disable_thread_safety_analysis
{
struct fd_context *ctx = fd_context(pctx);
ctx->emit_tile_init = fd6_emit_tile_init;
ctx->emit_tile_prep = fd6_emit_tile_prep;
ctx->emit_tile_init = fd6_emit_tile_init<CHIP>;
ctx->emit_tile_prep = fd6_emit_tile_prep<CHIP>;
ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
ctx->emit_tile = fd6_emit_tile;
ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
ctx->emit_tile_fini = fd6_emit_tile_fini;
ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
ctx->emit_sysmem_prep = fd6_emit_sysmem_prep<CHIP>;
ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}
/* Teach the compiler about needed variants: */
template void fd6_gmem_init<A6XX>(struct pipe_context *pctx);
template void fd6_gmem_init<A7XX>(struct pipe_context *pctx);

View file

@ -30,7 +30,7 @@
#include "pipe/p_context.h"
/* Installs the gmem/sysmem emit callbacks on the context; templated on the
 * chip generation.
 */
EXTERNC
template <chip CHIP>
void fd6_gmem_init(struct pipe_context *pctx);
#endif /* FD6_GMEM_H_ */

View file

@ -169,6 +169,7 @@ validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *se
}
/* Build bindless descriptor state, returns ownership of state reference */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
bool append_fb_read)
@ -258,8 +259,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
unsigned idx = ir3_shader_descriptor_set(shader);
if (shader == PIPE_SHADER_COMPUTE) {
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.cs_bindless = 0x1f));
OUT_REG(ring, A6XX_SP_CS_BINDLESS_BASE_DESCRIPTOR(
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .cs_bindless = 0x1f));
OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
@ -300,8 +301,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
);
}
} else {
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.gfx_bindless = 0x1f));
OUT_REG(ring, A6XX_SP_BINDLESS_BASE_DESCRIPTOR(
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .gfx_bindless = 0x1f));
OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP,
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
@ -346,6 +347,9 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
return ring;
}
/* Explicit instantiations of fd6_build_bindless_state for the two chip
 * generations (A6XX and A7XX).
 */
template struct fd_ringbuffer *fd6_build_bindless_state<A6XX>(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read);
template struct fd_ringbuffer *fd6_build_bindless_state<A7XX>(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read);
static void
fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned count,

View file

@ -30,14 +30,11 @@
#include "freedreno_context.h"
BEGINC;
/* Build bindless descriptor state for the given shader stage; the caller
 * receives ownership of the returned state reference.  Templated on the chip
 * generation.
 */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
bool append_fb_read) assert_dt;
void fd6_image_init(struct pipe_context *pctx);
ENDC;
#endif /* FD6_IMAGE_H_ */

View file

@ -305,33 +305,44 @@ sp_xs_config(struct ir3_shader_variant *v)
A6XX_SP_VS_CONFIG_NSAMP(v->num_samp);
}
template <chip CHIP>
static void
setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
{
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 100 * 4);
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
.fs_state = true, .cs_state = true,
.cs_ibo = true, .gfx_ibo = true, ));
assert(state->vs->constlen >= state->bs->constlen);
OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) |
A6XX_HLSQ_VS_CNTL_ENABLED);
OUT_RING(ring, COND(state->hs,
A6XX_HLSQ_HS_CNTL_ENABLED |
A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen)));
OUT_RING(ring, COND(state->ds,
A6XX_HLSQ_DS_CNTL_ENABLED |
A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen)));
OUT_RING(ring, COND(state->gs,
A6XX_HLSQ_GS_CNTL_ENABLED |
A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen)));
OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1);
OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) |
A6XX_HLSQ_FS_CNTL_ENABLED);
OUT_REG(ring, HLSQ_VS_CNTL(
CHIP,
.constlen = state->vs->constlen,
.enabled = true,
));
OUT_REG(ring, HLSQ_HS_CNTL(
CHIP,
.constlen = COND(state->hs, state->hs->constlen),
.enabled = COND(state->hs, true),
));
OUT_REG(ring, HLSQ_DS_CNTL(
CHIP,
.constlen = COND(state->ds, state->ds->constlen),
.enabled = COND(state->ds, true),
));
OUT_REG(ring, HLSQ_GS_CNTL(
CHIP,
.constlen = COND(state->gs, state->gs->constlen),
.enabled = COND(state->gs, true),
));
OUT_REG(ring, HLSQ_FS_CNTL(
CHIP,
.constlen = state->fs->constlen,
.enabled = true,
));
OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);
OUT_RING(ring, sp_xs_config(state->vs));
@ -397,6 +408,7 @@ tex_opc_to_prefetch_cmd(opc_t tex_opc)
}
}
template <chip CHIP>
static void
setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd6_program_state *state,
@ -549,16 +561,18 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE));
for (int i = 0; i < fs->num_sampler_prefetch; i++) {
const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
OUT_RING(ring,
A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) |
A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) |
A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) |
A6XX_SP_FS_PREFETCH_CMD_CMD(
tex_opc_to_prefetch_cmd(prefetch->tex_opc)));
OUT_RING(ring, SP_FS_PREFETCH_CMD(
CHIP, i,
.src = prefetch->src,
.samp_id = prefetch->samp_id,
.tex_id = prefetch->tex_id,
.dst = prefetch->dst,
.wrmask = prefetch->wrmask,
.half = prefetch->half_precision,
.bindless = prefetch->bindless,
.cmd = tex_opc_to_prefetch_cmd(prefetch->tex_opc),
).value
);
}
OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1);
@ -864,31 +878,43 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
OUT_RING(ring, 0x7); /* XXX */
OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
A6XX_HLSQ_CONTROL_2_REG_CENTERRHW(ij_regid[IJ_PERSP_CENTER_RHW]));
OUT_RING(
ring,
A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(
ij_regid[IJ_PERSP_CENTROID]) |
A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(
ij_regid[IJ_LINEAR_CENTROID]));
OUT_RING(
ring,
A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
OUT_RING(ring, 0xfcfc); /* line length (?), foveation quality */
OUT_REG(ring,
HLSQ_CONTROL_1_REG(CHIP, 0x7), /* XXX */
HLSQ_CONTROL_2_REG(
CHIP,
.faceregid = face_regid,
.sampleid = samp_id_regid,
.samplemask = smask_in_regid,
.centerrhw = ij_regid[IJ_PERSP_CENTER_RHW],
),
HLSQ_CONTROL_3_REG(
CHIP,
.ij_persp_pixel = ij_regid[IJ_PERSP_PIXEL],
.ij_linear_pixel = ij_regid[IJ_LINEAR_PIXEL],
.ij_persp_centroid = ij_regid[IJ_PERSP_CENTROID],
.ij_linear_centroid = ij_regid[IJ_LINEAR_CENTROID],
),
HLSQ_CONTROL_4_REG(
CHIP,
.ij_persp_sample = ij_regid[IJ_PERSP_SAMPLE],
.ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE],
.xycoordregid = coord_regid,
.zwcoordregid = zwcoord_regid,
),
HLSQ_CONTROL_5_REG(
CHIP,
.linelengthregid = INVALID_REG,
.foveationqualityregid = INVALID_REG,
),
);
OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1);
OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(fssz) |
COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS));
OUT_REG(ring,
HLSQ_FS_CNTL_0(
CHIP,
.threadsize = fssz,
.varyings = enable_varyings,
),
);
OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1);
OUT_RING(
@ -1287,6 +1313,7 @@ emit_interp_state(struct fd_ringbuffer *ring, const struct fd6_program_state *st
OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}
template <chip CHIP>
static struct ir3_program_state *
fd6_program_create(void *data, struct ir3_shader_variant *bs,
struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
@ -1336,9 +1363,9 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
fd_screen_unlock(screen);
}
setup_config_stateobj(ctx, state);
setup_stateobj(state->binning_stateobj, ctx, state, key, true);
setup_stateobj(state->stateobj, ctx, state, key, false);
setup_config_stateobj<CHIP>(ctx, state);
setup_stateobj<CHIP>(state->binning_stateobj, ctx, state, key, true);
setup_stateobj<CHIP>(state->stateobj, ctx, state, key, false);
state->interp_stateobj = create_interp_stateobj(ctx, state);
const struct ir3_stream_output_info *stream_output =
@ -1403,19 +1430,25 @@ fd6_program_destroy(void *data, struct ir3_program_state *state)
free(so);
}
/* Shader-cache callback table, one per chip generation.  fd6_prog_init()
 * passes &cache_funcs<CHIP> to ir3_cache_create(), so program creation goes
 * through the matching fd6_program_create<CHIP> instantiation; destruction
 * is not chip-specific.
 */
template <chip CHIP>
static const struct ir3_cache_funcs cache_funcs = {
.create_state = fd6_program_create,
.create_state = fd6_program_create<CHIP>,
.destroy_state = fd6_program_destroy,
};
/* Create the context's shader cache from the cache_funcs table for this chip
 * generation, then call ir3_prog_init() and fd_prog_init() on the same
 * pipe context.
 */
template <chip CHIP>
void
fd6_prog_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
ctx->shader_cache = ir3_cache_create(&cache_funcs<CHIP>, ctx);
ir3_prog_init(pctx);
fd_prog_init(pctx);
}
/* Teach the compiler about needed variants: */
template void fd6_prog_init<A6XX>(struct pipe_context *pctx);
template void fd6_prog_init<A7XX>(struct pipe_context *pctx);

View file

@ -99,15 +99,12 @@ fd6_last_shader(const struct fd6_program_state *state)
return state->vs;
}
/* Program-related entry points.  Only fd6_prog_init is templated on the chip
 * generation; fd6_emit_shader and fd6_program_interp_state are not.
 */
BEGINC;
void fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct ir3_shader_variant *so) assert_dt;
struct fd_ringbuffer *fd6_program_interp_state(struct fd6_emit *emit) assert_dt;
template <chip CHIP>
void fd6_prog_init(struct pipe_context *pctx);
ENDC;
#endif /* FD6_PROGRAM_H_ */

View file

@ -35,6 +35,7 @@
#include "fd6_pack.h"
#include "fd6_rasterizer.h"
template <chip CHIP>
struct fd_ringbuffer *
__fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
const struct pipe_rasterizer_state *cso,
@ -102,7 +103,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
}
OUT_REG(ring, A6XX_VPC_POLYGON_MODE(mode));
OUT_REG(ring, A6XX_PC_POLYGON_MODE(mode));
OUT_REG(ring, PC_POLYGON_MODE(CHIP, mode));
if (ctx->screen->info->a6xx.has_shading_rate) {
OUT_REG(ring, A6XX_RB_UNKNOWN_8A00());
@ -114,6 +115,9 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
return ring;
}
/* Explicit instantiations of __fd6_setup_rasterizer_stateobj for the two chip
 * generations (A6XX and A7XX).
 */
template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj<A6XX>(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart);
template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj<A7XX>(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart);
void *
fd6_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)

View file

@ -33,8 +33,6 @@
#include "freedreno_context.h"
BEGINC;
struct fd6_rasterizer_stateobj {
struct pipe_rasterizer_state base;
@ -51,11 +49,13 @@ void *fd6_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
void fd6_rasterizer_state_delete(struct pipe_context *, void *hwcso);
/* Returns the rasterizer state object for the given CSO and
 * primitive_restart setting, for one chip generation.
 * fd6_rasterizer_state() calls this when the stateobj for the requested
 * variant has not been created yet and stores the result.
 */
template <chip CHIP>
struct fd_ringbuffer *
__fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
const struct pipe_rasterizer_state *cso,
bool primitive_restart);
template <chip CHIP>
static inline struct fd_ringbuffer *
fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt
{
@ -64,13 +64,11 @@ fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt
unsigned variant = primitive_restart;
if (unlikely(!rasterizer->stateobjs[variant])) {
rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj(
rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj<CHIP>(
ctx, ctx->rasterizer, primitive_restart);
}
return rasterizer->stateobjs[variant];
}
ENDC;
#endif /* FD6_RASTERIZER_H_ */

View file

@ -169,7 +169,11 @@ fd6_screen_init(struct pipe_screen *pscreen)
FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED |
FD_GMEM_BLEND_ENABLED | FD_GMEM_LOGICOP_ENABLED);
pscreen->context_create = fd6_context_create;
if (screen->gen == 7) {
pscreen->context_create = fd6_context_create<A7XX>;
} else {
pscreen->context_create = fd6_context_create<A6XX>;
}
pscreen->is_format_supported = fd6_screen_is_format_supported;
screen->tile_mode = fd6_tile_mode;

View file

@ -28,7 +28,7 @@
#ifndef FD6_SCREEN_H_
#define FD6_SCREEN_H_
#include "pipe/p_screen.h"
#include "freedreno_screen.h"
EXTERNC void fd6_screen_init(struct pipe_screen *pscreen);