diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc index 71ce3451327..7c1775fa7e2 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc @@ -262,6 +262,7 @@ emit_setup(struct fd_batch *batch) fd6_emit_ccu_cntl(ring, screen, false); } +template static void emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt, bool scissor_enable, union pipe_color_union *color, @@ -296,14 +297,14 @@ emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt, * controlling the internal/accumulator format or something like * that. It's certainly not tied to only the src format. */ - OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1); - OUT_RING( - ring, - A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) | - COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) | - COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) | - COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) | - A6XX_SP_2D_DST_FORMAT_MASK(0xf)); + OUT_REG(ring, SP_2D_DST_FORMAT( + CHIP, + .sint = util_format_is_pure_sint(pfmt), + .uint = util_format_is_pure_uint(pfmt), + .color_format = fmt, + .srgb = is_srgb, + .mask = 0xf, + )); OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); OUT_RING(ring, unknown_8c01); @@ -330,6 +331,7 @@ emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst, /* buffers need to be handled specially since x/width can exceed the bounds * supported by hw.. if necessary decompose into (potentially) two 2D blits */ +template static void emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_blit_info *info) @@ -379,7 +381,7 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, sshift = sbox->x & 0x3f; dshift = dbox->x & 0x3f; - emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0); + emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0); for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) { unsigned soff, doff, w, p; @@ -397,22 +399,26 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, * Emit source: */ OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO( + SP_PS_2D_SRC_INFO( + CHIP, .color_format = FMT6_8_UNORM, .tile_mode = TILE6_LINEAR, .color_swap = WZYX, .unk20 = true, .unk22 = true, ), - A6XX_SP_PS_2D_SRC_SIZE( + SP_PS_2D_SRC_SIZE( + CHIP, .width = sshift + w, .height = 1, ), - A6XX_SP_PS_2D_SRC( + SP_PS_2D_SRC( + CHIP, .bo = src->bo, .bo_offset = soff, ), - A6XX_SP_PS_2D_SRC_PITCH( + SP_PS_2D_SRC_PITCH( + CHIP, .pitch = p, ), ); @@ -454,19 +460,20 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, } } +template static void fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt { struct fd_ringbuffer *ring = fd_batch_get_prologue(batch); union pipe_color_union color = {}; - emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0); + emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0); OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO(), - A6XX_SP_PS_2D_SRC_SIZE(), - A6XX_SP_PS_2D_SRC(), - A6XX_SP_PS_2D_SRC_PITCH(), + SP_PS_2D_SRC_INFO(CHIP), + SP_PS_2D_SRC_SIZE(CHIP), + SP_PS_2D_SRC(CHIP), + SP_PS_2D_SRC_PITCH(CHIP), ); OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); @@ -583,6 +590,7 @@ emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, } } +template static void emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, unsigned layer, unsigned nr_samples, bool sample_0) @@ -605,7 +613,8 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, sfmt = FMT6_A8_UNORM; OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO( + SP_PS_2D_SRC_INFO( + CHIP, .color_format = sfmt, .tile_mode = stile, .color_swap = sswap, @@ -617,28 +626,36 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, .unk20 = true, .unk22 = true, ), - A6XX_SP_PS_2D_SRC_SIZE( + SP_PS_2D_SRC_SIZE( + CHIP, .width = width, .height = height, ), - A6XX_SP_PS_2D_SRC( + SP_PS_2D_SRC( + CHIP, .bo = src->bo, .bo_offset = soff, ), - A6XX_SP_PS_2D_SRC_PITCH( + SP_PS_2D_SRC_PITCH( + CHIP, .pitch = pitch, ), ); - if (subwc_enabled) { - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6); - fd6_emit_flag_reference(ring, src, info->src.level, layer); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) { + OUT_REG(ring, + SP_PS_2D_SRC_FLAGS( + CHIP, + .bo = src->bo, + .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer), + ), + SP_PS_2D_SRC_FLAGS_PITCH( + CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)), + ); } } +template static void emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_blit_info *info, bool sample_0) @@ -699,11 +716,11 @@ emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1)); } - emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate); + emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate); for (unsigned i = 0; i < info->dst.box.depth; i++) { - emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0); + emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0); emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level, dbox->z + i); @@ -809,6 +826,7 @@ convert_color(enum pipe_format format, union pipe_color_union *pcolor) return color; } +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, struct pipe_surface *psurf, const struct pipe_box *box2d, @@ -830,7 +848,7 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, union pipe_color_union clear_color = convert_color(psurf->format, color); emit_clear_color(ring, psurf->format, &clear_color); - emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0); + emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0); for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer; i++) { @@ -856,6 +874,14 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, } } +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct pipe_surface *psurf, const struct pipe_box *box2d, + union pipe_color_union *color, uint32_t unknown_8c01); +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct pipe_surface *psurf, const struct pipe_box *box2d, + union pipe_color_union *color, uint32_t unknown_8c01); + +template static void fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned level, const struct pipe_box *box, const void *data) @@ -890,7 +916,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, util_format_unpack_s_8uint(prsc->format, &stencil, data, 1); if (rsc->stencil) - fd6_clear_texture(pctx, &rsc->stencil->b.b, level, box, &stencil); + fd6_clear_texture(pctx, &rsc->stencil->b.b, level, box, &stencil); color.f[0] = depth; color.ui[1] = stencil; @@ -928,7 +954,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, }, }; - fd6_clear_surface(ctx, batch->draw, &surf, box, &color, 0); + fd6_clear_surface(ctx, batch->draw, &surf, box, &color, 0); fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true); fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true); @@ -945,6 +971,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, fd_context_dirty(ctx, FD_DIRTY_QUERY); } +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) @@ -969,7 +996,7 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, /* Enable scissor bit, which will take into account the window scissor * which is set per-tile */ - emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0); + emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0); /* We shouldn't be using GMEM in the layered rendering case: */ assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); @@ -980,24 +1007,32 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR); enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples); - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10); - OUT_RING(ring, - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) | - A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) | - COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) | - COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) | - A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22); - OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height)); - OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */ - OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */ - OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch)); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + OUT_REG(ring, + SP_PS_2D_SRC_INFO( + CHIP, + .color_format = sfmt, + .tile_mode = TILE6_2, + .color_swap = WZYX, + .srgb = util_format_is_srgb(psurf->format), + .samples = samples, + .samples_average = samples > MSAA_ONE, + .unk20 = true, + .unk22 = true, + ), + SP_PS_2D_SRC_SIZE( + CHIP, + .width = psurf->width, + .height = psurf->height, + ), + SP_PS_2D_SRC( + CHIP, + .qword = gmem_base, + ), + SP_PS_2D_SRC_PITCH( + CHIP, + .pitch = gmem_pitch, + ), + ); /* sync GMEM writes with CACHE. */ fd6_cache_inv(batch, ring); @@ -1018,6 +1053,12 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, fd_wfi(batch, ring); } +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01); +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01); + +template static bool handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info, bool sample_0) assert_dt @@ -1065,12 +1106,12 @@ handle_rgba_blit(struct fd_context *ctx, (info->dst.resource->target == PIPE_BUFFER)) { assert(src->layout.tile_mode == TILE6_LINEAR); assert(dst->layout.tile_mode == TILE6_LINEAR); - emit_blit_buffer(ctx, batch->draw, info); + emit_blit_buffer(ctx, batch->draw, info); } else { /* I don't *think* we need to handle blits between buffer <-> !buffer */ assert(info->src.resource->target != PIPE_BUFFER); assert(info->dst.resource->target != PIPE_BUFFER); - emit_blit_texture(ctx, batch->draw, info, sample_0); + emit_blit_texture(ctx, batch->draw, info, sample_0); } trace_end_blit(&batch->trace, batch->draw); @@ -1098,11 +1139,12 @@ handle_rgba_blit(struct fd_context *ctx, * in particular as u_blitter cannot blit stencil. So handle the fallback * ourself and never "fail". */ +template static bool do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info, bool sample_0) assert_dt { - bool success = handle_rgba_blit(ctx, info, sample_0); + bool success = handle_rgba_blit(ctx, info, sample_0); if (!success) { if (sample_0 && !util_format_is_pure_integer(info->src.format)) mesa_logw("sample averaging on fallback blit when we shouldn't."); @@ -1116,6 +1158,7 @@ do_rewritten_blit(struct fd_context *ctx, * Handle depth/stencil blits either via u_blitter and/or re-writing the * blit into an equivilant format that we can handle */ +template static bool handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt @@ -1139,14 +1182,14 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R8_UINT; blit.dst.format = PIPE_FORMAT_R8_UINT; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (info->mask & PIPE_MASK_Z) { blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R32_FLOAT; blit.dst.format = PIPE_FORMAT_R32_FLOAT; - do_rewritten_blit(ctx, &blit, true); + do_rewritten_blit(ctx, &blit, true); } if (info->mask & PIPE_MASK_S) { @@ -1155,7 +1198,7 @@ handle_zs_blit(struct fd_context *ctx, blit.dst.format = PIPE_FORMAT_R8_UINT; blit.src.resource = &src->stencil->b.b; blit.dst.resource = &dst->stencil->b.b; - do_rewritten_blit(ctx, &blit, true); + do_rewritten_blit(ctx, &blit, true); } return true; @@ -1164,7 +1207,7 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R16_UNORM; blit.dst.format = PIPE_FORMAT_R16_UNORM; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z32_FLOAT: @@ -1172,7 +1215,7 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R32_UINT; blit.dst.format = PIPE_FORMAT_R32_UINT; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: @@ -1206,6 +1249,7 @@ handle_zs_blit(struct fd_context *ctx, } } +template static bool handle_compressed_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt @@ -1251,7 +1295,7 @@ handle_compressed_blit(struct fd_context *ctx, blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw); blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh); - return do_rewritten_blit(ctx, &blit, false); + return do_rewritten_blit(ctx, &blit, false); } /** @@ -1260,6 +1304,7 @@ handle_compressed_blit(struct fd_context *ctx, * (also -1.0), when we're supposed to be memcpying the bits. See * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion. */ +template static bool handle_snorm_copy_blit(struct fd_context *ctx, const struct pipe_blit_info *info) @@ -1273,41 +1318,48 @@ handle_snorm_copy_blit(struct fd_context *ctx, blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format); - return do_rewritten_blit(ctx, &blit, false); + return do_rewritten_blit(ctx, &blit, false); } +template static bool fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt { if (info->mask & PIPE_MASK_ZS) - return handle_zs_blit(ctx, info); + return handle_zs_blit(ctx, info); if (util_format_is_compressed(info->src.format) || util_format_is_compressed(info->dst.format)) - return handle_compressed_blit(ctx, info); + return handle_compressed_blit(ctx, info); if ((info->src.format == info->dst.format) && util_format_is_snorm(info->src.format)) - return handle_snorm_copy_blit(ctx, info); + return handle_snorm_copy_blit(ctx, info); - return handle_rgba_blit(ctx, info, false); + return handle_rgba_blit(ctx, info, false); } +template void -fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_blitter_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->clear_ubwc = fd6_clear_ubwc; + ctx->clear_ubwc = fd6_clear_ubwc; ctx->validate_format = fd6_validate_format; if (FD_DBG(NOBLIT)) return; - pctx->clear_texture = fd6_clear_texture; - ctx->blit = fd6_blit; + pctx->clear_texture = fd6_clear_texture; + ctx->blit = fd6_blit; } +/* Teach the compiler about needed variants: */ +template void fd6_blitter_init(struct pipe_context *pctx); +template void fd6_blitter_init(struct pipe_context *pctx); + unsigned fd6_tile_mode(const struct pipe_resource *tmpl) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h index 8687f57445b..d7b8a257eed 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h @@ -32,8 +32,8 @@ #include "freedreno_context.h" -BEGINC; +template void fd6_blitter_init(struct pipe_context *pctx); unsigned fd6_tile_mode(const struct pipe_resource *tmpl); @@ -42,12 +42,12 @@ unsigned fd6_tile_mode(const struct pipe_resource *tmpl); * instead of CP_EVENT_WRITE::BLITs */ +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, struct pipe_surface *psurf, const struct pipe_box *box2d, union pipe_color_union *color, uint32_t unknown_8c01) assert_dt; +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) assert_dt; -ENDC; - #endif /* FD6_BLIT_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index 0306e70f1d3..9802964045b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -41,6 +41,7 @@ #include "fd6_pack.h" /* maybe move to fd6_program? */ +template static void cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *v) @@ -49,14 +50,16 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_info *i = &v->info; enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64; - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, )); - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) | - A6XX_HLSQ_CS_CNTL_ENABLED); + OUT_REG(ring, HLSQ_CS_CNTL( + CHIP, + .constlen = v->constlen, + .enabled = true, + )); OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 1); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | @@ -103,6 +106,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, fd6_emit_immediates(ctx->screen, v, ring); } +template static void fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt { @@ -119,7 +123,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt return; cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); - cs_program_emit(ctx, cs->stateobj, cs->v); + cs_program_emit(ctx, cs->stateobj, cs->v); cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v); } @@ -156,7 +160,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt } if (ctx->gen_dirty) - fd6_emit_cs_state(ctx, ring, cs); + fd6_emit_cs_state(ctx, ring, cs); if (ctx->gen_dirty & BIT(FD6_GROUP_CONST)) fd6_emit_cs_user_consts(ctx, ring, cs); @@ -201,25 +205,37 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt const unsigned *num_groups = info->grid; /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */ const unsigned work_dim = info->work_dim ? info->work_dim : 3; - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7); - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3); - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ + OUT_REG(ring, + HLSQ_CS_NDRANGE_0( + CHIP, + .kerneldim = work_dim, + .localsizex = local_size[0] - 1, + .localsizey = local_size[1] - 1, + .localsizez = local_size[2] - 1, + ), + HLSQ_CS_NDRANGE_1( + CHIP, + .globalsize_x = local_size[0] * num_groups[0], + ), + HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0), + HLSQ_CS_NDRANGE_3( + CHIP, + .globalsize_y = local_size[1] * num_groups[1], + ), + HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0), + HLSQ_CS_NDRANGE_5( + CHIP, + .globalsize_z = local_size[2] * num_groups[2], + ), + HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0), + ); + + OUT_REG(ring, + HLSQ_CS_KERNEL_GROUP_X(CHIP, 1), + HLSQ_CS_KERNEL_GROUP_Y(CHIP, 1), + HLSQ_CS_KERNEL_GROUP_Z(CHIP, 1), + ); if (info->indirect) { struct fd_resource *rsc = fd_resource(info->indirect); @@ -264,11 +280,18 @@ fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso) free(hwcso); } +template void -fd6_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_compute_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->launch_grid = fd6_launch_grid; + + ctx->launch_grid = fd6_launch_grid; pctx->create_compute_state = fd6_compute_state_create; pctx->delete_compute_state = fd6_compute_state_delete; } + +/* Teach the compiler about needed variants: */ +template void fd6_compute_init(struct pipe_context *pctx); +template void fd6_compute_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.h b/src/gallium/drivers/freedreno/a6xx/fd6_compute.h index f832790c41b..3836589ba58 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.h @@ -36,7 +36,7 @@ struct fd6_compute_state { uint32_t user_consts_cmdstream_size; }; -EXTERNC +template void fd6_compute_init(struct pipe_context *pctx); #endif /* FD6_COMPUTE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index 1d4281e0de2..0493b6fcd23 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -228,6 +228,7 @@ setup_state_map(struct fd_context *ctx) BIT(FD6_GROUP_NON_GROUP)); } +template struct pipe_context * fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) disable_thread_safety_analysis @@ -253,11 +254,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create; pctx->create_vertex_elements_state = fd6_vertex_state_create; - fd6_draw_init(pctx); - fd6_compute_init(pctx); - fd6_gmem_init(pctx); + fd6_draw_init(pctx); + fd6_compute_init(pctx); + fd6_gmem_init(pctx); fd6_texture_init(pctx); - fd6_prog_init(pctx); + fd6_prog_init(pctx); fd6_query_context_init(pctx); setup_state_map(&fd6_ctx->base); @@ -297,7 +298,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd_context_setup_common_vbos(&fd6_ctx->base); - fd6_blitter_init(pctx); + fd6_blitter_init(pctx); return fd_context_init_tc(pctx, flags); } + +/* Teach the compiler about needed variants: */ +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 56a40a2c4fb..7025b22ab2a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -38,8 +38,6 @@ #include "a6xx.xml.h" -BEGINC; - struct fd6_lrz_state { union { struct { @@ -154,6 +152,7 @@ fd6_context(struct fd_context *ctx) return (struct fd6_context *)ctx; } +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); @@ -197,6 +196,4 @@ fd6_vertex_stateobj(void *p) return (struct fd6_vertex_stateobj *)p; } -ENDC; - #endif /* FD6_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc index 1e778ab6b42..425a6f79bc9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc @@ -202,6 +202,7 @@ flush_streamout(struct fd_context *ctx, struct fd6_emit *emit) } } +template static void fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, unsigned drawid_offset, @@ -344,7 +345,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, } if (emit.dirty_groups) - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); if (ctx->batch->barrier) fd6_barrier_flush(ctx->batch); @@ -398,7 +399,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, if (emit.dirty_groups) { emit.state.num_groups = 0; emit.draw = &draws[i]; - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); } assert(!index_offset); /* handled by util_draw_multi() */ @@ -418,6 +419,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, fd_context_all_clean(ctx); } +template static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) assert_dt { @@ -436,7 +438,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a fd6_emit_ccu_cntl(ring, screen, false); OUT_REG(ring, - A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, @@ -451,23 +453,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); OUT_RING(ring, 0x0); - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + OUT_REG(ring, + SP_PS_2D_SRC_INFO(CHIP), + SP_PS_2D_SRC_SIZE(CHIP), + SP_PS_2D_SRC(CHIP), + SP_PS_2D_SRC_PITCH(CHIP), + ); - OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1); - OUT_RING(ring, 0x0000f410); + OUT_REG(ring, SP_2D_DST_FORMAT( + CHIP, + // TODO probably FMT6_16_UNORM, but this matches what we used to emit: + .color_format = FMT6_32_32_32_32_FLOAT, + .mask = 0xf, + )); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); OUT_RING(ring, @@ -545,6 +543,7 @@ is_z32(enum pipe_format format) } } +template static bool fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, const union pipe_color_union *color, double depth, @@ -566,7 +565,7 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { zsbuf->lrz_valid = true; zsbuf->lrz_direction = FD_LRZ_UNKNOWN; - fd6_clear_lrz(ctx->batch, zsbuf, depth); + fd6_clear_lrz(ctx->batch, zsbuf, depth); } } @@ -586,10 +585,16 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, return true; } +template void -fd6_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_draw_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->draw_vbos = fd6_draw_vbos; - ctx->clear = fd6_clear; + ctx->clear = fd6_clear; + ctx->draw_vbos = fd6_draw_vbos; } + +/* Teach the compiler about needed variants: */ +template void fd6_draw_init(struct pipe_context *pctx); +template void fd6_draw_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.h b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h index fad25015c4b..00e9eedd2f9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h @@ -34,7 +34,7 @@ #include "fd6_context.h" -EXTERNC +template void fd6_draw_init(struct pipe_context *pctx); #endif /* FD6_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index c7d7ab91288..a172fc308bb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -535,6 +535,7 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem) return ring; } +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) { @@ -596,7 +597,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) FD6_GROUP_PROG_INTERP); break; case FD6_GROUP_RASTERIZER: - state = fd6_rasterizer_state(ctx, emit->primitive_restart); + state = fd6_rasterizer_state(ctx, emit->primitive_restart); fd6_state_add_group(&emit->state, state, FD6_GROUP_RASTERIZER); break; case FD6_GROUP_PROG_FB_RAST: @@ -613,23 +614,23 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR); break; case FD6_GROUP_VS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS); break; case FD6_GROUP_HS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_HS_BINDLESS); break; case FD6_GROUP_DS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_DS_BINDLESS); break; case FD6_GROUP_GS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_GS_BINDLESS); break; case FD6_GROUP_FS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read); fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS); break; case FD6_GROUP_CONST: @@ -686,6 +687,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_emit(&emit->state, ring); } +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); + +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs) @@ -722,7 +727,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, case FD6_GROUP_CS_BINDLESS: fd6_state_take_group( &state, - fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false), + fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false), FD6_GROUP_CS_BINDLESS); break; default: @@ -749,9 +754,13 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme )); } +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs); +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs); + /* emit setup at begin of new cmdstream buffer (don't rely on previous * state, there could have been a context switch between ioctls): */ +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) { @@ -767,7 +776,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) fd6_cache_inv(batch, ring); OUT_REG(ring, - A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, @@ -820,7 +829,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value); - WRITE(REG_A6XX_PC_RASTER_CNTL, 0); + OUT_REG(ring, PC_RASTER_CNTL(CHIP)); WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0); @@ -844,7 +853,11 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0); - WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); + OUT_REG(ring, HLSQ_CONTROL_5_REG( + CHIP, + .linelengthregid = INVALID_REG, + .foveationqualityregid = INVALID_REG, + )); emit_marker6(ring, 7); @@ -904,6 +917,9 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) } } +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + static void fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst, unsigned dst_off, struct pipe_resource *src, unsigned src_off, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 16e6b48c11a..67572374fcc 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -332,22 +332,22 @@ fd6_gl2spacing(enum gl_tess_spacing spacing) } } -BEGINC; - +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt; struct fd6_compute_state; +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs) assert_dt; void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem); + +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); void fd6_emit_init_screen(struct pipe_screen *pscreen); -ENDC; - static inline void fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc index 70fee1bce33..c10f09b6641 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc @@ -73,6 +73,7 @@ fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc, } } +template static void emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, const struct fd_gmem_stateobj *gmem) @@ -127,7 +128,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, OUT_REG( ring, - A6XX_RB_MRT_BUF_INFO(i, .color_format = format, + RB_MRT_BUF_INFO(CHIP, i, .color_format = format, .color_tile_mode = tile_mode, .color_swap = swap), A6XX_RB_MRT_PITCH(i, stride), A6XX_RB_MRT_ARRAY_PITCH(i, array_stride), @@ -155,6 +156,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index)); } +template static void emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, const struct fd_gmem_stateobj *gmem) @@ -169,7 +171,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer); OUT_REG( - ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt), + ring, RB_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt), A6XX_RB_DEPTH_BUFFER_PITCH(stride), A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(array_stride), A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset), @@ -209,22 +211,25 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, uint32_t offset = fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer); - OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true), + OUT_REG(ring, RB_STENCIL_INFO(CHIP, .separate_stencil = true), A6XX_RB_STENCIL_BUFFER_PITCH(stride), A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(array_stride), A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo, .bo_offset = offset), A6XX_RB_STENCIL_BUFFER_BASE_GMEM(base)); } else { - OUT_REG(ring, A6XX_RB_STENCIL_INFO(0)); + OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0)); } } else { - OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); - OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */ + OUT_REG(ring, + RB_DEPTH_BUFFER_INFO( + CHIP, + .depth_format = DEPTH6_NONE, + ), + A6XX_RB_DEPTH_BUFFER_PITCH(), + A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(), + A6XX_RB_DEPTH_BUFFER_BASE(), + A6XX_RB_DEPTH_BUFFER_BASE_GMEM(), + ); OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE)); @@ -236,7 +241,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */ - OUT_REG(ring, A6XX_RB_STENCIL_INFO(0)); + OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0)); } } @@ -359,13 +364,13 @@ patch_fb_read_sysmem(struct fd_batch *batch) util_dynarray_clear(&batch->fb_read_patches); } +template static void update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, bool binning) { struct fd_ringbuffer *ring = batch->gmem; struct fd_screen *screen = batch->ctx->screen; - uint32_t cntl = 0; bool depth_ubwc_enable = false; uint32_t mrts_ubwc_enable = 0; int i; @@ -387,20 +392,23 @@ update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, mrts_ubwc_enable |= 1 << i; } - cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2); - if (binning) - cntl |= A6XX_RB_RENDER_CNTL_BINNING; + struct fd_reg_pair rb_render_cntl = RB_RENDER_CNTL( + CHIP, + .ccusinglecachelinesize = 2, + .binning = binning, + .flag_depth = depth_ubwc_enable, + .flag_mrts = mrts_ubwc_enable, + ); if (screen->info->a6xx.has_cp_reg_write) { - OUT_PKT7(ring, CP_REG_WRITE, 3); - OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL)); - OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL); + OUT_PKT(ring, CP_REG_WRITE, + CP_REG_WRITE_0(TRACK_RENDER_CNTL), + CP_REG_WRITE_1(rb_render_cntl.reg), + CP_REG_WRITE_2(rb_render_cntl.value), + ); } else { - OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1); + OUT_REG(ring, rb_render_cntl); } - OUT_RING(ring, cntl | - COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) | - A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable)); } /* extra size to store VSC_DRAW_STRM_SIZE: */ @@ -693,12 +701,14 @@ struct bin_size_params { unsigned lrz_feedback_zmode_mask; }; +template static void set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem, struct bin_size_params p) { unsigned w = gmem ? gmem->bin_w : 0; unsigned h = gmem ? gmem->bin_h : 0; + OUT_REG(ring, A6XX_GRAS_BIN_CONTROL( .binw = w, .binh = h, .render_mode = p.render_mode, @@ -706,7 +716,8 @@ set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem, .buffers_location = p.buffers_location, .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, )); - OUT_REG(ring, A6XX_RB_BIN_CONTROL( + OUT_REG(ring, RB_BIN_CONTROL( + CHIP, .binw = w, .binh = h, .render_mode = p.render_mode, .force_lrz_write_dis = p.force_lrz_write_dis, @@ -827,9 +838,11 @@ emit_msaa(struct fd_ringbuffer *ring, unsigned nr) } static void prepare_tile_setup_ib(struct fd_batch *batch); +template static void prepare_tile_fini_ib(struct fd_batch *batch); /* before first tile */ +template static void fd6_emit_tile_init(struct fd_batch *batch) assert_dt { @@ -838,7 +851,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct fd_screen *screen = batch->ctx->screen; - fd6_emit_restore(batch, ring); + fd6_emit_restore(batch, ring); fd6_emit_lrz_flush(ring); @@ -851,7 +864,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt fd6_cache_inv(batch, ring); prepare_tile_setup_ib(batch); - prepare_tile_fini_ib(batch); + prepare_tile_fini_ib(batch); OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); @@ -863,8 +876,8 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt fd_wfi(batch, ring); fd6_emit_ccu_cntl(ring, screen, true); - emit_zs(ring, pfb->zsbuf, batch->gmem_state); - emit_mrt(ring, pfb, batch->gmem_state); + emit_zs(ring, pfb->zsbuf, batch->gmem_state); + emit_mrt(ring, pfb, batch->gmem_state); emit_msaa(ring, pfb->samples); patch_fb_read_gmem(batch); @@ -872,12 +885,12 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt /* enable stream-out during binning pass: */ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = BINNING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, }); - update_render_cntl(batch, pfb, true); + update_render_cntl(batch, pfb, true); emit_binning_pass(batch); /* and disable stream-out for draw pass: */ @@ -890,7 +903,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt */ // NOTE a618 not setting .FORCE_LRZ_WRITE_DIS .. - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .force_lrz_write_dis = true, .buffers_location = BUFFERS_IN_GMEM, @@ -912,18 +925,19 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt /* no binning pass, so enable stream-out for draw pass:: */ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, }); } - update_render_cntl(batch, pfb, false); + update_render_cntl(batch, pfb, false); emit_common_init(batch); } +template static void set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) { @@ -933,8 +947,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1); OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1)); - OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1); - OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1)); + OUT_REG(ring, SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1)); OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1); OUT_RING(ring, @@ -942,6 +955,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) } /* before mem2gmem */ +template static void fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) { @@ -984,10 +998,10 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x0); - set_window_offset(ring, x1, y1); + set_window_offset(ring, x1, y1); const struct fd_gmem_stateobj *gmem = batch->gmem_state; - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, @@ -996,7 +1010,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) OUT_PKT7(ring, CP_SET_MODE, 1); OUT_RING(ring, 0x0); } else { - set_window_offset(ring, x1, y1); + set_window_offset(ring, x1, y1); OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -1388,6 +1402,7 @@ fd6_unknown_8c01(enum pipe_format format, unsigned buffers) return 0; } +template static void emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, @@ -1412,7 +1427,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us * just do a restore of the other channel for partial packed z/s writes. */ - fd6_resolve_tile(batch, ring, base, psurf, 0); + fd6_resolve_tile(batch, ring, base, psurf, 0); return; } @@ -1442,6 +1457,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, * transfer from gmem to system memory (ie. normal RAM) */ +template static void prepare_tile_fini_ib(struct fd_batch *batch) assert_dt { @@ -1459,12 +1475,12 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) { - emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf, - FD_BUFFER_DEPTH); + emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], + pfb->zsbuf, FD_BUFFER_DEPTH); } if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) { - emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf, - FD_BUFFER_STENCIL); + emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], + pfb->zsbuf, FD_BUFFER_STENCIL); } } @@ -1475,8 +1491,8 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt continue; if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], - FD_BUFFER_COLOR); + emit_resolve_blit(batch, ring, gmem->cbuf_base[i], + pfb->cbufs[i], FD_BUFFER_COLOR); } } } @@ -1546,6 +1562,7 @@ fd6_emit_tile_fini(struct fd_batch *batch) } } +template static void emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt { @@ -1572,7 +1589,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) continue; - fd6_clear_surface(ctx, ring, pfb->cbufs[i], &box2d, &color, 0); + fd6_clear_surface(ctx, ring, pfb->cbufs[i], &box2d, &color, 0); } } if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { @@ -1587,8 +1604,8 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { value.f[0] = batch->clear_depth; value.ui[1] = batch->clear_stencil; - fd6_clear_surface(ctx, ring, pfb->zsbuf, &box2d, - &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers)); + fd6_clear_surface(ctx, ring, pfb->zsbuf, &box2d, + &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers)); } if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { @@ -1598,7 +1615,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt stencil_surf.format = PIPE_FORMAT_S8_UINT; stencil_surf.texture = separate_stencil; - fd6_clear_surface(ctx, ring, &stencil_surf, &box2d, &value, 0); + fd6_clear_surface(ctx, ring, &stencil_surf, &box2d, &value, 0); } } @@ -1608,13 +1625,14 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt trace_end_clear_restore(&batch->trace, ring); } +template static void fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt { struct fd_ringbuffer *ring = batch->gmem; struct fd_screen *screen = batch->ctx->screen; - fd6_emit_restore(batch, ring); + fd6_emit_restore(batch, ring); fd6_emit_lrz_flush(ring); if (batch->prologue) { @@ -1638,14 +1656,14 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt else set_scissor(ring, 0, 0, 0, 0); - set_window_offset(ring, 0, 0); + set_window_offset(ring, 0, 0); - set_bin_size(ring, NULL, { + set_bin_size(ring, NULL, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_SYSMEM, }); - emit_sysmem_clears(batch, ring); + emit_sysmem_clears(batch, ring); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); @@ -1671,12 +1689,12 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); - emit_zs(ring, pfb->zsbuf, NULL); - emit_mrt(ring, pfb, NULL); + emit_zs(ring, pfb->zsbuf, NULL); + emit_mrt(ring, pfb, NULL); emit_msaa(ring, pfb->samples); patch_fb_read_sysmem(batch); - update_render_cntl(batch, pfb, false); + update_render_cntl(batch, pfb, false); emit_common_init(batch); } @@ -1704,18 +1722,24 @@ fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt fd_wfi(batch, ring); } +template void -fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_gmem_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->emit_tile_init = fd6_emit_tile_init; - ctx->emit_tile_prep = fd6_emit_tile_prep; + ctx->emit_tile_init = fd6_emit_tile_init; + ctx->emit_tile_prep = fd6_emit_tile_prep; ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem; ctx->emit_tile_renderprep = fd6_emit_tile_renderprep; ctx->emit_tile = fd6_emit_tile; ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem; ctx->emit_tile_fini = fd6_emit_tile_fini; - ctx->emit_sysmem_prep = fd6_emit_sysmem_prep; + ctx->emit_sysmem_prep = fd6_emit_sysmem_prep; ctx->emit_sysmem_fini = fd6_emit_sysmem_fini; } + +/* Teach the compiler about needed variants: */ +template void fd6_gmem_init(struct pipe_context *pctx); +template void fd6_gmem_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h index 74b68bdea87..bb596f95d8d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h @@ -30,7 +30,7 @@ #include "pipe/p_context.h" -EXTERNC +template void fd6_gmem_init(struct pipe_context *pctx); #endif /* FD6_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc index b66f218f071..7e1a371f10c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc @@ -169,6 +169,7 @@ validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *se } /* Build bindless descriptor state, returns ownership of state reference */ +template struct fd_ringbuffer * fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read) @@ -258,8 +259,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, unsigned idx = ir3_shader_descriptor_set(shader); if (shader == PIPE_SHADER_COMPUTE) { - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.cs_bindless = 0x1f)); - OUT_REG(ring, A6XX_SP_CS_BINDLESS_BASE_DESCRIPTOR( + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .cs_bindless = 0x1f)); + OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP, idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, )); OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR( @@ -300,8 +301,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, ); } } else { - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.gfx_bindless = 0x1f)); - OUT_REG(ring, A6XX_SP_BINDLESS_BASE_DESCRIPTOR( + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .gfx_bindless = 0x1f)); + OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP, idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, )); OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR( @@ -346,6 +347,9 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, return ring; } +template struct fd_ringbuffer *fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read); +template struct fd_ringbuffer *fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read); + static void fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h index 11d25e2c4c3..a0e03c91e46 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h @@ -30,14 +30,11 @@ #include "freedreno_context.h" -BEGINC; - +template struct fd_ringbuffer * fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read) assert_dt; void fd6_image_init(struct pipe_context *pctx); -ENDC; - #endif /* FD6_IMAGE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index 99c7c94ea27..a85c9531f18 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -305,33 +305,44 @@ sp_xs_config(struct ir3_shader_variant *v) A6XX_SP_VS_CONFIG_NSAMP(v->num_samp); } +template static void setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state) { struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 100 * 4); - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, )); assert(state->vs->constlen >= state->bs->constlen); - OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); - OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) | - A6XX_HLSQ_VS_CNTL_ENABLED); - OUT_RING(ring, COND(state->hs, - A6XX_HLSQ_HS_CNTL_ENABLED | - A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen))); - OUT_RING(ring, COND(state->ds, - A6XX_HLSQ_DS_CNTL_ENABLED | - A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen))); - OUT_RING(ring, COND(state->gs, - A6XX_HLSQ_GS_CNTL_ENABLED | - A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen))); - OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) | - A6XX_HLSQ_FS_CNTL_ENABLED); + OUT_REG(ring, HLSQ_VS_CNTL( + CHIP, + .constlen = state->vs->constlen, + .enabled = true, + )); + OUT_REG(ring, HLSQ_HS_CNTL( + CHIP, + .constlen = COND(state->hs, state->hs->constlen), + .enabled = COND(state->hs, true), + )); + OUT_REG(ring, HLSQ_DS_CNTL( + CHIP, + .constlen = COND(state->ds, state->ds->constlen), + .enabled = COND(state->ds, true), + )); + OUT_REG(ring, HLSQ_GS_CNTL( + CHIP, + .constlen = COND(state->gs, state->gs->constlen), + .enabled = COND(state->gs, true), + )); + OUT_REG(ring, HLSQ_FS_CNTL( + CHIP, + .constlen = state->fs->constlen, + .enabled = true, + )); OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); OUT_RING(ring, sp_xs_config(state->vs)); @@ -397,6 +408,7 @@ tex_opc_to_prefetch_cmd(opc_t tex_opc) } } +template static void setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, struct fd6_program_state *state, @@ -549,16 +561,18 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE)); for (int i = 0; i < fs->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; - OUT_RING(ring, - A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) | - A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) | - A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) | - A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) | - A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) | - COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) | - COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) | - A6XX_SP_FS_PREFETCH_CMD_CMD( - tex_opc_to_prefetch_cmd(prefetch->tex_opc))); + OUT_RING(ring, SP_FS_PREFETCH_CMD( + CHIP, i, + .src = prefetch->src, + .samp_id = prefetch->samp_id, + .tex_id = prefetch->tex_id, + .dst = prefetch->dst, + .wrmask = prefetch->wrmask, + .half = prefetch->half_precision, + .bindless = prefetch->bindless, + .cmd = tex_opc_to_prefetch_cmd(prefetch->tex_opc), + ).value + ); } OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1); @@ -864,31 +878,43 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); - OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5); - OUT_RING(ring, 0x7); /* XXX */ - OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) | - A6XX_HLSQ_CONTROL_2_REG_CENTERRHW(ij_regid[IJ_PERSP_CENTER_RHW])); - OUT_RING( - ring, - A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID( - ij_regid[IJ_PERSP_CENTROID]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID( - ij_regid[IJ_LINEAR_CENTROID])); - OUT_RING( - ring, - A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | - A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | - A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) | - A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE])); - OUT_RING(ring, 0xfcfc); /* line length (?), foveation quality */ + OUT_REG(ring, + HLSQ_CONTROL_1_REG(CHIP, 0x7), /* XXX */ + HLSQ_CONTROL_2_REG( + CHIP, + .faceregid = face_regid, + .sampleid = samp_id_regid, + .samplemask = smask_in_regid, + .centerrhw = ij_regid[IJ_PERSP_CENTER_RHW], + ), + HLSQ_CONTROL_3_REG( + CHIP, + .ij_persp_pixel = ij_regid[IJ_PERSP_PIXEL], + .ij_linear_pixel = ij_regid[IJ_LINEAR_PIXEL], + .ij_persp_centroid = ij_regid[IJ_PERSP_CENTROID], + .ij_linear_centroid = ij_regid[IJ_LINEAR_CENTROID], + ), + HLSQ_CONTROL_4_REG( + CHIP, + .ij_persp_sample = ij_regid[IJ_PERSP_SAMPLE], + .ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE], + .xycoordregid = coord_regid, + .zwcoordregid = zwcoord_regid, + ), + HLSQ_CONTROL_5_REG( + CHIP, + .linelengthregid = INVALID_REG, + .foveationqualityregid = INVALID_REG, + ), + ); - OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1); - OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(fssz) | - COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS)); + OUT_REG(ring, + HLSQ_FS_CNTL_0( + CHIP, + .threadsize = fssz, + .varyings = enable_varyings, + ), + ); OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1); OUT_RING( @@ -1287,6 +1313,7 @@ emit_interp_state(struct fd_ringbuffer *ring, const struct fd6_program_state *st OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ } +template static struct ir3_program_state * fd6_program_create(void *data, struct ir3_shader_variant *bs, struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, @@ -1336,9 +1363,9 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, fd_screen_unlock(screen); } - setup_config_stateobj(ctx, state); - setup_stateobj(state->binning_stateobj, ctx, state, key, true); - setup_stateobj(state->stateobj, ctx, state, key, false); + setup_config_stateobj(ctx, state); + setup_stateobj(state->binning_stateobj, ctx, state, key, true); + setup_stateobj(state->stateobj, ctx, state, key, false); state->interp_stateobj = create_interp_stateobj(ctx, state); const struct ir3_stream_output_info *stream_output = @@ -1403,19 +1430,25 @@ fd6_program_destroy(void *data, struct ir3_program_state *state) free(so); } +template static const struct ir3_cache_funcs cache_funcs = { - .create_state = fd6_program_create, + .create_state = fd6_program_create, .destroy_state = fd6_program_destroy, }; +template void fd6_prog_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); - ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); + ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); ir3_prog_init(pctx); fd_prog_init(pctx); } + +/* Teach the compiler about needed variants: */ +template void fd6_prog_init(struct pipe_context *pctx); +template void fd6_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index e10898b90d0..e7ae12f466e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -99,15 +99,12 @@ fd6_last_shader(const struct fd6_program_state *state) return state->vs; } -BEGINC; - void fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) assert_dt; struct fd_ringbuffer *fd6_program_interp_state(struct fd6_emit *emit) assert_dt; +template void fd6_prog_init(struct pipe_context *pctx); -ENDC; - #endif /* FD6_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc index 466cf8df208..d636a9e8643 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc @@ -35,6 +35,7 @@ #include "fd6_pack.h" #include "fd6_rasterizer.h" +template struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, @@ -102,7 +103,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, } OUT_REG(ring, A6XX_VPC_POLYGON_MODE(mode)); - OUT_REG(ring, A6XX_PC_POLYGON_MODE(mode)); + OUT_REG(ring, PC_POLYGON_MODE(CHIP, mode)); if (ctx->screen->info->a6xx.has_shading_rate) { OUT_REG(ring, A6XX_RB_UNKNOWN_8A00()); @@ -114,6 +115,9 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, return ring; } +template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); +template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); + void * fd6_rasterizer_state_create(struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h index 32db99b6593..800ee1a748b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h @@ -33,8 +33,6 @@ #include "freedreno_context.h" -BEGINC; - struct fd6_rasterizer_stateobj { struct pipe_rasterizer_state base; @@ -51,11 +49,13 @@ void *fd6_rasterizer_state_create(struct pipe_context *pctx, const struct pipe_rasterizer_state *cso); void fd6_rasterizer_state_delete(struct pipe_context *, void *hwcso); +template struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); +template static inline struct fd_ringbuffer * fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt { @@ -64,13 +64,11 @@ fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt unsigned variant = primitive_restart; if (unlikely(!rasterizer->stateobjs[variant])) { - rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj( + rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj( ctx, ctx->rasterizer, primitive_restart); } return rasterizer->stateobjs[variant]; } -ENDC; - #endif /* FD6_RASTERIZER_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc index 010a151e227..abb7caba748 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc @@ -169,7 +169,11 @@ fd6_screen_init(struct pipe_screen *pscreen) FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED | FD_GMEM_BLEND_ENABLED | FD_GMEM_LOGICOP_ENABLED); - pscreen->context_create = fd6_context_create; + if (screen->gen == 7) { + pscreen->context_create = fd6_context_create; + } else { + pscreen->context_create = fd6_context_create; + } pscreen->is_format_supported = fd6_screen_is_format_supported; screen->tile_mode = fd6_tile_mode; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.h b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h index 5d656835001..96d8330c804 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h @@ -28,7 +28,7 @@ #ifndef FD6_SCREEN_H_ #define FD6_SCREEN_H_ -#include "pipe/p_screen.h" +#include "freedreno_screen.h" EXTERNC void fd6_screen_init(struct pipe_screen *pscreen);