From 5f091af8973be90fa1635f25fa0bc656950da26a Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Mon, 1 Dec 2025 17:08:35 -0800 Subject: [PATCH] tu: Move a bunch of program config to CRB. This shows off how we don't need to pass an explicit size per CRB instance in our non-growable CSes. However, I don't like the additional indentation I added to make a CRB go out of scope when I needed it to. Part-of: --- src/freedreno/vulkan/tu_clear_blit.cc | 12 ++-- src/freedreno/vulkan/tu_pipeline.cc | 69 ++++++++++---------- src/freedreno/vulkan/tu_pipeline.h | 4 +- src/freedreno/vulkan/tu_shader.cc | 93 ++++++++++++--------------- 4 files changed, 85 insertions(+), 93 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 70daba82da0..5567ee7b8ff 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -871,11 +871,13 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type, .cs_bindless = CHIP == A6XX ? 0x1f : 0xff, .gfx_bindless = CHIP == A6XX ? 
0x1f : 0xff,)); - tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, vs); - tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, fs); + tu_crb crb = cs->crb(2 * 5); + tu6_emit_xs_config(crb, MESA_SHADER_VERTEX, vs); + tu6_emit_xs_config(crb, MESA_SHADER_TESS_CTRL, NULL); + tu6_emit_xs_config(crb, MESA_SHADER_TESS_EVAL, NULL); + tu6_emit_xs_config(crb, MESA_SHADER_GEOMETRY, NULL); + tu6_emit_xs_config(crb, MESA_SHADER_FRAGMENT, fs); + crb.flush(); struct tu_pvtmem_config pvtmem = {}; tu6_emit_xs(cs, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova); diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index eb16536a7d6..a927b52b25a 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -374,7 +374,7 @@ static const xs_config xs_configs[] = { template void -tu6_emit_xs_config(struct tu_cs *cs, +tu6_emit_xs_config(struct tu_crb &crb, mesa_shader_stage stage, /* xs->type, but xs may be NULL */ const struct ir3_shader_variant *xs) { @@ -382,28 +382,27 @@ tu6_emit_xs_config(struct tu_cs *cs, if (!xs) { /* shader stage disabled */ - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1); - tu_cs_emit(cs, 0); + crb.add(tu_reg_value { .reg = cfg->reg_sp_xs_config, .value = 0 }); + crb.add(tu_reg_value { .reg = cfg->reg_hlsq_xs_ctrl, .value = 0 }); return; } - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1); - tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED | - COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | - COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | - COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) | - COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | - A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp)); - - tu_cs_emit_pkt4(cs, 
cfg->reg_hlsq_xs_ctrl, 1); - tu_cs_emit(cs, A6XX_SP_VS_CONST_CONFIG_CONSTLEN(xs->constlen) | - A6XX_SP_VS_CONST_CONFIG_ENABLED | - COND(xs->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE, - A7XX_SP_VS_CONST_CONFIG_READ_IMM_SHARED_CONSTS)); + crb.add(tu_reg_value { + .reg = cfg->reg_sp_xs_config, + .value = A6XX_SP_VS_CONFIG_ENABLED | + COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | + COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | + COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) | + COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | + A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp) }); + crb.add(tu_reg_value { + .reg = cfg->reg_hlsq_xs_ctrl, + .value = A6XX_SP_VS_CONST_CONFIG_CONSTLEN(xs->constlen) | + A6XX_SP_VS_CONST_CONFIG_ENABLED | + COND(xs->shader_options.push_consts_type == + IR3_PUSH_CONSTS_SHARED_PREAMBLE, + A7XX_SP_VS_CONST_CONFIG_READ_IMM_SHARED_CONSTS) }); } TU_GENX(tu6_emit_xs_config); @@ -466,18 +465,18 @@ tu6_emit_dynamic_offset(struct tu_cs *cs, template void -tu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable) +tu6_emit_shared_consts_enable(struct tu_crb &crb, bool enable) { if (CHIP == A6XX) { /* Enable/disable shared constants */ - tu_cs_emit_regs(cs, HLSQ_SHARED_CONSTS(CHIP, .enable = enable)); + crb.add(HLSQ_SHARED_CONSTS(CHIP, .enable = enable)); } else { assert(!enable); } - tu_cs_emit_regs(cs, A6XX_SP_MODE_CNTL(.constant_demotion_enable = true, - .isammode = ISAMMODE_GL, - .shared_consts_enable = enable)); + crb.add(A6XX_SP_MODE_CNTL(.constant_demotion_enable = true, + .isammode = ISAMMODE_GL, + .shared_consts_enable = enable)); } TU_GENX(tu6_emit_shared_consts_enable); @@ -1257,24 +1256,24 @@ tu6_emit_program_config(struct tu_cs *cs, { STATIC_ASSERT(MESA_SHADER_VERTEX == 0); + tu_crb crb = cs->crb(0); + bool shared_consts_enable = prog->shared_consts.type == IR3_PUSH_CONSTS_SHARED; - tu6_emit_shared_consts_enable(cs, shared_consts_enable); + 
tu6_emit_shared_consts_enable(crb, shared_consts_enable); - tu_cs_emit_regs(cs, SP_UPDATE_CNTL(CHIP, - .vs_state = true, - .hs_state = true, - .ds_state = true, - .gs_state = true, - .fs_state = true, - .gfx_uav = true, - .gfx_shared_const = shared_consts_enable)); + crb.add(SP_UPDATE_CNTL(CHIP, .vs_state = true, .hs_state = true, + .ds_state = true, .gs_state = true, + .fs_state = true, .gfx_uav = true, + .gfx_shared_const = shared_consts_enable)); for (size_t stage_idx = MESA_SHADER_VERTEX; stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) { mesa_shader_stage stage = (mesa_shader_stage) stage_idx; - tu6_emit_xs_config(cs, stage, variants[stage]); + tu6_emit_xs_config(crb, stage, variants[stage]); } + crb.flush(); + for (size_t stage_idx = MESA_SHADER_VERTEX; stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) { mesa_shader_stage stage = (mesa_shader_stage) stage_idx; diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 300a7b458ed..1fab462cd69 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -303,13 +303,13 @@ struct tu_pvtmem_config { template void -tu6_emit_xs_config(struct tu_cs *cs, +tu6_emit_xs_config(struct tu_crb &crb, mesa_shader_stage stage, const struct ir3_shader_variant *xs); template void -tu6_emit_shared_consts_enable(struct tu_cs *cs, bool shared_consts_enable); +tu6_emit_shared_consts_enable(struct tu_crb &crb, bool shared_consts_enable); template void diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 74b80203c26..b9997fb6c4d 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1769,29 +1769,29 @@ tu6_emit_cs_config(struct tu_cs *cs, { bool shared_consts_enable = ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED; - tu6_emit_shared_consts_enable(cs, shared_consts_enable); - tu_cs_emit_regs(cs, SP_UPDATE_CNTL(CHIP, - .cs_state = true, - .cs_uav = true, - .cs_shared_const = 
shared_consts_enable)); + with_crb (cs) { + tu6_emit_shared_consts_enable(crb, shared_consts_enable); - tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v); + crb.add(SP_UPDATE_CNTL(CHIP, .cs_state = true, .cs_uav = true, + .cs_shared_const = shared_consts_enable)); + tu6_emit_xs_config(crb, MESA_SHADER_COMPUTE, v); + } tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova); - uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); + tu_crb crb = cs->crb(0); + + uint32_t shared_size = MAX2(((int) v->shared_size - 1) / 1024, 1); enum a6xx_const_ram_mode mode = v->constlen > 256 ? CONSTLEN_512 : (v->constlen > 192 ? CONSTLEN_256 : (v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128)); - tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_1, 1); - tu_cs_emit(cs, A6XX_SP_CS_CNTL_1_SHARED_SIZE(shared_size) | - A6XX_SP_CS_CNTL_1_CONSTANTRAMMODE(mode)); + crb.add( + A6XX_SP_CS_CNTL_1(.shared_size = shared_size, .constantrammode = mode)); if (CHIP == A6XX && cs->device->physical_device->info->props.has_lpac) { - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CTRL_REG1, 1); - tu_cs_emit(cs, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); + crb.add(HLSQ_CS_CTRL_REG1(CHIP, .shared_size = shared_size, + .constantrammode = mode)); } uint32_t local_invocation_id = @@ -1808,58 +1808,49 @@ tu6_emit_cs_config(struct tu_cs *cs, enum a6xx_threadsize thrsz_cs = cs->device->physical_device->info->props .supports_double_threadsize ? 
thrsz : THREAD128; if (CHIP == A6XX) { - tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONST_CONFIG_0, 2); - tu_cs_emit(cs, - A6XX_SP_CS_CONST_CONFIG_0_WGIDCONSTID(work_group_id) | - A6XX_SP_CS_CONST_CONFIG_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_SP_CS_CONST_CONFIG_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_SP_CS_CONST_CONFIG_0_LOCALIDREGID(local_invocation_id)); - tu_cs_emit(cs, A6XX_SP_CS_WGE_CNTL_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_SP_CS_WGE_CNTL_THREADSIZE(thrsz_cs)); + crb.add(SP_CS_CONST_CONFIG_0(CHIP, .wgidconstid = work_group_id, + .wgsizeconstid = regid(63, 0), + .wgoffsetconstid = regid(63, 0), + .localidregid = local_invocation_id)); + crb.add(SP_CS_WGE_CNTL(CHIP, .linearlocalidregid = regid(63, 0), + .threadsize = thrsz_cs)); + if (!cs->device->physical_device->info->props.supports_double_threadsize) { - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_WAVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_PS_WAVE_CNTL_THREADSIZE(thrsz)); + crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = thrsz)); } if (cs->device->physical_device->info->props.has_lpac) { - tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_WIE_CNTL_0, 2); - tu_cs_emit(cs, - A6XX_SP_CS_WIE_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_SP_CS_WIE_CNTL_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_SP_CS_WIE_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_SP_CS_WIE_CNTL_0_LOCALIDREGID(local_invocation_id)); - tu_cs_emit(cs, A6XX_SP_CS_WIE_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_SP_CS_WIE_CNTL_1_THREADSIZE(thrsz)); + crb.add(A6XX_SP_CS_WIE_CNTL_0(.wgidconstid = work_group_id, + .wgsizeconstid = regid(63, 0), + .wgoffsetconstid = regid(63, 0), + .localidregid = local_invocation_id)); + crb.add(SP_CS_WIE_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0), + .threadsize = thrsz)); } } else { unsigned tile_height = (v->local_size[1] % 8 == 0) ? 3 : (v->local_size[1] % 4 == 0) ? 5 : (v->local_size[1] % 2 == 0) ? 
9 : 17; - tu_cs_emit_regs( - cs, SP_CS_WGE_CNTL(CHIP, - .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs, - .workgrouprastorderzfirsten = true, - .wgtilewidth = 4, .wgtileheight = tile_height)); + crb.add(SP_CS_WGE_CNTL(CHIP, .linearlocalidregid = regid(63, 0), + .threadsize = thrsz_cs, + .workgrouprastorderzfirsten = true, + .wgtilewidth = 4, .wgtileheight = tile_height)); - tu_cs_emit_regs(cs, SP_PS_WAVE_CNTL(CHIP, .threadsize = THREAD64)); + crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = THREAD64)); - tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_WIE_CNTL_0, 1); - tu_cs_emit(cs, A6XX_SP_CS_WIE_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_SP_CS_WIE_CNTL_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_SP_CS_WIE_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_SP_CS_WIE_CNTL_0_LOCALIDREGID(local_invocation_id)); + crb.add(A6XX_SP_CS_WIE_CNTL_0(.wgidconstid = work_group_id, + .wgsizeconstid = regid(63, 0), + .wgoffsetconstid = regid(63, 0), + .localidregid = local_invocation_id)); - tu_cs_emit_regs(cs, - SP_CS_WIE_CNTL_1(CHIP, - .linearlocalidregid = regid(63, 0), - .threadsize = thrsz_cs, - .workitemrastorder = - v->cs.force_linear_dispatch ? - WORKITEMRASTORDER_LINEAR : - WORKITEMRASTORDER_TILED, )); + crb.add(SP_CS_WIE_CNTL_1( + CHIP, .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs, + .workitemrastorder = v->cs.force_linear_dispatch + ? WORKITEMRASTORDER_LINEAR + : WORKITEMRASTORDER_TILED)); - tu_cs_emit_regs(cs, SP_CS_HYSTERESIS(CHIP, 0)); // Sometimes is 0x08000000 + crb.add(SP_CS_HYSTERESIS(CHIP, 0)); // Sometimes is 0x08000000 } }