From c166c5100b6251f83f9b881b415b8bc91a402615 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 31 Oct 2023 10:55:16 -0400 Subject: [PATCH] tu/a750: Basic a750 support Could run vkcube. Based on changes from Jonathan Marek Signed-off-by: Danylo Piliaiev Part-of: --- .../.gitlab-ci/reference/afuc_test.asm | 2 +- src/freedreno/common/freedreno_devices.py | 68 +++++++++++++++++++ src/freedreno/registers/adreno/a6xx.xml | 60 +++++++++++++++- src/freedreno/registers/adreno/adreno_pm4.xml | 4 +- src/freedreno/vulkan/tu_clear_blit.cc | 11 +++ src/freedreno/vulkan/tu_cmd_buffer.cc | 1 + src/freedreno/vulkan/tu_pipeline.cc | 33 +++++++-- 7 files changed, 170 insertions(+), 9 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/afuc_test.asm b/src/freedreno/.gitlab-ci/reference/afuc_test.asm index 0bbdf37a4ba..198b84331b8 100644 --- a/src/freedreno/.gitlab-ci/reference/afuc_test.asm +++ b/src/freedreno/.gitlab-ci/reference/afuc_test.asm @@ -210,7 +210,6 @@ CP_REG_WR_NO_CTXT: CP_RUN_OPENCL: CP_SCRATCH_TO_REG: CP_SET_BIN_DATA5_OFFSET: -CP_SET_CONSTANT: CP_SET_CTXSWITCH_IB: CP_SET_DRAW_INIT_FLAGS: CP_SET_MARKER: @@ -266,6 +265,7 @@ UNKN3: UNKN30: UNKN31: UNKN32: +UNKN45: UNKN48: UNKN5: UNKN6: diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 6f5f65a9c89..aca618b5367 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -789,6 +789,10 @@ a7xx_740 = A7XXProps( has_event_write_sample_count = True, ) +a7xx_750 = A7XXProps( + has_event_write_sample_count = True, + ) + a730_magic_regs = dict( TPL1_DBG_ECO_CNTL = 0x1000000, GRAS_DBG_ECO_CNTL = 0x800, @@ -931,6 +935,70 @@ add_gpus([ ], )) +add_gpus([ + GPUId(chip_id=0x43051401, name="FD750"), # KGSL, no speedbin data + GPUId(chip_id=0xffff43051401, name="FD750"), # Default no-speedbin fallback + ], A6xxGPUInfo( + CHIP.A7XX, + [a7xx_base, a7xx_750], + num_ccu = 6, + tile_align_w = 96, + tile_align_h = 32, + num_vsc_pipes = 32, + cs_shared_mem_size = 32 * 1024, + wave_granularity = 2, + fibers_per_sp = 128 * 2 * 16, + magic_regs = dict( + TPL1_DBG_ECO_CNTL = 0x11100000, + GRAS_DBG_ECO_CNTL = 0x00004800, + SP_CHICKEN_BITS = 0x10000400, + PC_MODE_CNTL = 0x00003f1f, + SP_DBG_ECO_CNTL = 0x10000000, + RB_DBG_ECO_CNTL = 0x00000001, + RB_DBG_ECO_CNTL_blit = 0x00000001, + RB_UNKNOWN_8E01 = 0x0, + VPC_DBG_ECO_CNTL = 0x02000000, + UCHE_UNKNOWN_0E12 = 0x40000000, + + RB_UNKNOWN_8E06 = 0x02082000, + ), + raw_magic_regs = [ + [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000], + [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], + [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], + [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], + [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600], + [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], + [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], + [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], + + [A6XXRegs.REG_A7XX_VPC_ATTR_BUF_SIZE_GMEM, 0x00020000], + [A6XXRegs.REG_A7XX_VPC_ATTR_BUF_BASE_GMEM, 0x00240000], + [A6XXRegs.REG_A7XX_PC_ATTR_BUF_SIZE_GMEM, 0x00020000], + + [0x930a, 0], + [0x960a, 1], + [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0], + [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0], + ], + )) + template = """\ /* Copyright (C) 2021 Google, Inc. * diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 0453adb029e..f3f5722ba76 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -2440,6 +2440,9 @@ to upconvert to 32b float internally? + + + @@ -2621,6 +2624,9 @@ to upconvert to 32b float internally? + + + @@ -2661,6 +2667,9 @@ to upconvert to 32b float internally? + + + @@ -2944,6 +2953,10 @@ to upconvert to 32b float internally? + + + + @@ -2954,6 +2967,10 @@ to upconvert to 32b float internally? + + + + @@ -3135,6 +3152,15 @@ to upconvert to 32b float internally? + + + + + + + + + @@ -3225,6 +3251,12 @@ to upconvert to 32b float internally? + + + + + + @@ -3596,8 +3628,6 @@ to upconvert to 32b float internally? - - @@ -3971,7 +4001,31 @@ to upconvert to 32b float internally? - + + + + + + Specify for which components the output color should be read + from alias, e.g. for: + + alias.1.b32.0 r3.x, c8.x + alias.1.b32.0 r2.x, c4.x + alias.1.b32.0 r1.x, c4.x + alias.1.b32.0 r0.x, c0.x + + the SP_PS_ALIASED_COMPONENTS would be 0x00001111 + + + + + + + + + + + diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml index 1b687eed5a7..bd1a1c493a2 100644 --- a/src/freedreno/registers/adreno/adreno_pm4.xml +++ b/src/freedreno/registers/adreno/adreno_pm4.xml @@ -324,7 +324,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> fetch state sub-blocks and initiate shader code DMAs load constant into chip and to memory - + load sequencer instruction memory (pointer-based) load sequencer instruction memory (code embedded in packet) @@ -573,6 +573,8 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> + + Write CP_CONTEXT_SWITCH_*_INFO from CP to the following dwords, and forcibly switch to the indicated context. diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index c4336820816..26ca84654c9 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -889,6 +889,8 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type, tu_cs_emit_regs(cs, PC_RASTER_CNTL(CHIP)); if (CHIP == A6XX) { tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107()); + } else { + tu_cs_emit_regs(cs, A7XX_PC_RASTER_CNTL_V2()); } tu_cs_emit_regs(cs, @@ -1309,6 +1311,7 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt)); tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } static void @@ -1323,6 +1326,7 @@ r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t laye tu_cs_image_flag_ref(cs, &iview->view, layer); tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled)); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } static void @@ -1334,6 +1338,7 @@ r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la tu_cs_emit(cs, 0); tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } static void @@ -1353,6 +1358,7 @@ r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t A6XX_RB_MRT_BASE_GMEM(0, 0)); tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } static void @@ -1390,6 +1396,7 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = color_format)); tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } static uint8_t @@ -1477,8 +1484,10 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_CNTL()); tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL()); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_STENCIL_CNTL()); tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK()); tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK()); tu_cs_emit_regs(cs, A6XX_RB_STENCILREF()); @@ -2907,11 +2916,13 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, .z_test_enable = z_clear, .z_write_enable = z_clear, .zfunc = FUNC_ALWAYS)); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_CNTL(z_clear)); tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL( .stencil_enable = s_clear, .func = FUNC_ALWAYS, .zpass = STENCIL_REPLACE)); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_STENCIL_CNTL(s_clear)); tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff)); tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff)); tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val)); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 586753a988b..d9f3cf307f1 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -543,6 +543,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, tu_cs_emit_regs( cs, A7XX_RB_RENDER_CNTL(.binning = binning, .raster_mode = TYPE_TILED, .raster_direction = LR_TB)); + tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL(.binning = binning)); } static void diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index e0a119af4ac..c1ebf2e7680 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -738,10 +738,12 @@ tu6_emit_vpc(struct tu_cs *cs, uint16_t reg_sp_xs_vpc_dst_reg; uint16_t reg_vpc_xs_pack; uint16_t reg_vpc_xs_clip_cntl; + uint16_t reg_vpc_xs_clip_cntl_v2; uint16_t reg_gras_xs_cl_cntl; uint16_t reg_pc_xs_out_cntl; uint16_t reg_sp_xs_primitive_cntl; uint16_t reg_vpc_xs_layer_cntl; + uint16_t reg_vpc_xs_layer_cntl_v2; uint16_t reg_gras_xs_layer_cntl; } reg_config[] = { [MESA_SHADER_VERTEX] = { @@ -749,10 +751,12 @@ tu6_emit_vpc(struct tu_cs *cs, REG_A6XX_SP_VS_VPC_DST_REG(0), REG_A6XX_VPC_VS_PACK, REG_A6XX_VPC_VS_CLIP_CNTL, + REG_A6XX_VPC_VS_CLIP_CNTL_V2, REG_A6XX_GRAS_VS_CL_CNTL, REG_A6XX_PC_VS_OUT_CNTL, REG_A6XX_SP_VS_PRIMITIVE_CNTL, REG_A6XX_VPC_VS_LAYER_CNTL, + REG_A6XX_VPC_VS_LAYER_CNTL_V2, REG_A6XX_GRAS_VS_LAYER_CNTL }, [MESA_SHADER_TESS_CTRL] = { @@ -761,6 +765,7 @@ tu6_emit_vpc(struct tu_cs *cs, 0, 0, 0, + 0, REG_A6XX_PC_HS_OUT_CNTL, 0, 0, @@ -771,10 +776,12 @@ tu6_emit_vpc(struct tu_cs *cs, REG_A6XX_SP_DS_VPC_DST_REG(0), REG_A6XX_VPC_DS_PACK, REG_A6XX_VPC_DS_CLIP_CNTL, + REG_A6XX_VPC_DS_CLIP_CNTL_V2, REG_A6XX_GRAS_DS_CL_CNTL, REG_A6XX_PC_DS_OUT_CNTL, REG_A6XX_SP_DS_PRIMITIVE_CNTL, REG_A6XX_VPC_DS_LAYER_CNTL, + REG_A6XX_VPC_DS_LAYER_CNTL_V2, REG_A6XX_GRAS_DS_LAYER_CNTL }, [MESA_SHADER_GEOMETRY] = { @@ -782,10 +789,12 @@ tu6_emit_vpc(struct tu_cs *cs, REG_A6XX_SP_GS_VPC_DST_REG(0), REG_A6XX_VPC_GS_PACK, REG_A6XX_VPC_GS_CLIP_CNTL, + REG_A6XX_VPC_GS_CLIP_CNTL_V2, REG_A6XX_GRAS_GS_CL_CNTL, REG_A6XX_PC_GS_OUT_CNTL, REG_A6XX_SP_GS_PRIMITIVE_CNTL, REG_A6XX_VPC_GS_LAYER_CNTL, + REG_A6XX_VPC_GS_LAYER_CNTL_V2, REG_A6XX_GRAS_GS_LAYER_CNTL }, }; @@ -910,6 +919,10 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos)); tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); + tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | + A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | + A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl_v2, 1); tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); @@ -951,7 +964,12 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | - A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc)); + A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) | + 0xff0000); + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1); + tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | + A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) | + 0xff0000); tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) | @@ -2911,7 +2929,7 @@ tu6_rast_size(struct tu_device *dev, if (CHIP == A6XX) { return 15 + (dev->physical_device->info->a6xx.has_shading_rate ? 8 : 0); } else { - return 15; + return 17; } } @@ -2967,6 +2985,10 @@ tu6_emit_rast(struct tu_cs *cs, if (CHIP == A6XX) { tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107( .raster_discard = rs->rasterizer_discard_enable)); + } else { + tu_cs_emit_regs(cs, A7XX_PC_RASTER_CNTL_V2( + .stream = rs->rasterization_stream, + .discard = rs->rasterizer_discard_enable)); } /* move to hw ctx init? */ @@ -2996,7 +3018,7 @@ static unsigned tu6_ds_size(struct tu_device *dev, const struct vk_depth_stencil_state *ds) { - return 11; + return 13; } template @@ -3016,6 +3038,7 @@ tu6_emit_ds(struct tu_cs *cs, .fail_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.fail), .zpass_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.pass), .zfail_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.depth_fail))); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_STENCIL_CNTL(ds->stencil.test_enable)); tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK( .mask = ds->stencil.front.compare_mask, @@ -3049,7 +3072,7 @@ tu6_rb_depth_cntl_size(struct tu_device *dev, const struct vk_render_pass_state *rp, const struct vk_rasterization_state *rs) { - return 2; + return 4; } template @@ -3084,8 +3107,10 @@ tu6_emit_rb_depth_cntl(struct tu_cs *cs, /* TODO don't set for ALWAYS/NEVER */ .z_read_enable = ds->depth.test_enable || ds->depth.bounds_test.enable, .z_bounds_enable = ds->depth.bounds_test.enable)); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_CNTL(depth_test)); } else { tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); + tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_CNTL()); } }