diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 7315f63dfab..62dc144d00c 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -885,7 +885,6 @@ a730_raw_magic_regs = [ [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_810B, 0x3], [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AC, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], @@ -1006,7 +1005,6 @@ add_gpus([ [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_810B, 0x3], [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AC, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], @@ -1091,7 +1089,6 @@ add_gpus([ [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_810B, 0x3], [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AC, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], @@ -1166,7 +1163,6 @@ add_gpus([ [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_810B, 0x3], [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AC, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 06ef539f687..0cb5174e5ff 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3816,7 +3816,7 @@ to upconvert to 32b float internally? - 0.0 if GREATER - 1.0 if LESS - + @@ -3830,7 +3830,7 @@ to upconvert to 32b float internally? Disable LRZ based on previous direction and the current one. If DIR_WRITE is not enabled - there is no write to direction buffer. - + @@ -3903,7 +3903,10 @@ to upconvert to 32b float internally? 
- + + + + diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 229a1dc98fa..86c6f0a280c 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -3317,7 +3317,7 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer, if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0) continue; - tu_lrz_disable_during_renderpass(cmd); + tu_lrz_disable_during_renderpass(cmd); } /* vkCmdClearAttachments is supposed to respect the predicate if active. The diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index ab635d877fe..dc5adb1a67b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -5172,12 +5172,14 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, if (dirty_lrz) { struct tu_cs cs; - uint32_t size = cmd->device->physical_device->info->a6xx.lrz_track_quirk ? 10 : 8; + uint32_t size = 8 + + (cmd->device->physical_device->info->a6xx.lrz_track_quirk ? 2 : 0) + + (CHIP >= A7XX ? 2 : 0); // A7XX has extra packets from LRZ_CNTL2. cmd->state.lrz_and_depth_plane_state = tu_cs_draw_state(&cmd->sub_cs, &cs, size); tu6_update_simplified_stencil_state(cmd); - tu6_emit_lrz(cmd, &cs); + tu6_emit_lrz(cmd, &cs); tu6_build_depth_plane_z_mode(cmd, &cs); } diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 3c175133335..0d35a419277 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -97,6 +97,28 @@ tu6_write_lrz_reg(struct tu_cmd_buffer *cmd, struct tu_cs *cs, } } +template +static void +tu6_write_lrz_cntl(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct A6XX_GRAS_LRZ_CNTL cntl) +{ + if (CHIP >= A7XX) { + // A7XX split LRZ_CNTL into two separate registers. 
+ struct tu_reg_value cntl2 = A7XX_GRAS_LRZ_CNTL2( + .disable_on_wrong_dir = cntl.disable_on_wrong_dir, + .fc_enable = cntl.fc_enable, + ); + cntl.disable_on_wrong_dir = false; + cntl.fc_enable = false; + + tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL(cntl)); + tu6_write_lrz_reg(cmd, cs, cntl2); + } else { + tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL(cntl)); + } +} + +template static void tu6_disable_lrz_via_depth_view(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { @@ -107,10 +129,10 @@ tu6_disable_lrz_via_depth_view(struct tu_cmd_buffer *cmd, struct tu_cs *cs) .base_mip_level = 0b1111, )); - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, cs, { .enable = true, .disable_on_wrong_dir = true, - )); + }); tu_emit_event_write(cmd, cs, FD_LRZ_CLEAR); tu_emit_event_write(cmd, cs, FD_LRZ_FLUSH); @@ -315,7 +337,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) * This is accomplished by making later GRAS_LRZ_CNTL (in binning pass) * to fail the comparison of depth views. 
*/ - tu6_disable_lrz_via_depth_view(cmd, cs); + tu6_disable_lrz_via_depth_view(cmd, cs); tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = 0)); } else if (lrz->fast_clear || lrz->gpu_dir_tracking) { if (lrz->gpu_dir_tracking) { @@ -323,11 +345,11 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = lrz->image_view->view.GRAS_LRZ_DEPTH_VIEW)); } - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, cs, { .enable = true, .fc_enable = lrz->fast_clear, .disable_on_wrong_dir = lrz->gpu_dir_tracking, - )); + }); /* LRZ_CLEAR.fc_enable + LRZ_CLEAR - clears fast-clear buffer; * LRZ_CLEAR.disable_on_wrong_dir + LRZ_CLEAR - sets direction to @@ -365,13 +387,13 @@ tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } /* Enable flushing of LRZ fast-clear and of direction buffer */ - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, cs, { .enable = true, .fc_enable = cmd->state.lrz.fast_clear, .disable_on_wrong_dir = cmd->state.lrz.gpu_dir_tracking, - )); + }); } else { - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL(0)); + tu6_write_lrz_cntl(cmd, cs, {.enable = false}); } tu_emit_event_write(cmd, cs, FD_LRZ_FLUSH); @@ -413,10 +435,10 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) * LRZ test, so LRZ should be cleared. 
*/ if (lrz->fast_clear) { - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, &cmd->cs, { .enable = true, .fc_enable = true, - )); + }); tu_emit_event_write(cmd, &cmd->cs, FD_LRZ_CLEAR); tu_emit_event_write(cmd, &cmd->cs, FD_LRZ_FLUSH); } else { @@ -445,7 +467,7 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, return; tu6_emit_lrz_buffer(cs, image); - tu6_disable_lrz_via_depth_view(cmd, cs); + tu6_disable_lrz_via_depth_view(cmd, cs); } TU_GENX(tu_disable_lrz); @@ -488,11 +510,11 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd, .base_mip_level = range->baseMipLevel, )); - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, &cmd->cs, { .enable = true, .fc_enable = fast_clear, .disable_on_wrong_dir = true, - )); + }); tu_emit_event_write(cmd, &cmd->cs, FD_LRZ_CLEAR); tu_emit_event_write(cmd, &cmd->cs, FD_LRZ_FLUSH); @@ -503,6 +525,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd, } TU_GENX(tu_lrz_clear_depth_image); +template void tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd) { @@ -512,13 +535,14 @@ tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd) cmd->state.dirty |= TU_CMD_DIRTY_LRZ; if (cmd->state.lrz.gpu_dir_tracking) { - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( + tu6_write_lrz_cntl(cmd, &cmd->cs, { .enable = true, .dir = LRZ_DIR_INVALID, .disable_on_wrong_dir = true, - )); + }); } } +TU_GENX(tu_lrz_disable_during_renderpass); /* update lrz state based on stencil-test func: * @@ -575,6 +599,7 @@ tu6_stencil_op_lrz_allowed(struct A6XX_GRAS_LRZ_CNTL *gras_lrz_cntl, return true; } +template static struct A6XX_GRAS_LRZ_CNTL tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, const uint32_t a) @@ -619,6 +644,8 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, gras_lrz_cntl.dir_write = cmd->state.lrz.gpu_dir_tracking; gras_lrz_cntl.disable_on_wrong_dir = cmd->state.lrz.gpu_dir_tracking; + if (CHIP >= A7XX) + gras_lrz_cntl.z_func = 
tu6_compare_func(depth_compare_op); /* LRZ is disabled until it is cleared, which means that one "wrong" * depth test or shader could disable LRZ until depth buffer is cleared. @@ -804,12 +831,14 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, return gras_lrz_cntl; } +template void tu6_emit_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { const uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment; - struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = tu6_calculate_lrz_state(cmd, a); + struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = tu6_calculate_lrz_state(cmd, a); - tu6_write_lrz_reg(cmd, cs, pack_A6XX_GRAS_LRZ_CNTL(gras_lrz_cntl)); + tu6_write_lrz_cntl(cmd, cs, gras_lrz_cntl); tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(.enable = gras_lrz_cntl.enable)); } +TU_GENX(tu6_emit_lrz); diff --git a/src/freedreno/vulkan/tu_lrz.h b/src/freedreno/vulkan/tu_lrz.h index 075dd9c6649..e751168f1e2 100644 --- a/src/freedreno/vulkan/tu_lrz.h +++ b/src/freedreno/vulkan/tu_lrz.h @@ -42,6 +42,7 @@ struct tu_lrz_state enum tu_lrz_direction prev_direction; }; +template void tu6_emit_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs); @@ -83,6 +84,7 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs); void tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs); +template void tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd);