diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py
index 9729585af2f..a262033322e 100644
--- a/src/freedreno/common/freedreno_devices.py
+++ b/src/freedreno/common/freedreno_devices.py
@@ -1027,7 +1027,6 @@ a730_raw_magic_regs = [
 
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
 ]
 
 a740_magic_regs = dict(
@@ -1075,11 +1074,6 @@ a740_raw_magic_regs = [
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
 
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
-
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1091,10 +1085,7 @@ a740_raw_magic_regs = [
 
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
-
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
 ]
 
 add_gpus([
@@ -1187,11 +1178,6 @@ add_gpus([
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
 
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
-
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1203,10 +1189,7 @@ add_gpus([
 
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-        [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
-
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
     ],
 ))
 
@@ -1270,11 +1253,6 @@ add_gpus([
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
 
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
-
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1286,7 +1264,6 @@ add_gpus([
 
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-        [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
     ],
 ))
 
@@ -1374,10 +1351,6 @@ add_gpus([
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
 
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
         [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
@@ -1389,11 +1362,8 @@ add_gpus([
 
         [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-        [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
         [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
 
-        [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
-
         [0x930a, 0],
         [0x960a, 1],
         [A6XXRegs.REG_A7XX_SP_PS_OUTPUT_CONST_CNTL, 0],
diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc
index d1abfa09222..2a8f7e31c71 100644
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@@ -1533,6 +1533,10 @@ r3d_setup(struct tu_cmd_buffer *cmd,
    if (!cmd->state.pass) {
       tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
       tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
+      if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+         tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+         tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+      }
    }
 
    if (!(blit_param & R3D_DST_GMEM)) {
@@ -3911,6 +3915,7 @@ fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
                               struct tu_cs *cs,
                               void *data,
                               VkOffset2D common_bin_offset,
+                              const VkOffset2D *hw_viewport_offsets,
                               unsigned views,
                               const VkExtent2D *frag_areas,
                               const VkRect2D *bins)
@@ -4184,6 +4189,7 @@ fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
                             struct tu_cs *cs,
                             void *data,
                             VkOffset2D common_bin_offset,
+                            const VkOffset2D *hw_viewport_offsets,
                             unsigned views,
                             const VkExtent2D *frag_areas,
                             const VkRect2D *bins)
@@ -4819,6 +4825,7 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs,
                       void *data,
                       VkOffset2D common_bin_offset,
+                      const VkOffset2D *hw_viewport_offsets,
                       unsigned views,
                       const VkExtent2D *frag_areas,
                       const VkRect2D *bins)
@@ -5291,6 +5298,7 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        void *data,
                        VkOffset2D common_bin_offset,
+                       const VkOffset2D *hw_viewport_offsets,
                        unsigned views,
                        const VkExtent2D *frag_areas,
                        const VkRect2D *bins)
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 3e4af0d83e7..28fa02ae5be 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -1240,6 +1240,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
 {
    struct tu_physical_device *phys_dev = cmd->device->physical_device;
    const struct tu_tiling_config *tiling = cmd->state.tiling;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
 
    bool hw_binning = use_hw_binning(cmd);
@@ -1251,6 +1252,24 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
       tu_emit_vsc(cmd, &cmd->cs);
    }
 
+   unsigned views = tu_fdm_num_layers(cmd);
+   bool bin_is_scaled = false;
+
+   if (fdm) {
+      for (unsigned i = 0; i < views; i++) {
+         if (tile->frag_areas[i].width != 1 ||
+             tile->frag_areas[i].height != 1) {
+            bin_is_scaled = true;
+            break;
+         }
+      }
+   }
+
+   bool bin_scale_en =
+      cmd->device->physical_device->info->a7xx.has_hw_bin_scaling &&
+      views <= MAX_HW_SCALED_VIEWS && !cmd->state.rp.shared_viewport &&
+      bin_is_scaled;
+
    tu6_emit_bin_size(
       cs, tiling->tile0.width, tiling->tile0.height,
       {
@@ -1272,7 +1291,22 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
    const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
    const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
 
-   tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
+
+   if (bin_scale_en) {
+      /* It seems that the window scissor happens *before*
+       * GRAS_BIN_FOVEAT_OFFSET_* is applied to the fragment coordinates,
+       * unlike the window offset which happens after it is applied. This
+       * means that the window scissor cannot do its job and we have to
+       * disable it by setting it to the entire FB size (plus an extra tile
+       * size, in case GRAS_BIN_FOVEAT_OFFSET_* is not in use). With FDM it is
+       * effectively replaced by the user's scissor anyway.
+       */
+      uint32_t width = fb->width + tiling->tile0.width;
+      uint32_t height = fb->height + tiling->tile0.height;
+      tu6_emit_window_scissor(cs, 0, 0, width, height);
+   } else {
+      tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
+   }
    tu6_emit_window_offset(cs, x1, y1);
 
    unsigned slot = ffs(tile->slot_mask) - 1;
@@ -1308,13 +1342,15 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
    tu_cs_emit(cs, 0x0);
 
    if (fdm) {
-      unsigned views = tu_fdm_num_layers(cmd);
       VkRect2D bin = { { x1, y1 },
         { (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height } };
       VkRect2D bins[views];
+      VkOffset2D frag_offsets[MAX_VIEWS];
       for (unsigned i = 0; i < views; i++) {
+         frag_offsets[i] = (VkOffset2D) { 0, 0 };
+
         if (!fdm_offsets || cmd->state.rp.shared_viewport) {
            bins[i] = bin;
            continue;
         }
@@ -1330,12 +1366,67 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
            MAX2(MIN2((int32_t)y1 + bin.extent.height - bin_offset.y,
                 MAX_VIEWPORT_SIZE) - bins[i].offset.y, 0);
      }
 
+      if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+         if (bin_scale_en) {
+            VkExtent2D frag_areas[MAX_HW_SCALED_VIEWS];
+            for (unsigned i = 0; i < MAX_HW_SCALED_VIEWS; i++) {
+               if (i >= views) {
+                  /* Make sure unused views aren't garbage */
+                  frag_areas[i] = (VkExtent2D) {1, 1};
+                  frag_offsets[i] = (VkOffset2D) { 0, 0 };
+                  continue;
+               }
+
+               frag_areas[i] = tile->frag_areas[i];
+               frag_offsets[i].x = x1 - x1 / tile->frag_areas[i].width;
+               frag_offsets[i].y = y1 - y1 / tile->frag_areas[i].height;
+            }
+
+            tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT(
+               .binscaleen = bin_scale_en,
+               .xscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].width),
+               .yscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].height),
+               .xscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].width),
+               .yscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].height),
+               .xscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].width),
+               .yscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].height),
+               .xscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].width),
+               .yscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].height),
+               .xscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].width),
+               .yscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].height),
+               .xscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].width),
+               .yscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].height)),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_0(
+               .xoffset_0 = frag_offsets[0].x,
+               .xoffset_1 = frag_offsets[1].x,
+               .xoffset_2 = frag_offsets[2].x),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_1(
+               .xoffset_3 = frag_offsets[3].x,
+               .xoffset_4 = frag_offsets[4].x,
+               .xoffset_5 = frag_offsets[5].x),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_2(
+               .yoffset_0 = frag_offsets[0].y,
+               .yoffset_1 = frag_offsets[1].y,
+               .yoffset_2 = frag_offsets[2].y),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_3(
+               .yoffset_3 = frag_offsets[3].y,
+               .yoffset_4 = frag_offsets[4].y,
+               .yoffset_5 = frag_offsets[5].y));
+
+            tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT(
+               .binscaleen = bin_scale_en));
+         } else {
+            tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+            tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+         }
+      }
+
       util_dynarray_foreach (&cmd->fdm_bin_patchpoints,
                              struct tu_fdm_bin_patchpoint, patch) {
         tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
         tu_cs_emit_qw(cs, patch->iova);
-        patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 }, views,
-                     tile->frag_areas, bins);
+        patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 },
+                     frag_offsets, views, tile->frag_areas, bins);
      }
 
      /* Make the CP wait until the CP_MEM_WRITE's to the command buffers
@@ -1989,6 +2080,12 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_tiling_config *tiling = cmd->state.tiling;
 
+   /* Reset bin scaling. */
+   if (phys_dev->info->a7xx.has_hw_bin_scaling) {
+      tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+      tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+   }
+
    /* If this command buffer may be executed multiple times, then
     * viewports/scissor states may have been changed by previous executions
     * and we need to reset them before executing the binning IB. With FDM
@@ -2000,8 +2097,10 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
       unsigned num_views = tu_fdm_num_layers(cmd);
       VkExtent2D unscaled_frag_areas[num_views];
       VkRect2D bins[num_views];
+      VkOffset2D frag_offsets[num_views];
       for (unsigned i = 0; i < num_views; i++) {
         unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
+        frag_offsets[i] = (VkOffset2D) { 0, 0 };
         if (fdm_offsets && !cmd->state.rp.shared_viewport) {
            /* We need to shift over the viewport and scissor during the
             * binning pass to match the shift applied when rendering. The way
@@ -2034,8 +2133,8 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
            continue;
         tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
         tu_cs_emit_qw(cs, patch->iova);
-        patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, num_views,
-                     unscaled_frag_areas, bins);
+        patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, frag_offsets,
+                     num_views, unscaled_frag_areas, bins);
      }
 
      tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
@@ -2465,6 +2564,12 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
    tu_cs_emit(cs, 0x0);
 
+   /* Reset bin scaling. */
+   if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+      tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+      tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+   }
+
    tu_autotune_begin_renderpass(cmd, cs, autotune_result);
 
    tu_cs_sanity_check(cs);
@@ -2794,6 +2899,13 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
         height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
      }
 
+     /* HW viewport scaling supports a maximum fragment width/height of 4.
+      */
+     if (views <= MAX_HW_SCALED_VIEWS) {
+        width = MIN2(width, 4);
+        height = MIN2(height, 4);
+     }
+
      /* Make sure that the width/height divides the tile width/height so
       * we don't have to do extra awkward clamping of the edges of each
       * bin when resolving. It also has to divide the fdm offset, if any.
@@ -6451,6 +6563,7 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     void *data,
                     VkOffset2D common_bin_offset,
+                    const VkOffset2D *hw_viewport_offsets,
                     unsigned views,
                     const VkExtent2D *frag_areas,
                     const VkRect2D *bins)
@@ -6466,7 +6579,10 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
       */
      VkExtent2D area = frag_areas[MIN2(i, views - 1)];
      VkRect2D bin = bins[MIN2(i, views - 1)];
+     VkOffset2D hw_viewport_offset = hw_viewport_offsets[MIN2(i, views - 1)];
      VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
+     offset.x -= hw_viewport_offset.x;
+     offset.y -= hw_viewport_offset.y;
 
      tu_cs_emit(cs, area.width);
      tu_cs_emit(cs, area.height);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index 4bc23194064..d180c6094f1 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -796,6 +796,7 @@ typedef void (*tu_fdm_bin_apply_t)(struct tu_cmd_buffer *cmd,
                                    struct tu_cs *cs,
                                    void *data,
                                    VkOffset2D common_bin_offset,
+                                   const VkOffset2D *hw_viewport_offsets,
                                    unsigned views,
                                    const VkExtent2D *frag_areas,
                                    const VkRect2D *bins);
@@ -852,6 +853,7 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
    */
   unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
   VkExtent2D unscaled_frag_areas[num_views];
+  VkOffset2D hw_viewport_offsets[num_views];
   VkRect2D bins[num_views];
   for (unsigned i = 0; i < num_views; i++) {
      unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
@@ -859,8 +861,9 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
         { 0, 0 },
         { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
      };
+     hw_viewport_offsets[i] = (VkOffset2D) { 0, 0 };
   }
-  apply(cmd, cs, state, (VkOffset2D) {0, 0}, num_views, unscaled_frag_areas, bins);
+  apply(cmd, cs, state, (VkOffset2D) {0, 0}, hw_viewport_offsets, num_views, unscaled_frag_areas, bins);
   assert(tu_cs_get_cur_iova(cs) == patch.iova + patch.size * sizeof(uint32_t));
 
   util_dynarray_append(&cmd->fdm_bin_patchpoints,
diff --git a/src/freedreno/vulkan/tu_common.h b/src/freedreno/vulkan/tu_common.h
index 283dbf8e2e6..e92a3effbb0 100644
--- a/src/freedreno/vulkan/tu_common.h
+++ b/src/freedreno/vulkan/tu_common.h
@@ -105,6 +105,7 @@
 #define TU_MAX_DRM_DEVICES 8
 #define MAX_VIEWS 16
+#define MAX_HW_SCALED_VIEWS 6
 #define MAX_BIND_POINTS 2 /* compute + graphics */
 /* match the latest Qualcomm driver which is also a hw limit on later gens */
 #define MAX_STORAGE_BUFFER_RANGE (1u << 27)
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 9dab846838a..b06f6d3a82f 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -2623,7 +2623,9 @@ tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin,
 
 static void
 fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
-                    VkOffset2D common_bin_offset, unsigned views,
+                    VkOffset2D common_bin_offset,
+                    const VkOffset2D *hw_viewport_offsets,
+                    unsigned views,
                     const VkExtent2D *frag_areas, const VkRect2D *bins)
 {
    const struct apply_viewport_state *state =
@@ -2645,6 +2647,9 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
         (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
      VkRect2D bin = (state->share_scale || views == 1) ?
         bins[0] : bins[i];
+     VkOffset2D hw_viewport_offset =
+        (state->share_scale || views == 1) ? hw_viewport_offsets[0] :
+        hw_viewport_offsets[i];
      /* Implement fake_single_viewport by replicating viewport 0 across all
       * views.
       */
@@ -2667,6 +2672,8 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
 
      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
                                                common_bin_offset);
+     offset.x -= hw_viewport_offset.x;
+     offset.y -= hw_viewport_offset.y;
 
      vp.viewports[i].x = scale_x * viewport.x + offset.x;
      vp.viewports[i].y = scale_y * viewport.y + offset.y;
@@ -2747,7 +2754,9 @@ tu6_emit_scissor(struct tu_cs *cs, const struct vk_viewport_state *vp)
 
 static void
 fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
-                   VkOffset2D common_bin_offset, unsigned views,
+                   VkOffset2D common_bin_offset,
+                   const VkOffset2D *hw_viewport_offsets,
+                   unsigned views,
                    const VkExtent2D *frag_areas, const VkRect2D *bins)
 {
    const struct apply_viewport_state *state =
@@ -2762,6 +2771,9 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
         (state->share_scale || views == 1) ? bins[0] : bins[i];
      VkRect2D scissor = state->fake_single_viewport ?
        state->vp.scissors[0] : state->vp.scissors[i];
+     VkOffset2D hw_viewport_offset =
+        (state->share_scale || views == 1) ? hw_viewport_offsets[0] :
+        hw_viewport_offsets[i];
 
      /* Transform the scissor following the viewport. It's unclear how this
       * is supposed to handle cases where the scissor isn't aligned to the
@@ -2771,6 +2783,8 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
       */
      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
                                                common_bin_offset);
+     offset.x -= hw_viewport_offset.x;
+     offset.y -= hw_viewport_offset.y;
      VkOffset2D min = {
        scissor.offset.x / frag_area.width + offset.x,
        scissor.offset.y / frag_area.width + offset.y,
@@ -2785,12 +2799,14 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
       */
      uint32_t scaled_width = bin.extent.width / frag_area.width;
      uint32_t scaled_height = bin.extent.height / frag_area.height;
-     vp.scissors[i].offset.x = MAX2(min.x, common_bin_offset.x);
-     vp.scissors[i].offset.y = MAX2(min.y, common_bin_offset.y);
+     uint32_t bin_x = common_bin_offset.x - hw_viewport_offset.x;
+     uint32_t bin_y = common_bin_offset.y - hw_viewport_offset.y;
+     vp.scissors[i].offset.x = MAX2(min.x, bin_x);
+     vp.scissors[i].offset.y = MAX2(min.y, bin_y);
      vp.scissors[i].extent.width =
-        MIN2(max.x, common_bin_offset.x + scaled_width) - vp.scissors[i].offset.x;
+        MIN2(max.x, bin_x + scaled_width) - vp.scissors[i].offset.x;
      vp.scissors[i].extent.height =
-        MIN2(max.y, common_bin_offset.y + scaled_height) - vp.scissors[i].offset.y;
+        MIN2(max.y, bin_y + scaled_height) - vp.scissors[i].offset.y;
   }
 
   TU_CALLX(cs->device, tu6_emit_scissor)(cs, &vp);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
index c4d933887db..60f428a346d 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
@@ -827,7 +827,7 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
 {
    struct fd_screen *screen = ctx->screen;
 
-   fd_ncrb ncrb(cs, 25 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
+   fd_ncrb ncrb(cs, 27 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
 
    if (CHIP >= A7XX) {
      /* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has
@@ -898,6 +898,11 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
      ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed));
      ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0));
    }
+
+   if (screen->info->a7xx.has_hw_bin_scaling) {
+      ncrb.add(A7XX_GRAS_BIN_FOVEAT());
+      ncrb.add(A7XX_RB_BIN_FOVEAT());
+   }
 }
 
 /**