tu: Use GRAS bin offset registers

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36475>
Connor Abbott 2025-01-23 18:05:53 -05:00 committed by Marge Bot
parent 10e7f63734
commit b34b089ca1
7 changed files with 163 additions and 44 deletions

View file

@@ -1027,7 +1027,6 @@ a730_raw_magic_regs = [
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
 ]
 a740_magic_regs = dict(
@@ -1075,11 +1074,6 @@ a740_raw_magic_regs = [
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1091,10 +1085,7 @@ a740_raw_magic_regs = [
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
 ]
 add_gpus([
@@ -1187,11 +1178,6 @@ add_gpus([
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1203,10 +1189,7 @@ add_gpus([
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
   ],
 ))
@@ -1270,11 +1253,6 @@ add_gpus([
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
@@ -1286,7 +1264,6 @@ add_gpus([
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
   ],
 ))
@@ -1374,10 +1351,6 @@ add_gpus([
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
     [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
@@ -1389,11 +1362,8 @@ add_gpus([
     [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
-    [A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
     [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
-    [A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
     [0x930a, 0],
     [0x960a, 1],
     [A6XXRegs.REG_A7XX_SP_PS_OUTPUT_CONST_CNTL, 0],

View file

@@ -1533,6 +1533,10 @@ r3d_setup(struct tu_cmd_buffer *cmd,
    if (!cmd->state.pass) {
       tu_emit_cache_flush_ccu<CHIP>(cmd, cs, TU_CMD_CCU_SYSMEM);
       tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
+      if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+         tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+         tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+      }
    }
    if (!(blit_param & R3D_DST_GMEM)) {
@@ -3911,6 +3915,7 @@ fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
                               struct tu_cs *cs,
                               void *data,
                               VkOffset2D common_bin_offset,
+                              const VkOffset2D *hw_viewport_offsets,
                               unsigned views,
                               const VkExtent2D *frag_areas,
                               const VkRect2D *bins)
@@ -4184,6 +4189,7 @@ fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
                             struct tu_cs *cs,
                             void *data,
                             VkOffset2D common_bin_offset,
+                            const VkOffset2D *hw_viewport_offsets,
                             unsigned views,
                             const VkExtent2D *frag_areas,
                             const VkRect2D *bins)
@@ -4819,6 +4825,7 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs,
                       void *data,
                       VkOffset2D common_bin_offset,
+                      const VkOffset2D *hw_viewport_offsets,
                       unsigned views,
                       const VkExtent2D *frag_areas,
                       const VkRect2D *bins)
@@ -5291,6 +5298,7 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs,
                       void *data,
                       VkOffset2D common_bin_offset,
+                      const VkOffset2D *hw_viewport_offsets,
                       unsigned views,
                       const VkExtent2D *frag_areas,
                       const VkRect2D *bins)

View file

@@ -1240,6 +1240,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
 {
    struct tu_physical_device *phys_dev = cmd->device->physical_device;
    const struct tu_tiling_config *tiling = cmd->state.tiling;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
    bool hw_binning = use_hw_binning(cmd);
@@ -1251,6 +1252,24 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
       tu_emit_vsc<CHIP>(cmd, &cmd->cs);
    }
+   unsigned views = tu_fdm_num_layers(cmd);
+   bool bin_is_scaled = false;
+   if (fdm) {
+      for (unsigned i = 0; i < views; i++) {
+         if (tile->frag_areas[i].width != 1 ||
+             tile->frag_areas[i].height != 1) {
+            bin_is_scaled = true;
+            break;
+         }
+      }
+   }
+   bool bin_scale_en =
+      cmd->device->physical_device->info->a7xx.has_hw_bin_scaling &&
+      views <= MAX_HW_SCALED_VIEWS && !cmd->state.rp.shared_viewport &&
+      bin_is_scaled;
    tu6_emit_bin_size<CHIP>(
       cs, tiling->tile0.width, tiling->tile0.height,
       {
@@ -1272,7 +1291,22 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
    const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
    const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
+   if (bin_scale_en) {
+      /* It seems that the window scissor happens *before*
+       * GRAS_BIN_FOVEAT_OFFSET_* is applied to the fragment coordinates,
+       * unlike the window offset which happens after it is applied. This
+       * means that the window scissor cannot do its job and we have to
+       * disable it by setting it to the entire FB size (plus an extra tile
+       * size, in case GRAS_BIN_FOVEAT_OFFSET_* is not in use). With FDM it is
+       * effectively replaced by the user's scissor anyway.
+       */
+      uint32_t width = fb->width + tiling->tile0.width;
+      uint32_t height = fb->height + tiling->tile0.height;
+      tu6_emit_window_scissor(cs, 0, 0, width, height);
+   } else {
       tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
+   }
    tu6_emit_window_offset<CHIP>(cs, x1, y1);
    unsigned slot = ffs(tile->slot_mask) - 1;
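For orientation, here is a small standalone sketch of the scissor arithmetic in the branch above, using hypothetical framebuffer and tile sizes; the struct and function names below are illustrative only, not driver API.

/* Sketch only: with bin scaling enabled, the window scissor is opened up
 * to the whole framebuffer plus one tile instead of bounding the current
 * bin, because (per the comment above) it is applied before the
 * GRAS_BIN_FOVEAT_OFFSET_* offsets. All values are hypothetical.
 */
struct example_extent { unsigned width, height; };

static struct example_extent
scaled_window_scissor(struct example_extent fb, struct example_extent tile0)
{
   /* e.g. a 1920x1080 framebuffer with 96x96 tiles yields 2016x1176 */
   return (struct example_extent) {
      .width  = fb.width + tile0.width,
      .height = fb.height + tile0.height,
   };
}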
@@ -1308,13 +1342,15 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
    tu_cs_emit(cs, 0x0);
    if (fdm) {
-      unsigned views = tu_fdm_num_layers(cmd);
       VkRect2D bin = {
          { x1, y1 },
          { (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height }
       };
       VkRect2D bins[views];
+      VkOffset2D frag_offsets[MAX_VIEWS];
       for (unsigned i = 0; i < views; i++) {
+         frag_offsets[i] = (VkOffset2D) { 0, 0 };
         if (!fdm_offsets || cmd->state.rp.shared_viewport) {
            bins[i] = bin;
            continue;
@@ -1330,12 +1366,67 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
            MAX2(MIN2((int32_t)y1 + bin.extent.height - bin_offset.y, MAX_VIEWPORT_SIZE) - bins[i].offset.y, 0);
       }
+      if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+         if (bin_scale_en) {
+            VkExtent2D frag_areas[MAX_HW_SCALED_VIEWS];
+            for (unsigned i = 0; i < MAX_HW_SCALED_VIEWS; i++) {
+               if (i >= views) {
+                  /* Make sure unused views aren't garbage */
+                  frag_areas[i] = (VkExtent2D) {1, 1};
+                  frag_offsets[i] = (VkOffset2D) { 0, 0 };
+                  continue;
+               }
+               frag_areas[i] = tile->frag_areas[i];
+               frag_offsets[i].x = x1 - x1 / tile->frag_areas[i].width;
+               frag_offsets[i].y = y1 - y1 / tile->frag_areas[i].height;
+            }
+            tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT(
+               .binscaleen = bin_scale_en,
+               .xscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].width),
+               .yscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].height),
+               .xscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].width),
+               .yscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].height),
+               .xscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].width),
+               .yscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].height),
+               .xscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].width),
+               .yscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].height),
+               .xscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].width),
+               .yscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].height),
+               .xscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].width),
+               .yscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].height)),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_0(
+               .xoffset_0 = frag_offsets[0].x,
+               .xoffset_1 = frag_offsets[1].x,
+               .xoffset_2 = frag_offsets[2].x),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_1(
+               .xoffset_3 = frag_offsets[3].x,
+               .xoffset_4 = frag_offsets[4].x,
+               .xoffset_5 = frag_offsets[5].x),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_2(
+               .yoffset_0 = frag_offsets[0].y,
+               .yoffset_1 = frag_offsets[1].y,
+               .yoffset_2 = frag_offsets[2].y),
+            A7XX_GRAS_BIN_FOVEAT_OFFSET_3(
+               .yoffset_3 = frag_offsets[3].y,
+               .yoffset_4 = frag_offsets[4].y,
+               .yoffset_5 = frag_offsets[5].y));
+            tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT(
+               .binscaleen = bin_scale_en));
+         } else {
+            tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+            tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+         }
+      }
       util_dynarray_foreach (&cmd->fdm_bin_patchpoints,
                              struct tu_fdm_bin_patchpoint, patch) {
          tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
          tu_cs_emit_qw(cs, patch->iova);
-         patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 }, views,
-                      tile->frag_areas, bins);
+         patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 },
+                      frag_offsets, views, tile->frag_areas, bins);
       }
       /* Make the CP wait until the CP_MEM_WRITE's to the command buffers
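The per-view values packed into GRAS_BIN_FOVEAT and GRAS_BIN_FOVEAT_OFFSET_0..3 above can be summarized with a small standalone sketch. Everything below (struct bin_view, fill_bin_views, log2u) is illustrative naming rather than driver API, and it assumes power-of-two fragment areas of at most 4x4, the cap added to tu_calc_frag_area() in this change.

/* Illustrative sketch of the per-view GRAS bin-scaling values computed in
 * tu6_emit_tile_select() above. */
#include <assert.h>

#define MAX_HW_SCALED_VIEWS 6

struct bin_view {
   unsigned xscale, yscale; /* log2 of the fragment area, like util_logbase2() */
   int xoffset, yoffset;    /* GRAS_BIN_FOVEAT_OFFSET_* fields */
};

static unsigned
log2u(unsigned v)
{
   unsigned l = 0;
   while (v >>= 1)
      l++;
   return l;
}

static void
fill_bin_views(unsigned x1, unsigned y1, unsigned views,
               const unsigned frag_w[], const unsigned frag_h[],
               struct bin_view out[MAX_HW_SCALED_VIEWS])
{
   for (unsigned i = 0; i < MAX_HW_SCALED_VIEWS; i++) {
      /* Unused views are padded with a 1x1 area and zero offset. */
      unsigned w = i < views ? frag_w[i] : 1;
      unsigned h = i < views ? frag_h[i] : 1;
      out[i].xscale = log2u(w);
      out[i].yscale = log2u(h);
      out[i].xoffset = i < views ? (int)(x1 - x1 / w) : 0;
      out[i].yoffset = i < views ? (int)(y1 - y1 / h) : 0;
      /* The scaled bin origin plus the offset lands back on the unscaled
       * origin, presumably matching the (x1, y1) still programmed as the
       * window offset. */
      assert((int)(x1 / w) + out[i].xoffset == (int)x1);
   }
}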
@@ -1989,6 +2080,12 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_tiling_config *tiling = cmd->state.tiling;
+   /* Reset bin scaling. */
+   if (phys_dev->info->a7xx.has_hw_bin_scaling) {
+      tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+      tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+   }
    /* If this command buffer may be executed multiple times, then
     * viewports/scissor states may have been changed by previous executions
     * and we need to reset them before executing the binning IB. With FDM
@@ -2000,8 +2097,10 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
       unsigned num_views = tu_fdm_num_layers(cmd);
       VkExtent2D unscaled_frag_areas[num_views];
       VkRect2D bins[num_views];
+      VkOffset2D frag_offsets[num_views];
       for (unsigned i = 0; i < num_views; i++) {
         unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
+        frag_offsets[i] = (VkOffset2D) { 0, 0 };
         if (fdm_offsets && !cmd->state.rp.shared_viewport) {
            /* We need to shift over the viewport and scissor during the
             * binning pass to match the shift applied when rendering. The way
@@ -2034,8 +2133,8 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
         continue;
      tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
      tu_cs_emit_qw(cs, patch->iova);
-     patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, num_views,
-                  unscaled_frag_areas, bins);
+     patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, frag_offsets,
+                  num_views, unscaled_frag_areas, bins);
   }
   tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
@@ -2465,6 +2564,12 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
    tu_cs_emit(cs, 0x0);
+   /* Reset bin scaling. */
+   if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
+      tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
+      tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
+   }
    tu_autotune_begin_renderpass<CHIP>(cmd, cs, autotune_result);
    tu_cs_sanity_check(cs);
@@ -2794,6 +2899,13 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
       height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
    }
+   /* HW viewport scaling supports a maximum fragment width/height of 4.
+    */
+   if (views <= MAX_HW_SCALED_VIEWS) {
+      width = MIN2(width, 4);
+      height = MIN2(height, 4);
+   }
    /* Make sure that the width/height divides the tile width/height so
    * we don't have to do extra awkward clamping of the edges of each
    * bin when resolving. It also has to divide the fdm offset, if any.
@@ -6451,6 +6563,7 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     void *data,
                     VkOffset2D common_bin_offset,
+                    const VkOffset2D *hw_viewport_offsets,
                     unsigned views,
                     const VkExtent2D *frag_areas,
                     const VkRect2D *bins)
@@ -6466,7 +6579,10 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
       */
      VkExtent2D area = frag_areas[MIN2(i, views - 1)];
      VkRect2D bin = bins[MIN2(i, views - 1)];
+      VkOffset2D hw_viewport_offset = hw_viewport_offsets[MIN2(i, views - 1)];
      VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
+      offset.x -= hw_viewport_offset.x;
+      offset.y -= hw_viewport_offset.y;
      tu_cs_emit(cs, area.width);
      tu_cs_emit(cs, area.height);
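The same subtraction of hw_viewport_offset recurs in fdm_apply_viewports() and fdm_apply_scissors() further down. A minimal sketch of the split, with illustrative names (offset2d and sw_offset are not driver API):

/* Sketch: the per-bin offset that used to be baked entirely into the
 * viewport/scissor/FS params is now partly applied in hardware by
 * GRAS_BIN_FOVEAT_OFFSET_*, so the software side programs only the
 * remainder and the combined result a fragment sees stays the same. */
typedef struct { int x, y; } offset2d;

static offset2d
sw_offset(offset2d total_per_bin_offset, offset2d hw_viewport_offset)
{
   return (offset2d) {
      .x = total_per_bin_offset.x - hw_viewport_offset.x,
      .y = total_per_bin_offset.y - hw_viewport_offset.y,
   };
}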

View file

@@ -796,6 +796,7 @@ typedef void (*tu_fdm_bin_apply_t)(struct tu_cmd_buffer *cmd,
                                    struct tu_cs *cs,
                                    void *data,
                                    VkOffset2D common_bin_offset,
+                                   const VkOffset2D *hw_viewport_offsets,
                                    unsigned views,
                                    const VkExtent2D *frag_areas,
                                    const VkRect2D *bins);
@@ -852,6 +853,7 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
    */
   unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
   VkExtent2D unscaled_frag_areas[num_views];
+  VkOffset2D hw_viewport_offsets[num_views];
   VkRect2D bins[num_views];
   for (unsigned i = 0; i < num_views; i++) {
      unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
@@ -859,8 +861,9 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
         { 0, 0 },
         { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
      };
+     hw_viewport_offsets[i] = (VkOffset2D) { 0, 0 };
   }
-  apply(cmd, cs, state, (VkOffset2D) {0, 0}, num_views, unscaled_frag_areas, bins);
+  apply(cmd, cs, state, (VkOffset2D) {0, 0}, hw_viewport_offsets, num_views, unscaled_frag_areas, bins);
   assert(tu_cs_get_cur_iova(cs) == patch.iova + patch.size * sizeof(uint32_t));
   util_dynarray_append(&cmd->fdm_bin_patchpoints,
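A rough, comment-only sketch of how the new hw_viewport_offsets parameter rides through the existing patchpoint flow (names abbreviated; see tu_cmd_buffer.h/.cc above for the real code):

/*
 * record time (_tu_create_fdm_bin_patchpoint):
 *   apply(cmd, cs, data, {0,0}, hw_viewport_offsets (all zero),
 *         num_views, unscaled 1x1 areas, full-viewport bins)
 *   -> emits placeholder register values, remembers iova + size
 *
 * per-bin time (tu6_emit_tile_select / tu6_emit_binning_pass):
 *   CP_MEM_WRITE at iova, then
 *   apply(cmd, cs, data, {x1,y1}, frag_offsets, views, frag_areas, bins)
 *   -> overwrites the placeholder with bin-specific values
 */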

View file

@@ -105,6 +105,7 @@
 #define TU_MAX_DRM_DEVICES 8
 #define MAX_VIEWS 16
+#define MAX_HW_SCALED_VIEWS 6
 #define MAX_BIND_POINTS 2 /* compute + graphics */
 /* match the latest Qualcomm driver which is also a hw limit on later gens */
 #define MAX_STORAGE_BUFFER_RANGE (1u << 27)

View file

@@ -2623,7 +2623,9 @@ tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin,
 static void
 fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
-                    VkOffset2D common_bin_offset, unsigned views,
+                    VkOffset2D common_bin_offset,
+                    const VkOffset2D *hw_viewport_offsets,
+                    unsigned views,
                     const VkExtent2D *frag_areas, const VkRect2D *bins)
 {
    const struct apply_viewport_state *state =
@@ -2645,6 +2647,9 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
         (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
      VkRect2D bin =
         (state->share_scale || views == 1) ? bins[0] : bins[i];
+      VkOffset2D hw_viewport_offset =
+         (state->share_scale || views == 1) ? hw_viewport_offsets[0] :
+         hw_viewport_offsets[i];
      /* Implement fake_single_viewport by replicating viewport 0 across all
       * views.
       */
@@ -2667,6 +2672,8 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
                                                common_bin_offset);
+      offset.x -= hw_viewport_offset.x;
+      offset.y -= hw_viewport_offset.y;
      vp.viewports[i].x = scale_x * viewport.x + offset.x;
      vp.viewports[i].y = scale_y * viewport.y + offset.y;
@@ -2747,7 +2754,9 @@ tu6_emit_scissor(struct tu_cs *cs, const struct vk_viewport_state *vp)
 static void
 fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
-                   VkOffset2D common_bin_offset, unsigned views,
+                   VkOffset2D common_bin_offset,
+                   const VkOffset2D *hw_viewport_offsets,
+                   unsigned views,
                    const VkExtent2D *frag_areas, const VkRect2D *bins)
 {
    const struct apply_viewport_state *state =
@@ -2762,6 +2771,9 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
         (state->share_scale || views == 1) ? bins[0] : bins[i];
      VkRect2D scissor =
         state->fake_single_viewport ? state->vp.scissors[0] : state->vp.scissors[i];
+      VkOffset2D hw_viewport_offset =
+         (state->share_scale || views == 1) ? hw_viewport_offsets[0] :
+         hw_viewport_offsets[i];
      /* Transform the scissor following the viewport. It's unclear how this
       * is supposed to handle cases where the scissor isn't aligned to the
@@ -2771,6 +2783,8 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
       */
      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
                                                common_bin_offset);
+      offset.x -= hw_viewport_offset.x;
+      offset.y -= hw_viewport_offset.y;
      VkOffset2D min = {
         scissor.offset.x / frag_area.width + offset.x,
         scissor.offset.y / frag_area.width + offset.y,
@@ -2785,12 +2799,14 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
       */
      uint32_t scaled_width = bin.extent.width / frag_area.width;
      uint32_t scaled_height = bin.extent.height / frag_area.height;
-      vp.scissors[i].offset.x = MAX2(min.x, common_bin_offset.x);
-      vp.scissors[i].offset.y = MAX2(min.y, common_bin_offset.y);
+      uint32_t bin_x = common_bin_offset.x - hw_viewport_offset.x;
+      uint32_t bin_y = common_bin_offset.y - hw_viewport_offset.y;
+      vp.scissors[i].offset.x = MAX2(min.x, bin_x);
+      vp.scissors[i].offset.y = MAX2(min.y, bin_y);
      vp.scissors[i].extent.width =
-         MIN2(max.x, common_bin_offset.x + scaled_width) - vp.scissors[i].offset.x;
+         MIN2(max.x, bin_x + scaled_width) - vp.scissors[i].offset.x;
      vp.scissors[i].extent.height =
-         MIN2(max.y, common_bin_offset.y + scaled_height) - vp.scissors[i].offset.y;
+         MIN2(max.y, bin_y + scaled_height) - vp.scissors[i].offset.y;
   }
   TU_CALLX(cs->device, tu6_emit_scissor)(cs, &vp);
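A worked example of the new clamp, with hypothetical values (scissor_clamp_example is illustrative, not driver code):

#include <assert.h>

static void
scissor_clamp_example(void)
{
   const int x1 = 256, frag_w = 2;       /* hypothetical bin origin / area */
   const int hw_off = x1 - x1 / frag_w;  /* 128, GRAS_BIN_FOVEAT_OFFSET x */
   const int bin_x = x1 - hw_off;        /* 128, new clamp origin */
   /* The clamp now tracks the scaled bin origin rather than x1 itself,
    * presumably because the hardware bin offset is applied after the
    * scissor (see the window-scissor comment in tu_cmd_buffer.cc). */
   assert(bin_x == x1 / frag_w);
}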

View file

@@ -827,7 +827,7 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
 {
    struct fd_screen *screen = ctx->screen;
-   fd_ncrb<CHIP> ncrb(cs, 25 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
+   fd_ncrb<CHIP> ncrb(cs, 27 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
    if (CHIP >= A7XX) {
       /* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has
@@ -898,6 +898,11 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
       ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed));
       ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0));
    }
+   if (screen->info->a7xx.has_hw_bin_scaling) {
+      ncrb.add(A7XX_GRAS_BIN_FOVEAT());
+      ncrb.add(A7XX_RB_BIN_FOVEAT());
+   }
 }
 /**