mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 11:40:10 +01:00
tu: Use GRAS bin offset registers
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36475>
This commit is contained in:
parent
10e7f63734
commit
b34b089ca1
7 changed files with 163 additions and 44 deletions
|
|
@ -1027,7 +1027,6 @@ a730_raw_magic_regs = [
|
|||
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
|
||||
]
|
||||
|
||||
a740_magic_regs = dict(
|
||||
|
|
@ -1075,11 +1074,6 @@ a740_raw_magic_regs = [
|
|||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
|
||||
|
|
@ -1091,10 +1085,7 @@ a740_raw_magic_regs = [
|
|||
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
|
||||
]
|
||||
|
||||
add_gpus([
|
||||
|
|
@ -1187,11 +1178,6 @@ add_gpus([
|
|||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
|
||||
|
|
@ -1203,10 +1189,7 @@ add_gpus([
|
|||
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
|
||||
],
|
||||
))
|
||||
|
||||
|
|
@ -1270,11 +1253,6 @@ add_gpus([
|
|||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
|
||||
|
|
@ -1286,7 +1264,6 @@ add_gpus([
|
|||
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
|
||||
],
|
||||
))
|
||||
|
||||
|
|
@ -1374,10 +1351,6 @@ add_gpus([
|
|||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_0, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_1, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT_OFFSET_3, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
|
||||
|
|
@ -1389,11 +1362,8 @@ add_gpus([
|
|||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_BIN_FOVEAT, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
[A6XXRegs.REG_A7XX_GRAS_BIN_FOVEAT, 0x00000000],
|
||||
|
||||
[0x930a, 0],
|
||||
[0x960a, 1],
|
||||
[A6XXRegs.REG_A7XX_SP_PS_OUTPUT_CONST_CNTL, 0],
|
||||
|
|
|
|||
|
|
@ -1533,6 +1533,10 @@ r3d_setup(struct tu_cmd_buffer *cmd,
|
|||
if (!cmd->state.pass) {
|
||||
tu_emit_cache_flush_ccu<CHIP>(cmd, cs, TU_CMD_CCU_SYSMEM);
|
||||
tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
|
||||
if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
|
||||
}
|
||||
}
|
||||
|
||||
if (!(blit_param & R3D_DST_GMEM)) {
|
||||
|
|
@ -3911,6 +3915,7 @@ fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
|
|
@ -4184,6 +4189,7 @@ fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
|
|
@ -4819,6 +4825,7 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
|
|
@ -5291,6 +5298,7 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
|
|
|
|||
|
|
@ -1240,6 +1240,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
bool hw_binning = use_hw_binning(cmd);
|
||||
|
||||
|
|
@ -1251,6 +1252,24 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
tu_emit_vsc<CHIP>(cmd, &cmd->cs);
|
||||
}
|
||||
|
||||
unsigned views = tu_fdm_num_layers(cmd);
|
||||
bool bin_is_scaled = false;
|
||||
|
||||
if (fdm) {
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
if (tile->frag_areas[i].width != 1 ||
|
||||
tile->frag_areas[i].height != 1) {
|
||||
bin_is_scaled = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool bin_scale_en =
|
||||
cmd->device->physical_device->info->a7xx.has_hw_bin_scaling &&
|
||||
views <= MAX_HW_SCALED_VIEWS && !cmd->state.rp.shared_viewport &&
|
||||
bin_is_scaled;
|
||||
|
||||
tu6_emit_bin_size<CHIP>(
|
||||
cs, tiling->tile0.width, tiling->tile0.height,
|
||||
{
|
||||
|
|
@ -1272,7 +1291,22 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
|
||||
const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
|
||||
const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
|
||||
|
||||
if (bin_scale_en) {
|
||||
/* It seems that the window scissor happens *before*
|
||||
* GRAS_BIN_FOVEAT_OFFSET_* is applied to the fragment coordinates,
|
||||
* unlike the window offset which happens after it is applied. This
|
||||
* means that the window scissor cannot do its job and we have to
|
||||
* disable it by setting it to the entire FB size (plus an extra tile
|
||||
* size, in case GRAS_BIN_FOVEAT_OFFSET_* is not in use). With FDM it is
|
||||
* effectively replaced by the user's scissor anyway.
|
||||
*/
|
||||
uint32_t width = fb->width + tiling->tile0.width;
|
||||
uint32_t height = fb->height + tiling->tile0.height;
|
||||
tu6_emit_window_scissor(cs, 0, 0, width, height);
|
||||
} else {
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
|
||||
}
|
||||
tu6_emit_window_offset<CHIP>(cs, x1, y1);
|
||||
|
||||
unsigned slot = ffs(tile->slot_mask) - 1;
|
||||
|
|
@ -1308,13 +1342,15 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, 0x0);
|
||||
|
||||
if (fdm) {
|
||||
unsigned views = tu_fdm_num_layers(cmd);
|
||||
VkRect2D bin = {
|
||||
{ x1, y1 },
|
||||
{ (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height }
|
||||
};
|
||||
VkRect2D bins[views];
|
||||
VkOffset2D frag_offsets[MAX_VIEWS];
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
frag_offsets[i] = (VkOffset2D) { 0, 0 };
|
||||
|
||||
if (!fdm_offsets || cmd->state.rp.shared_viewport) {
|
||||
bins[i] = bin;
|
||||
continue;
|
||||
|
|
@ -1330,12 +1366,67 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
MAX2(MIN2((int32_t)y1 + bin.extent.height - bin_offset.y, MAX_VIEWPORT_SIZE) - bins[i].offset.y, 0);
|
||||
}
|
||||
|
||||
if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
|
||||
if (bin_scale_en) {
|
||||
VkExtent2D frag_areas[MAX_HW_SCALED_VIEWS];
|
||||
for (unsigned i = 0; i < MAX_HW_SCALED_VIEWS; i++) {
|
||||
if (i >= views) {
|
||||
/* Make sure unused views aren't garbage */
|
||||
frag_areas[i] = (VkExtent2D) {1, 1};
|
||||
frag_offsets[i] = (VkOffset2D) { 0, 0 };
|
||||
continue;
|
||||
}
|
||||
|
||||
frag_areas[i] = tile->frag_areas[i];
|
||||
frag_offsets[i].x = x1 - x1 / tile->frag_areas[i].width;
|
||||
frag_offsets[i].y = y1 - y1 / tile->frag_areas[i].height;
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT(
|
||||
.binscaleen = bin_scale_en,
|
||||
.xscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].width),
|
||||
.yscale_0 = (enum a7xx_bin_scale)util_logbase2(frag_areas[0].height),
|
||||
.xscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].width),
|
||||
.yscale_1 = (enum a7xx_bin_scale)util_logbase2(frag_areas[1].height),
|
||||
.xscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].width),
|
||||
.yscale_2 = (enum a7xx_bin_scale)util_logbase2(frag_areas[2].height),
|
||||
.xscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].width),
|
||||
.yscale_3 = (enum a7xx_bin_scale)util_logbase2(frag_areas[3].height),
|
||||
.xscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].width),
|
||||
.yscale_4 = (enum a7xx_bin_scale)util_logbase2(frag_areas[4].height),
|
||||
.xscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].width),
|
||||
.yscale_5 = (enum a7xx_bin_scale)util_logbase2(frag_areas[5].height)),
|
||||
A7XX_GRAS_BIN_FOVEAT_OFFSET_0(
|
||||
.xoffset_0 = frag_offsets[0].x,
|
||||
.xoffset_1 = frag_offsets[1].x,
|
||||
.xoffset_2 = frag_offsets[2].x),
|
||||
A7XX_GRAS_BIN_FOVEAT_OFFSET_1(
|
||||
.xoffset_3 = frag_offsets[3].x,
|
||||
.xoffset_4 = frag_offsets[4].x,
|
||||
.xoffset_5 = frag_offsets[5].x),
|
||||
A7XX_GRAS_BIN_FOVEAT_OFFSET_2(
|
||||
.yoffset_0 = frag_offsets[0].y,
|
||||
.yoffset_1 = frag_offsets[1].y,
|
||||
.yoffset_2 = frag_offsets[2].y),
|
||||
A7XX_GRAS_BIN_FOVEAT_OFFSET_3(
|
||||
.yoffset_3 = frag_offsets[3].y,
|
||||
.yoffset_4 = frag_offsets[4].y,
|
||||
.yoffset_5 = frag_offsets[5].y));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT(
|
||||
.binscaleen = bin_scale_en));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
|
||||
}
|
||||
}
|
||||
|
||||
util_dynarray_foreach (&cmd->fdm_bin_patchpoints,
|
||||
struct tu_fdm_bin_patchpoint, patch) {
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
|
||||
tu_cs_emit_qw(cs, patch->iova);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 }, views,
|
||||
tile->frag_areas, bins);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 },
|
||||
frag_offsets, views, tile->frag_areas, bins);
|
||||
}
|
||||
|
||||
/* Make the CP wait until the CP_MEM_WRITE's to the command buffers
|
||||
|
|
@ -1989,6 +2080,12 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
|
||||
/* Reset bin scaling. */
|
||||
if (phys_dev->info->a7xx.has_hw_bin_scaling) {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
|
||||
}
|
||||
|
||||
/* If this command buffer may be executed multiple times, then
|
||||
* viewports/scissor states may have been changed by previous executions
|
||||
* and we need to reset them before executing the binning IB. With FDM
|
||||
|
|
@ -2000,8 +2097,10 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
unsigned num_views = tu_fdm_num_layers(cmd);
|
||||
VkExtent2D unscaled_frag_areas[num_views];
|
||||
VkRect2D bins[num_views];
|
||||
VkOffset2D frag_offsets[num_views];
|
||||
for (unsigned i = 0; i < num_views; i++) {
|
||||
unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
|
||||
frag_offsets[i] = (VkOffset2D) { 0, 0 };
|
||||
if (fdm_offsets && !cmd->state.rp.shared_viewport) {
|
||||
/* We need to shift over the viewport and scissor during the
|
||||
* binning pass to match the shift applied when rendering. The way
|
||||
|
|
@ -2034,8 +2133,8 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
continue;
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
|
||||
tu_cs_emit_qw(cs, patch->iova);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, num_views,
|
||||
unscaled_frag_areas, bins);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, frag_offsets,
|
||||
num_views, unscaled_frag_areas, bins);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
|
|
@ -2465,6 +2564,12 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
|
||||
tu_cs_emit(cs, 0x0);
|
||||
|
||||
/* Reset bin scaling. */
|
||||
if (cmd->device->physical_device->info->a7xx.has_hw_bin_scaling) {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_BIN_FOVEAT());
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BIN_FOVEAT());
|
||||
}
|
||||
|
||||
tu_autotune_begin_renderpass<CHIP>(cmd, cs, autotune_result);
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
|
|
@ -2794,6 +2899,13 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
|
|||
height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
|
||||
}
|
||||
|
||||
/* HW viewport scaling supports a maximum fragment width/height of 4.
|
||||
*/
|
||||
if (views <= MAX_HW_SCALED_VIEWS) {
|
||||
width = MIN2(width, 4);
|
||||
height = MIN2(height, 4);
|
||||
}
|
||||
|
||||
/* Make sure that the width/height divides the tile width/height so
|
||||
* we don't have to do extra awkward clamping of the edges of each
|
||||
* bin when resolving. It also has to divide the fdm offset, if any.
|
||||
|
|
@ -6451,6 +6563,7 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
|
|
@ -6466,7 +6579,10 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
|
|||
*/
|
||||
VkExtent2D area = frag_areas[MIN2(i, views - 1)];
|
||||
VkRect2D bin = bins[MIN2(i, views - 1)];
|
||||
VkOffset2D hw_viewport_offset = hw_viewport_offsets[MIN2(i, views - 1)];
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
|
||||
offset.x -= hw_viewport_offset.x;
|
||||
offset.y -= hw_viewport_offset.y;
|
||||
|
||||
tu_cs_emit(cs, area.width);
|
||||
tu_cs_emit(cs, area.height);
|
||||
|
|
|
|||
|
|
@ -796,6 +796,7 @@ typedef void (*tu_fdm_bin_apply_t)(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins);
|
||||
|
|
@ -852,6 +853,7 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
|
|||
*/
|
||||
unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
|
||||
VkExtent2D unscaled_frag_areas[num_views];
|
||||
VkOffset2D hw_viewport_offsets[num_views];
|
||||
VkRect2D bins[num_views];
|
||||
for (unsigned i = 0; i < num_views; i++) {
|
||||
unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
|
||||
|
|
@ -859,8 +861,9 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
|
|||
{ 0, 0 },
|
||||
{ MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
|
||||
};
|
||||
hw_viewport_offsets[i] = (VkOffset2D) { 0, 0 };
|
||||
}
|
||||
apply(cmd, cs, state, (VkOffset2D) {0, 0}, num_views, unscaled_frag_areas, bins);
|
||||
apply(cmd, cs, state, (VkOffset2D) {0, 0}, hw_viewport_offsets, num_views, unscaled_frag_areas, bins);
|
||||
assert(tu_cs_get_cur_iova(cs) == patch.iova + patch.size * sizeof(uint32_t));
|
||||
|
||||
util_dynarray_append(&cmd->fdm_bin_patchpoints,
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@
|
|||
|
||||
#define TU_MAX_DRM_DEVICES 8
|
||||
#define MAX_VIEWS 16
|
||||
/* Maximum number of views for which hardware bin scaling is used.
 * A7XX_GRAS_BIN_FOVEAT and A7XX_GRAS_BIN_FOVEAT_OFFSET_0..3 are programmed
 * with per-view scale/offset fields numbered 0..5; tile selection only
 * enables bin scaling when the view count does not exceed this value, and
 * pads unused view slots with a 1x1 fragment area and a zero offset.
 */
#define MAX_HW_SCALED_VIEWS 6
|
||||
#define MAX_BIND_POINTS 2 /* compute + graphics */
|
||||
/* match the latest Qualcomm driver which is also a hw limit on later gens */
|
||||
#define MAX_STORAGE_BUFFER_RANGE (1u << 27)
|
||||
|
|
|
|||
|
|
@ -2623,7 +2623,9 @@ tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin,
|
|||
|
||||
static void
|
||||
fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
||||
VkOffset2D common_bin_offset, unsigned views,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas, const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_viewport_state *state =
|
||||
|
|
@ -2645,6 +2647,9 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
(state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
|
||||
VkRect2D bin =
|
||||
(state->share_scale || views == 1) ? bins[0] : bins[i];
|
||||
VkOffset2D hw_viewport_offset =
|
||||
(state->share_scale || views == 1) ? hw_viewport_offsets[0] :
|
||||
hw_viewport_offsets[i];
|
||||
/* Implement fake_single_viewport by replicating viewport 0 across all
|
||||
* views.
|
||||
*/
|
||||
|
|
@ -2667,6 +2672,8 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
|
||||
common_bin_offset);
|
||||
offset.x -= hw_viewport_offset.x;
|
||||
offset.y -= hw_viewport_offset.y;
|
||||
|
||||
vp.viewports[i].x = scale_x * viewport.x + offset.x;
|
||||
vp.viewports[i].y = scale_y * viewport.y + offset.y;
|
||||
|
|
@ -2747,7 +2754,9 @@ tu6_emit_scissor(struct tu_cs *cs, const struct vk_viewport_state *vp)
|
|||
|
||||
static void
|
||||
fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
||||
VkOffset2D common_bin_offset, unsigned views,
|
||||
VkOffset2D common_bin_offset,
|
||||
const VkOffset2D *hw_viewport_offsets,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas, const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_viewport_state *state =
|
||||
|
|
@ -2762,6 +2771,9 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
(state->share_scale || views == 1) ? bins[0] : bins[i];
|
||||
VkRect2D scissor =
|
||||
state->fake_single_viewport ? state->vp.scissors[0] : state->vp.scissors[i];
|
||||
VkOffset2D hw_viewport_offset =
|
||||
(state->share_scale || views == 1) ? hw_viewport_offsets[0] :
|
||||
hw_viewport_offsets[i];
|
||||
|
||||
/* Transform the scissor following the viewport. It's unclear how this
|
||||
* is supposed to handle cases where the scissor isn't aligned to the
|
||||
|
|
@ -2771,6 +2783,8 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
*/
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
|
||||
common_bin_offset);
|
||||
offset.x -= hw_viewport_offset.x;
|
||||
offset.y -= hw_viewport_offset.y;
|
||||
VkOffset2D min = {
|
||||
scissor.offset.x / frag_area.width + offset.x,
|
||||
scissor.offset.y / frag_area.width + offset.y,
|
||||
|
|
@ -2785,12 +2799,14 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
*/
|
||||
uint32_t scaled_width = bin.extent.width / frag_area.width;
|
||||
uint32_t scaled_height = bin.extent.height / frag_area.height;
|
||||
vp.scissors[i].offset.x = MAX2(min.x, common_bin_offset.x);
|
||||
vp.scissors[i].offset.y = MAX2(min.y, common_bin_offset.y);
|
||||
uint32_t bin_x = common_bin_offset.x - hw_viewport_offset.x;
|
||||
uint32_t bin_y = common_bin_offset.y - hw_viewport_offset.y;
|
||||
vp.scissors[i].offset.x = MAX2(min.x, bin_x);
|
||||
vp.scissors[i].offset.y = MAX2(min.y, bin_y);
|
||||
vp.scissors[i].extent.width =
|
||||
MIN2(max.x, common_bin_offset.x + scaled_width) - vp.scissors[i].offset.x;
|
||||
MIN2(max.x, bin_x + scaled_width) - vp.scissors[i].offset.x;
|
||||
vp.scissors[i].extent.height =
|
||||
MIN2(max.y, common_bin_offset.y + scaled_height) - vp.scissors[i].offset.y;
|
||||
MIN2(max.y, bin_y + scaled_height) - vp.scissors[i].offset.y;
|
||||
}
|
||||
|
||||
TU_CALLX(cs->device, tu6_emit_scissor)(cs, &vp);
|
||||
|
|
|
|||
|
|
@ -827,7 +827,7 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
|
|||
{
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
|
||||
fd_ncrb<CHIP> ncrb(cs, 25 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
|
||||
fd_ncrb<CHIP> ncrb(cs, 27 + ARRAY_SIZE(screen->info->a6xx.magic_raw));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
/* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has
|
||||
|
|
@ -898,6 +898,11 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
|
|||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed));
|
||||
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0));
|
||||
}
|
||||
|
||||
if (screen->info->a7xx.has_hw_bin_scaling) {
|
||||
ncrb.add(A7XX_GRAS_BIN_FOVEAT());
|
||||
ncrb.add(A7XX_RB_BIN_FOVEAT());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue