mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 23:50:11 +01:00
tu: Implement VK_QCOM_fragment_density_map_offset
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33500>
This commit is contained in:
parent
7351f8d587
commit
75178c4655
15 changed files with 490 additions and 132 deletions
|
|
@ -694,6 +694,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_EXT_map_memory_placed DONE (anv, nvk, radv, tu)
|
||||
VK_MESA_image_alignment_control DONE (anv, nvk, radv)
|
||||
VK_EXT_legacy_dithering DONE (anv, tu, vn)
|
||||
VK_QCOM_fragment_density_map_offset DONE (tu)
|
||||
|
||||
|
||||
Clover OpenCL 1.0 -- all DONE:
|
||||
|
|
|
|||
|
|
@ -1363,6 +1363,22 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
|
|||
if (!iview->view.is_mutable)
|
||||
desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
|
||||
desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
|
||||
/* If FDM offset is used, the last row and column extend beyond the
|
||||
* framebuffer but are shifted over when storing. Expand the width and
|
||||
* height to account for that.
|
||||
*/
|
||||
if (tu_enable_fdm_offset(cmd)) {
|
||||
uint32_t width = desc[1] & A6XX_TEX_CONST_1_WIDTH__MASK;
|
||||
uint32_t height = (desc[1] & A6XX_TEX_CONST_1_HEIGHT__MASK) >>
|
||||
A6XX_TEX_CONST_1_HEIGHT__SHIFT;
|
||||
width += cmd->state.tiling->tile0.width;
|
||||
height += cmd->state.tiling->tile0.height;
|
||||
desc[1] = (desc[1] & ~(A6XX_TEX_CONST_1_WIDTH__MASK |
|
||||
A6XX_TEX_CONST_1_HEIGHT__MASK)) |
|
||||
A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
|
||||
}
|
||||
|
||||
desc[2] =
|
||||
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.tiling->tile0.width * cpp);
|
||||
|
|
@ -3910,17 +3926,19 @@ static void
|
|||
fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas)
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_sysmem_clear_coords_state *state =
|
||||
(const struct apply_sysmem_clear_coords_state *)data;
|
||||
assert(state->view < views);
|
||||
|
||||
VkExtent2D frag_area = frag_areas[state->view];
|
||||
VkRect2D bin = bins[state->view];
|
||||
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
|
||||
|
||||
unsigned x1 = state->rect.offset.x / frag_area.width + offset.x;
|
||||
unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width,
|
||||
|
|
@ -4182,17 +4200,19 @@ static void
|
|||
fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas)
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_gmem_clear_coords_state *state =
|
||||
(const struct apply_gmem_clear_coords_state *)data;
|
||||
assert(state->view < views);
|
||||
|
||||
VkExtent2D frag_area = frag_areas[state->view];
|
||||
VkRect2D bin = bins[state->view];
|
||||
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
|
||||
|
||||
unsigned x1 = state->rect.offset.x / frag_area.width + offset.x;
|
||||
unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width,
|
||||
|
|
@ -4816,14 +4836,16 @@ static void
|
|||
fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas)
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_load_coords_state *state =
|
||||
(const struct apply_load_coords_state *)data;
|
||||
assert(state->view < views);
|
||||
VkExtent2D frag_area = frag_areas[state->view];
|
||||
VkRect2D bin = bins[state->view];
|
||||
|
||||
assert(bin.extent.width % frag_area.width == 0);
|
||||
assert(bin.extent.height % frag_area.height == 0);
|
||||
|
|
@ -4831,10 +4853,10 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
|
|||
uint32_t scaled_height = bin.extent.height / frag_area.height;
|
||||
|
||||
const float coords[] = {
|
||||
bin.offset.x, bin.offset.y,
|
||||
bin.offset.x, bin.offset.y,
|
||||
bin.offset.x + scaled_width, bin.offset.y + scaled_height,
|
||||
bin.offset.x + bin.extent.width, bin.offset.y + bin.extent.height,
|
||||
common_bin_offset.x, common_bin_offset.y,
|
||||
bin.offset.x, bin.offset.y,
|
||||
common_bin_offset.x + scaled_width, common_bin_offset.y + scaled_height,
|
||||
bin.offset.x + bin.extent.width, bin.offset.y + bin.extent.height,
|
||||
};
|
||||
r3d_coords_raw(cmd, cs, coords);
|
||||
}
|
||||
|
|
@ -5050,6 +5072,19 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
|||
enum a6xx_format format = fmt.fmt;
|
||||
fixup_src_format(&src_format, dst_format, &format);
|
||||
|
||||
uint32_t src_width = dst_iview->vk.extent.width;
|
||||
uint32_t src_height = dst_iview->vk.extent.height;
|
||||
|
||||
/* With FDM offset, we may blit from an extra row/column of tiles whose
|
||||
* source coordinates are outside of the attachment. Add an extra tile
|
||||
* width/height to the size to avoid clipping the source.
|
||||
*/
|
||||
if (tu_enable_fdm_offset(cmd)) {
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
src_width += tiling->tile0.width;
|
||||
src_height += tiling->tile0.height;
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
SP_PS_2D_SRC_INFO(CHIP,
|
||||
.color_format = format,
|
||||
|
|
@ -5063,8 +5098,8 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
|||
.unk22 = 1,
|
||||
.mutableen = src_iview->view.is_mutable),
|
||||
SP_PS_2D_SRC_SIZE(CHIP,
|
||||
.width = dst_iview->vk.extent.width,
|
||||
.height = dst_iview->vk.extent.height),
|
||||
.width = src_width,
|
||||
.height = src_height),
|
||||
SP_PS_2D_SRC(CHIP, .qword = cmd->device->physical_device->gmem_base + gmem_offset),
|
||||
SP_PS_2D_SRC_PITCH(CHIP, .pitch = cmd->state.tiling->tile0.width * cpp));
|
||||
|
||||
|
|
@ -5274,14 +5309,16 @@ static void
|
|||
fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas)
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_store_coords_state *state =
|
||||
(const struct apply_store_coords_state *)data;
|
||||
assert(state->view < views);
|
||||
VkExtent2D frag_area = frag_areas[state->view];
|
||||
VkRect2D bin = bins[state->view];
|
||||
|
||||
/* The bin width/height must be a multiple of the frag_area to make sure
|
||||
* that the scaling happens correctly. This means there may be some
|
||||
|
|
@ -5299,10 +5336,10 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
|
|||
A6XX_GRAS_2D_DST_BR(.x = bin.offset.x + bin.extent.width - 1,
|
||||
.y = bin.offset.y + bin.extent.height - 1));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_GRAS_2D_SRC_TL_X(bin.offset.x),
|
||||
A6XX_GRAS_2D_SRC_BR_X(bin.offset.x + scaled_width - 1),
|
||||
A6XX_GRAS_2D_SRC_TL_Y(bin.offset.y),
|
||||
A6XX_GRAS_2D_SRC_BR_Y(bin.offset.y + scaled_height - 1));
|
||||
A6XX_GRAS_2D_SRC_TL_X(common_bin_offset.x),
|
||||
A6XX_GRAS_2D_SRC_BR_X(common_bin_offset.x + scaled_width - 1),
|
||||
A6XX_GRAS_2D_SRC_TL_Y(common_bin_offset.y),
|
||||
A6XX_GRAS_2D_SRC_BR_Y(common_bin_offset.y + scaled_height - 1));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
|
|||
|
|
@ -700,7 +700,8 @@ tu6_emit_render_cntl<A7XX>(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
static void
|
||||
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
|
||||
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align,
|
||||
bool used_by_sysmem)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const VkRect2D *render_area = &cmd->state.render_area;
|
||||
|
|
@ -727,9 +728,42 @@ tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
|
|||
y2 = ALIGN_POT(y2 + 1, phys_dev->info->gmem_align_h) - 1;
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_SCISSOR_TL(.x = x1, .y = y1),
|
||||
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
|
||||
/* With FDM offset, bins are shifted to the right in GMEM space compared to
|
||||
* framebuffer space. We do not use RB_BLIT_SCISSOR_* for loads and stores
|
||||
* because those do not use the fast path, but we do use it for
|
||||
* LOAD_OP_CLEAR. Expand the render area so that GMEM clears work
|
||||
* correctly. We may over-clear but that's ok because the store is clipped
|
||||
* to the render area.
|
||||
*/
|
||||
if (tu_enable_fdm_offset(cmd)) {
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
|
||||
/* If this is a generic clear that's also used in sysmem mode then we
|
||||
* need to emit the unmodified render area in sysmem mode because
|
||||
* over-clearing is not allowed.
|
||||
*/
|
||||
if (used_by_sysmem) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_SCISSOR_TL(.x = x1, .y = y1),
|
||||
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
|
||||
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(RENDER_MODE) |
|
||||
CP_COND_REG_EXEC_0_GMEM);
|
||||
}
|
||||
|
||||
x2 += tiling->tile0.width;
|
||||
y2 += tiling->tile0.height;
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_SCISSOR_TL(.x = x1, .y = y1),
|
||||
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
|
||||
|
||||
if (used_by_sysmem) {
|
||||
tu_cond_exec_end(cs);
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_SCISSOR_TL(.x = x1, .y = y1),
|
||||
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -950,12 +984,20 @@ tu6_update_msaa_disable(struct tu_cmd_buffer *cmd)
|
|||
}
|
||||
}
|
||||
|
||||
static const struct tu_vsc_config *
|
||||
tu_vsc_config(struct tu_cmd_buffer *cmd, const struct tu_tiling_config *tiling)
|
||||
{
|
||||
if (tu_enable_fdm_offset(cmd))
|
||||
return &tiling->fdm_offset_vsc;
|
||||
return &tiling->vsc;
|
||||
}
|
||||
|
||||
static bool
|
||||
use_hw_binning(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_tiling_config *tiling = &fb->tiling[cmd->state.gmem_layout];
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
|
||||
/* XFB commands are emitted for BINNING || SYSMEM, which makes it
|
||||
* incompatible with non-hw binning GMEM rendering. this is required because
|
||||
|
|
@ -1014,7 +1056,7 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
|
|||
return true;
|
||||
}
|
||||
|
||||
const struct tu_vsc_config *vsc = &cmd->state.tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, cmd->state.tiling);
|
||||
|
||||
/* XFB is incompatible with non-hw binning GMEM rendering, see use_hw_binning */
|
||||
if (cmd->state.rp.xfb_used && !vsc->binning_possible) {
|
||||
|
|
@ -1059,7 +1101,7 @@ static void
|
|||
tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
uint32_t pipe, uint32_t slot, bool skip_wfm)
|
||||
{
|
||||
const struct tu_vsc_config *vsc = &cmd->state.tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, cmd->state.tiling);
|
||||
|
||||
if (vsc->binning_possible &&
|
||||
cmd->state.pass->has_cond_load_store) {
|
||||
|
|
@ -1080,16 +1122,48 @@ struct tu_tile_config {
|
|||
VkExtent2D frag_areas[MAX_VIEWS];
|
||||
};
|
||||
|
||||
/* For bin offsetting we want to do "Euclidean division," where the remainder
 * (i.e. the offset of the bin) is always non-negative. Unfortunately the
 * C/C++ remainder operator truncates towards zero, so the remainder takes
 * the sign of the dividend and we have to fix it up ourselves.
 *
 * For example, we should have:
 *
 * euclid_rem(-3, 4) = 1
 * euclid_rem(-4, 4) = 0
 * euclid_rem(-5, 4) = 3
 *
 * \param dividend  value being divided; may be negative.
 * \param divisor   value to divide by; must be positive.
 * \return the remainder in the range [0, divisor).
 */

static int32_t
euclid_rem(int32_t dividend, int32_t divisor)
{
   /* The truncated remainder lies in (-divisor, divisor). Shifting a
    * negative result up by one divisor gives the Euclidean remainder without
    * ever negating the dividend, which would overflow for INT32_MIN.
    */
   int32_t rem = dividend % divisor;
   return rem < 0 ? rem + divisor : rem;
}
|
||||
|
||||
/* Calculate how much the bins for a given view should be shifted to the left
|
||||
* and upwards, given the application-provided FDM offset.
|
||||
*/
|
||||
static VkOffset2D
|
||||
tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
|
||||
{
|
||||
return (VkOffset2D) {
|
||||
euclid_rem(-fdm_offset.x, tiling->tile0.width),
|
||||
euclid_rem(-fdm_offset.y, tiling->tile0.height),
|
||||
};
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_tile_config *tile,
|
||||
bool fdm)
|
||||
bool fdm, const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
bool hw_binning = use_hw_binning(cmd);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
|
|
@ -1118,6 +1192,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
|
||||
const uint32_t x1 = tiling->tile0.width * tile->pos.x;
|
||||
const uint32_t y1 = tiling->tile0.height * tile->pos.y;
|
||||
|
||||
const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
|
||||
const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2 - 1, y2 - 1);
|
||||
|
|
@ -1161,11 +1236,29 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
{ x1, y1 },
|
||||
{ (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height }
|
||||
};
|
||||
VkRect2D bins[views];
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
if (!fdm_offsets || cmd->state.rp.shared_viewport) {
|
||||
bins[i] = bin;
|
||||
continue;
|
||||
}
|
||||
|
||||
VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
|
||||
|
||||
bins[i].offset.x = MAX2(0, (int32_t)x1 - bin_offset.x);
|
||||
bins[i].offset.y = MAX2(0, (int32_t)y1 - bin_offset.y);
|
||||
bins[i].extent.width =
|
||||
MAX2(MIN2((int32_t)x1 + bin.extent.width - bin_offset.x, MAX_VIEWPORT_SIZE) - bins[i].offset.x, 0);
|
||||
bins[i].extent.height =
|
||||
MAX2(MIN2((int32_t)y1 + bin.extent.height - bin_offset.y, MAX_VIEWPORT_SIZE) - bins[i].offset.y, 0);
|
||||
}
|
||||
|
||||
util_dynarray_foreach (&cmd->fdm_bin_patchpoints,
|
||||
struct tu_fdm_bin_patchpoint, patch) {
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
|
||||
tu_cs_emit_qw(cs, patch->iova);
|
||||
patch->apply(cmd, cs, patch->data, bin, views, tile->frag_areas);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) { x1, y1 }, views,
|
||||
tile->frag_areas, bins);
|
||||
}
|
||||
|
||||
/* Make the CP wait until the CP_MEM_WRITE's to the command buffers
|
||||
|
|
@ -1252,7 +1345,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
const struct tu_render_pass *pass = cmd->state.pass;
|
||||
const struct tu_subpass *subpass = &pass->subpasses[pass->subpass_count-1];
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_vsc_config *vsc = &cmd->state.tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, cmd->state.tiling);
|
||||
|
||||
if (pass->has_fdm)
|
||||
tu_cs_set_writeable(cs, true);
|
||||
|
|
@ -1261,7 +1354,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BIN_RESOLVE) |
|
||||
A6XX_CP_SET_MARKER_0_USES_GMEM);
|
||||
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
tu6_emit_blit_scissor(cmd, cs, true, false);
|
||||
|
||||
struct tu_resolve_group resolve_group = {};
|
||||
|
||||
|
|
@ -1646,13 +1739,31 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
bool
|
||||
tu_enable_fdm_offset(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
if (!cmd->state.pass)
|
||||
return false;
|
||||
|
||||
if (!cmd->state.pass->has_fdm)
|
||||
return false;
|
||||
|
||||
unsigned fdm_a = cmd->state.pass->fragment_density_map.attachment;
|
||||
if (fdm_a == VK_ATTACHMENT_UNUSED)
|
||||
return TU_DEBUG(FDM_OFFSET);
|
||||
|
||||
const struct tu_image_view *fdm = cmd->state.attachments[fdm_a];
|
||||
return fdm->image->vk.create_flags &
|
||||
VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_QCOM;
|
||||
}
|
||||
|
||||
static void
|
||||
update_vsc_pipe(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t num_vsc_pipes)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_BIN_SIZE(.width = tiling->tile0.width,
|
||||
|
|
@ -1680,7 +1791,7 @@ static void
|
|||
emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
const uint32_t used_pipe_count =
|
||||
vsc->pipe_count.width * vsc->pipe_count.height;
|
||||
|
||||
|
|
@ -1711,36 +1822,70 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
|
||||
/* If this command buffer may be executed multiple times, then
|
||||
* viewports/scissor states may have been changed by previous executions
|
||||
* and we need to reset them before executing the binning IB.
|
||||
* and we need to reset them before executing the binning IB. With FDM
|
||||
* offset the viewport also needs to be transformed during the binning
|
||||
* phase.
|
||||
*/
|
||||
if (!(cmd->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
|
||||
cmd->fdm_bin_patchpoints.size != 0) {
|
||||
if ((!(cmd->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) ||
|
||||
fdm_offsets) && cmd->fdm_bin_patchpoints.size != 0) {
|
||||
unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
|
||||
VkExtent2D unscaled_frag_areas[num_views];
|
||||
for (unsigned i = 0; i < num_views; i++)
|
||||
VkRect2D bins[num_views];
|
||||
for (unsigned i = 0; i < num_views; i++) {
|
||||
unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
|
||||
VkRect2D bin = { { 0, 0 }, { fb->width, fb->height } };
|
||||
if (fdm_offsets && !cmd->state.rp.shared_viewport) {
|
||||
/* We need to shift over the viewport and scissor during the
|
||||
* binning pass to match the shift applied when rendering. The way
|
||||
* to do this is to make the per-view bin start negative. In the
|
||||
* actual rendering pass, the per-view bin start is shifted in a
|
||||
* negative direction but the first bin is clipped so that the bin
|
||||
* start is never negative, but we need to do this to avoid
|
||||
* clipping the user scissor to a non-zero common bin start. We
|
||||
* skip patching load/store below in order to avoid patching loads
|
||||
* and stores to a crazy negative-offset bin. The parts of the
|
||||
* framebuffer left or above the origin correspond to the
|
||||
* non-visible parts of the left or top bins that will be
|
||||
* discarded. The framebuffer still needs to extend to the
|
||||
* original bottom and right, to avoid incorrectly clipping the
|
||||
* user scissor, so we need to add to the width and height to
|
||||
* compensate.
|
||||
*/
|
||||
VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
|
||||
bins[i] = {
|
||||
{ -bin_offset.x, -bin_offset.y },
|
||||
{ fb->width + bin_offset.x, fb->height + bin_offset.y },
|
||||
};
|
||||
} else {
|
||||
bins[i] = { { 0, 0 }, { fb->width, fb->height } };
|
||||
}
|
||||
}
|
||||
util_dynarray_foreach (&cmd->fdm_bin_patchpoints,
|
||||
struct tu_fdm_bin_patchpoint, patch) {
|
||||
if (patch->flags & TU_FDM_SKIP_BINNING)
|
||||
continue;
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + patch->size);
|
||||
tu_cs_emit_qw(cs, patch->iova);
|
||||
patch->apply(cmd, cs, patch->data, bin, num_views, unscaled_frag_areas);
|
||||
patch->apply(cmd, cs, patch->data, (VkOffset2D) {0, 0}, num_views,
|
||||
unscaled_frag_areas, bins);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
}
|
||||
|
||||
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
|
||||
uint32_t width = fb->width + (fdm_offsets ? tiling->tile0.width : 0);
|
||||
uint32_t height = fb->height + (fdm_offsets ? tiling->tile0.height : 0);
|
||||
|
||||
tu6_emit_window_scissor(cs, 0, 0, width - 1, height - 1);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BIN_VISIBILITY));
|
||||
|
|
@ -1929,6 +2074,22 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
if (!iview->view.is_mutable)
|
||||
dst[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
|
||||
dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
|
||||
/* If FDM offset is used, the last row and column extend beyond the
|
||||
* framebuffer but are shifted over when storing. Expand the width and
|
||||
* height to account for that.
|
||||
*/
|
||||
if (tu_enable_fdm_offset(cmd)) {
|
||||
uint32_t width = dst[1] & A6XX_TEX_CONST_1_WIDTH__MASK;
|
||||
uint32_t height = (dst[1] & A6XX_TEX_CONST_1_HEIGHT__MASK) >>
|
||||
A6XX_TEX_CONST_1_HEIGHT__SHIFT;
|
||||
width += cmd->state.tiling->tile0.width;
|
||||
height += cmd->state.tiling->tile0.height;
|
||||
dst[1] = (dst[1] & ~(A6XX_TEX_CONST_1_WIDTH__MASK |
|
||||
A6XX_TEX_CONST_1_HEIGHT__MASK)) |
|
||||
A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
|
||||
}
|
||||
|
||||
dst[2] =
|
||||
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_PITCH(tiling->tile0.width * cpp);
|
||||
|
|
@ -2177,11 +2338,12 @@ tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
template <chip CHIP>
|
||||
static void
|
||||
tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct tu_renderpass_result *autotune_result)
|
||||
struct tu_renderpass_result *autotune_result,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
tu_lrz_tiling_begin<CHIP>(cmd, cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
|
||||
|
|
@ -2225,7 +2387,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu6_emit_render_cntl<CHIP>(cmd, cmd->state.subpass, cs, true);
|
||||
|
||||
tu6_emit_binning_pass<CHIP>(cmd, cs);
|
||||
tu6_emit_binning_pass<CHIP>(cmd, cs, fdm_offsets);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
|
|
@ -2270,9 +2432,9 @@ template <chip CHIP>
|
|||
static void
|
||||
tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
const struct tu_tile_config *tile,
|
||||
bool fdm)
|
||||
bool fdm, const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
tu6_emit_tile_select<CHIP>(cmd, &cmd->cs, tile, fdm);
|
||||
tu6_emit_tile_select<CHIP>(cmd, &cmd->cs, tile, fdm, fdm_offsets);
|
||||
tu_lrz_before_tile<CHIP>(cmd, &cmd->cs);
|
||||
|
||||
trace_start_draw_ib_gmem(&cmd->trace, &cmd->cs);
|
||||
|
|
@ -2338,7 +2500,8 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
static void
|
||||
tu_calc_frag_area(struct tu_cmd_buffer *cmd,
|
||||
struct tu_tile_config *tile,
|
||||
const struct tu_image_view *fdm)
|
||||
const struct tu_image_view *fdm,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const uint32_t x1 = tiling->tile0.width * tile->pos.x;
|
||||
|
|
@ -2351,11 +2514,71 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
|
|||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
struct tu_frag_area raw_areas[views];
|
||||
if (fdm) {
|
||||
tu_fragment_density_map_sample(fdm,
|
||||
(x1 + MIN2(x2, fb->width)) / 2,
|
||||
(y1 + MIN2(y2, fb->height)) / 2,
|
||||
fb->width, fb->height, views,
|
||||
raw_areas);
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
VkOffset2D sample_pos = { 0, 0 };
|
||||
|
||||
/* Offsets less than a tile size are accomplished by sliding the
|
||||
* tiles. However once we shift a whole tile size then we reset the
|
||||
* tiles back to where they were at the beginning and we need to
|
||||
* adjust where each bin is sampling from:
|
||||
*
|
||||
* x offset = 0:
|
||||
*
|
||||
* ------------------------------------
|
||||
* | * | * | * | (unused) |
|
||||
* ------------------------------------
|
||||
*
|
||||
* x offset = 4:
|
||||
*
|
||||
* -------------------------
|
||||
* | * | * | * | * |
|
||||
* -------------------------
|
||||
*
|
||||
* x offset = 8:
|
||||
*
|
||||
* ------------------------------------
|
||||
* | * | * | * | (unused) |
|
||||
* ------------------------------------
|
||||
*
|
||||
* As the user's offset increases we slide the tiles to the right,
|
||||
* until we reach the whole tile size and reset the tile positions.
|
||||
* tu_bin_offset() returns an amount to shift to the left, negating
|
||||
* the offset.
|
||||
*
|
||||
* If we were forced to use a shared viewport, then we must not shift
|
||||
* over the tiles and instead must only shift when sampling because
|
||||
* we cannot shift the tiles differently per view. This disables
|
||||
* smooth transitions of the fragment density map and effectively
|
||||
* negates the extension.
|
||||
*
|
||||
* Note that we cannot clamp x2/y2 to the framebuffer size, as we
|
||||
* normally would do, because then tiles along the edge would
|
||||
* incorrectly nudge the sample_pos towards the center of the
|
||||
* framebuffer. If we shift one complete tile over towards the
|
||||
* center and reset the tiles as above, the sample_pos would
|
||||
* then shift back towards the edge and we could get a "pop" from
|
||||
* suddenly changing density due to the slight shift.
|
||||
*/
|
||||
if (fdm_offsets) {
|
||||
VkOffset2D offset = fdm_offsets[i];
|
||||
if (!cmd->state.rp.shared_viewport) {
|
||||
VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
|
||||
offset.x += bin_offset.x;
|
||||
offset.y += bin_offset.y;
|
||||
}
|
||||
sample_pos.x = (x1 + x2) / 2 - offset.x;
|
||||
sample_pos.y = (y1 + y2) / 2 - offset.y;
|
||||
} else {
|
||||
sample_pos.x = (x1 + MIN2(x2, fb->width)) / 2;
|
||||
sample_pos.y = (y1 + MIN2(y2, fb->height)) / 2;
|
||||
}
|
||||
|
||||
tu_fragment_density_map_sample(fdm,
|
||||
sample_pos.x,
|
||||
sample_pos.y,
|
||||
fb->width, fb->height, i,
|
||||
&raw_areas[i]);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < views; i++)
|
||||
raw_areas[i].width = raw_areas[i].height = 1.0f;
|
||||
|
|
@ -2388,10 +2611,24 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
|
|||
width = 1u << util_logbase2(width);
|
||||
height = 1u << util_logbase2(height);
|
||||
|
||||
/* When FDM offset is enabled, the fragment area has to divide the
|
||||
* offset to make sure that we don't have tiles with partial fragments.
|
||||
* It would be bad to have the fragment area change as a function of the
|
||||
* offset, because we'd get "popping" as the resolution changes with the
|
||||
* offset, so just make sure it divides the offset granularity. This
|
||||
* should mean it always divides the offset for any possible offset.
|
||||
*/
|
||||
if (fdm_offsets) {
|
||||
width = MIN2(width, TU_FDM_OFFSET_GRANULARITY);
|
||||
height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
|
||||
}
|
||||
|
||||
/* Make sure that the width/height divides the tile width/height so
|
||||
* we don't have to do extra awkward clamping of the edges of each
|
||||
* bin when resolving. Note that because the tile width is rounded to
|
||||
* a multiple of 32 any power of two 32 or less will work.
|
||||
* bin when resolving. It also has to divide the fdm offset, if any.
|
||||
* Note that because the tile width is rounded to a multiple of 32 any
|
||||
* power of two 32 or less will work, and if there is an offset then it
|
||||
* must be a multiple of 4 so 2 or 4 will definitely work.
|
||||
*
|
||||
* TODO: Try to take advantage of the total area allowance here, too.
|
||||
*/
|
||||
|
|
@ -2486,7 +2723,8 @@ template <chip CHIP>
|
|||
void
|
||||
tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
|
||||
uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2,
|
||||
const struct tu_image_view *fdm)
|
||||
const struct tu_image_view *fdm,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
uint32_t width = tx2 - tx1;
|
||||
uint32_t height = ty2 - ty1;
|
||||
|
|
@ -2505,7 +2743,7 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
|
|||
tile->extent = { 1, 1 };
|
||||
tile->pipe = pipe;
|
||||
tile->slot_mask = 1u << (width * y + x);
|
||||
tu_calc_frag_area(cmd, tile, fdm);
|
||||
tu_calc_frag_area(cmd, tile, fdm, fdm_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2549,7 +2787,8 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
|
|||
if (merged_tiles & (1u << tile_idx))
|
||||
continue;
|
||||
|
||||
tu6_render_tile<CHIP>(cmd, &cmd->cs, &tiles[tile_idx], true);
|
||||
tu6_render_tile<CHIP>(cmd, &cmd->cs, &tiles[tile_idx],
|
||||
true, fdm_offsets);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2557,10 +2796,11 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
|
|||
template <chip CHIP>
|
||||
static void
|
||||
tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
||||
struct tu_renderpass_result *autotune_result)
|
||||
struct tu_renderpass_result *autotune_result,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
const struct tu_vsc_config *vsc = &tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, tiling);
|
||||
const struct tu_image_view *fdm = NULL;
|
||||
|
||||
if (cmd->state.pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) {
|
||||
|
|
@ -2571,6 +2811,10 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
bool merge_tiles = has_fdm && !TU_DEBUG(NO_BIN_MERGING) &&
|
||||
cmd->device->physical_device->info->a6xx.has_bin_mask;
|
||||
|
||||
/* If not using FDM make sure not to accidentally apply the offsets */
|
||||
if (!has_fdm)
|
||||
fdm_offsets = NULL;
|
||||
|
||||
/* Create gmem stores now (at EndRenderPass time) because they needed to
|
||||
* know whether to allow their conditional execution, which was tied to a
|
||||
* state that was known only at the end of the renderpass. They will be
|
||||
|
|
@ -2582,7 +2826,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
|
||||
cmd->trace_renderpass_end = u_trace_end_iterator(&cmd->trace);
|
||||
|
||||
tu6_tile_render_begin<CHIP>(cmd, &cmd->cs, autotune_result);
|
||||
tu6_tile_render_begin<CHIP>(cmd, &cmd->cs, autotune_result, fdm_offsets);
|
||||
|
||||
/* Note: we reverse the order of walking the pipes and tiles on every
|
||||
* other row, to improve texture cache locality compared to raster order.
|
||||
|
|
@ -2602,7 +2846,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height);
|
||||
|
||||
if (merge_tiles) {
|
||||
tu_render_pipe_fdm<CHIP>(cmd, pipe, tx1, ty1, tx2, ty2, fdm);
|
||||
tu_render_pipe_fdm<CHIP>(cmd, pipe, tx1, ty1, tx2, ty2, fdm,
|
||||
fdm_offsets);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -2623,9 +2868,10 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
.extent = { 1, 1 },
|
||||
};
|
||||
if (has_fdm)
|
||||
tu_calc_frag_area(cmd, &tile, fdm);
|
||||
tu_calc_frag_area(cmd, &tile, fdm, fdm_offsets);
|
||||
|
||||
tu6_render_tile<CHIP>(cmd, &cmd->cs, &tile, has_fdm);
|
||||
tu6_render_tile<CHIP>(cmd, &cmd->cs, &tile, has_fdm,
|
||||
fdm_offsets);
|
||||
}
|
||||
slot_row += tile_row_stride;
|
||||
}
|
||||
|
|
@ -2676,7 +2922,8 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
|
|||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_cmd_render(struct tu_cmd_buffer *cmd_buffer)
|
||||
tu_cmd_render(struct tu_cmd_buffer *cmd_buffer,
|
||||
const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
if (cmd_buffer->state.rp.has_tess)
|
||||
tu6_lazy_emit_tessfactor_addr<CHIP>(cmd_buffer);
|
||||
|
|
@ -2685,7 +2932,7 @@ tu_cmd_render(struct tu_cmd_buffer *cmd_buffer)
|
|||
if (use_sysmem_rendering(cmd_buffer, &autotune_result))
|
||||
tu_cmd_render_sysmem<CHIP>(cmd_buffer, autotune_result);
|
||||
else
|
||||
tu_cmd_render_tiles<CHIP>(cmd_buffer, autotune_result);
|
||||
tu_cmd_render_tiles<CHIP>(cmd_buffer, autotune_result, fdm_offsets);
|
||||
|
||||
/* Outside of renderpasses we assume all draw states are disabled. We do
|
||||
* this outside the draw CS for the normal case where 3d gmem stores aren't
|
||||
|
|
@ -4771,7 +5018,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
tu_restore_suspended_pass(cmd, cmd);
|
||||
|
||||
TU_CALLX(cmd->device, tu_cmd_render)(cmd);
|
||||
TU_CALLX(cmd->device, tu_cmd_render)(cmd, NULL);
|
||||
if (cmd->state.suspend_resume == SR_IN_CHAIN)
|
||||
cmd->state.suspend_resume = SR_NONE;
|
||||
else
|
||||
|
|
@ -4877,7 +5124,7 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r
|
|||
{
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses;
|
||||
const struct tu_vsc_config *vsc = &cmd->state.tiling->vsc;
|
||||
const struct tu_vsc_config *vsc = tu_vsc_config(cmd, cmd->state.tiling);
|
||||
|
||||
/* If we might choose to bin, then put the loads under a check for geometry
|
||||
* having been binned to this tile. If we don't choose to bin in the end,
|
||||
|
|
@ -4902,7 +5149,7 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r
|
|||
struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i];
|
||||
if ((att->load || att->load_stencil) && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
tu6_emit_blit_scissor(cmd, cs, true, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, i,
|
||||
|
|
@ -4918,7 +5165,7 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r
|
|||
&cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
tu6_emit_blit_scissor(cmd, cs, false, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_clear_gmem_attachment<CHIP>(cmd, cs, resolve_group, i);
|
||||
|
|
@ -4969,7 +5216,7 @@ tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resol
|
|||
&cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
tu6_emit_blit_scissor(cmd, cs, false, true);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu7_generic_clear_attachment(cmd, cs, resolve_group, i);
|
||||
|
|
@ -5432,7 +5679,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
|
|||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
if (subpass->resolve_attachments) {
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
tu6_emit_blit_scissor(cmd, cs, true, false);
|
||||
|
||||
struct tu_resolve_group resolve_group = {};
|
||||
|
||||
|
|
@ -5908,9 +6155,10 @@ static void
|
|||
fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas)
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_fs_params_state *state =
|
||||
(const struct apply_fs_params_state *)data;
|
||||
|
|
@ -5919,7 +6167,8 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
|
|||
for (unsigned i = 0; i < num_consts; i++) {
|
||||
assert(i < views);
|
||||
VkExtent2D area = frag_areas[i];
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(area, bin);
|
||||
VkRect2D bin = bins[i];
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
|
||||
|
||||
tu_cs_emit(cs, area.width);
|
||||
tu_cs_emit(cs, area.height);
|
||||
|
|
@ -7443,9 +7692,25 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
|||
return;
|
||||
}
|
||||
|
||||
const VkSubpassFragmentDensityMapOffsetEndInfoQCOM *fdm_offset_info =
|
||||
vk_find_struct_const(pSubpassEndInfo->pNext,
|
||||
SUBPASS_FRAGMENT_DENSITY_MAP_OFFSET_END_INFO_QCOM);
|
||||
const VkOffset2D *fdm_offsets =
|
||||
(fdm_offset_info && fdm_offset_info->fragmentDensityOffsetCount > 0) ?
|
||||
fdm_offset_info->pFragmentDensityOffsets : NULL;
|
||||
|
||||
VkOffset2D test_offsets[MAX_VIEWS];
|
||||
if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
|
||||
for (unsigned i = 0;
|
||||
i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
|
||||
test_offsets[i] = { 64, 64 };
|
||||
}
|
||||
fdm_offsets = test_offsets;
|
||||
}
|
||||
|
||||
tu_cs_end(&cmd_buffer->draw_cs);
|
||||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||
TU_CALLX(cmd_buffer->device, tu_cmd_render)(cmd_buffer);
|
||||
TU_CALLX(cmd_buffer->device, tu_cmd_render)(cmd_buffer, fdm_offsets);
|
||||
|
||||
cmd_buffer->state.cache.pending_flush_bits |=
|
||||
cmd_buffer->state.renderpass_cache.pending_flush_bits;
|
||||
|
|
@ -7483,7 +7748,16 @@ tu_CmdEndRendering(VkCommandBuffer commandBuffer)
|
|||
*/
|
||||
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
|
||||
} else {
|
||||
TU_CALLX(cmd_buffer->device, tu_cmd_render)(cmd_buffer);
|
||||
VkOffset2D test_offsets[MAX_VIEWS];
|
||||
const VkOffset2D *fdm_offsets = NULL;
|
||||
if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
|
||||
for (unsigned i = 0;
|
||||
i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
|
||||
test_offsets[i] = { 64, 64 };
|
||||
}
|
||||
fdm_offsets = test_offsets;
|
||||
}
|
||||
TU_CALLX(cmd_buffer->device, tu_cmd_render)(cmd_buffer, fdm_offsets);
|
||||
}
|
||||
|
||||
tu_reset_render_pass(cmd_buffer);
|
||||
|
|
|
|||
|
|
@ -695,7 +695,7 @@ tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cmd_buffer *suspended);
|
||||
|
||||
template <chip CHIP>
|
||||
void tu_cmd_render(struct tu_cmd_buffer *cmd);
|
||||
void tu_cmd_render(struct tu_cmd_buffer *cmd, const VkOffset2D *fdm_offsets);
|
||||
|
||||
void tu_dispatch_unaligned(VkCommandBuffer commandBuffer,
|
||||
uint32_t x, uint32_t y, uint32_t z);
|
||||
|
|
@ -748,12 +748,15 @@ void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
|||
void tu6_apply_depth_bounds_workaround(struct tu_device *device,
|
||||
uint32_t *rb_depth_cntl);
|
||||
|
||||
bool tu_enable_fdm_offset(struct tu_cmd_buffer *cmd);
|
||||
|
||||
typedef void (*tu_fdm_bin_apply_t)(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void *data,
|
||||
VkRect2D bin,
|
||||
VkOffset2D common_bin_offset,
|
||||
unsigned views,
|
||||
const VkExtent2D *frag_areas);
|
||||
const VkExtent2D *frag_areas,
|
||||
const VkRect2D *bins);
|
||||
|
||||
enum tu_fdm_flags {
|
||||
TU_FDM_NONE = 0,
|
||||
|
|
@ -807,13 +810,15 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
|
|||
*/
|
||||
unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
|
||||
VkExtent2D unscaled_frag_areas[num_views];
|
||||
VkRect2D bins[num_views];
|
||||
for (unsigned i = 0; i < num_views; i++) {
|
||||
unscaled_frag_areas[i] = (VkExtent2D) { 1, 1 };
|
||||
}
|
||||
apply(cmd, cs, state, (VkRect2D) {
|
||||
bins[i] = (VkRect2D) {
|
||||
{ 0, 0 },
|
||||
{ MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
|
||||
}, num_views, unscaled_frag_areas);
|
||||
};
|
||||
}
|
||||
apply(cmd, cs, state, (VkOffset2D) {0, 0}, num_views, unscaled_frag_areas, bins);
|
||||
assert(tu_cs_get_cur_iova(cs) == patch.iova + patch.size * sizeof(uint32_t));
|
||||
|
||||
util_dynarray_append(&cmd->fdm_bin_patchpoints,
|
||||
|
|
|
|||
|
|
@ -138,6 +138,18 @@
|
|||
#define MAX_FDM_TEXEL_SIZE_LOG2 10
|
||||
#define MAX_FDM_TEXEL_SIZE (1u << MAX_FDM_TEXEL_SIZE_LOG2)
|
||||
|
||||
/* This granularity is arbitrary, but there are two competing concerns here:
|
||||
*
|
||||
* - The fragment area has to always divide the offset, and we don't want the
|
||||
* fragment area changing with the offset, so we have to clamp the fragment
|
||||
* area to this granularity. Therefore larger granularities lead to lower
|
||||
* minimum resolution.
|
||||
* - The larger the offset granularity, the choppier the motion is.
|
||||
*
|
||||
* Choose 8 as a compromise between the two.
|
||||
*/
|
||||
#define TU_FDM_OFFSET_GRANULARITY 8
|
||||
|
||||
#define TU_GENX(FUNC_NAME) FD_GENX(FUNC_NAME)
|
||||
|
||||
#define TU_CALLX(device, thing) FD_CALLX((device)->physical_device->info, thing)
|
||||
|
|
|
|||
|
|
@ -333,6 +333,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
|||
.GOOGLE_user_type = true,
|
||||
.IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
|
||||
.NV_compute_shader_derivatives = device->info->chip >= 7,
|
||||
.QCOM_fragment_density_map_offset = true,
|
||||
.VALVE_mutable_descriptor_type = true,
|
||||
} };
|
||||
|
||||
|
|
@ -747,6 +748,9 @@ tu_get_features(struct tu_physical_device *pdevice,
|
|||
/* VK_KHR_subgroup_rotate */
|
||||
features->shaderSubgroupRotate = true;
|
||||
features->shaderSubgroupRotateClustered = true;
|
||||
|
||||
/* VK_QCOM_fragment_density_map_offset */
|
||||
features->fragmentDensityMapOffset = true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1385,6 +1389,11 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->degenerateLinesRasterized = false;
|
||||
props->fullyCoveredFragmentShaderInputVariable = false;
|
||||
props->conservativeRasterizationPostDepthCoverage = false;
|
||||
|
||||
/* VK_QCOM_fragment_density_map_offset */
|
||||
props->fragmentDensityOffsetGranularity = (VkExtent2D) {
|
||||
TU_FDM_OFFSET_GRANULARITY, TU_FDM_OFFSET_GRANULARITY
|
||||
};
|
||||
}
|
||||
|
||||
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
|
||||
|
|
|
|||
|
|
@ -488,7 +488,7 @@ struct tu_tiling_config {
|
|||
/* Whether using GMEM is even possible with this configuration */
|
||||
bool possible;
|
||||
|
||||
struct tu_vsc_config vsc;
|
||||
struct tu_vsc_config vsc, fdm_offset_vsc;
|
||||
};
|
||||
|
||||
struct tu_framebuffer
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ tu_insert_dynamic_cmdbufs(struct tu_device *dev,
|
|||
old_cmds[i]->pre_chain.trace_renderpass_end);
|
||||
}
|
||||
|
||||
TU_CALLX(dev, tu_cmd_render)(cmd_buffer);
|
||||
TU_CALLX(dev, tu_cmd_render)(cmd_buffer, NULL);
|
||||
|
||||
tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3);
|
||||
tu_cs_emit_qw(&cmd_buffer->cs,
|
||||
|
|
|
|||
|
|
@ -1163,10 +1163,10 @@ tu_DestroyImageView(VkDevice _device,
|
|||
*/
|
||||
void
|
||||
tu_fragment_density_map_sample(const struct tu_image_view *fdm,
|
||||
uint32_t x, uint32_t y,
|
||||
int32_t x, int32_t y,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t layers,
|
||||
struct tu_frag_area *areas)
|
||||
uint32_t layer,
|
||||
struct tu_frag_area *area)
|
||||
{
|
||||
assert(fdm->image->layout[0].tile_mode == TILE6_LINEAR);
|
||||
|
||||
|
|
@ -1176,20 +1176,19 @@ tu_fragment_density_map_sample(const struct tu_image_view *fdm,
|
|||
fdm_shift_x = CLAMP(fdm_shift_x, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
|
||||
fdm_shift_y = CLAMP(fdm_shift_y, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
|
||||
|
||||
uint32_t i = x >> fdm_shift_x;
|
||||
uint32_t j = y >> fdm_shift_y;
|
||||
int32_t i = x >> fdm_shift_x;
|
||||
int32_t j = y >> fdm_shift_y;
|
||||
|
||||
i = CLAMP(i, 0, fdm->vk.extent.width - 1);
|
||||
j = CLAMP(j, 0, fdm->vk.extent.height - 1);
|
||||
|
||||
unsigned cpp = fdm->image->layout[0].cpp;
|
||||
unsigned pitch = fdm->view.pitch;
|
||||
|
||||
void *pixel = (char *)fdm->image->map + fdm->view.offset + cpp * i + pitch * j;
|
||||
for (unsigned i = 0; i < layers; i++) {
|
||||
float density_src[4], density[4];
|
||||
util_format_unpack_rgba(fdm->view.format, density_src, pixel, 1);
|
||||
pipe_swizzle_4f(density, density_src, fdm->swizzle);
|
||||
areas[i].width = 1.0f / density[0];
|
||||
areas[i].height = 1.0f / density[1];
|
||||
|
||||
pixel = (char *)pixel + fdm->view.layer_size;
|
||||
}
|
||||
void *pixel = (char *)fdm->image->map + fdm->view.offset + fdm->view.layer_size * layer + cpp * i + pitch * j;
|
||||
float density_src[4], density[4];
|
||||
util_format_unpack_rgba(fdm->view.format, density_src, pixel, 1);
|
||||
pipe_swizzle_4f(density, density_src, fdm->swizzle);
|
||||
area->width = 1.0f / density[0];
|
||||
area->height = 1.0f / density[1];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -129,9 +129,9 @@ struct tu_frag_area {
|
|||
|
||||
void
|
||||
tu_fragment_density_map_sample(const struct tu_image_view *fdm,
|
||||
uint32_t x, uint32_t y,
|
||||
int32_t x, int32_t y,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t layers, struct tu_frag_area *areas);
|
||||
uint32_t layer, struct tu_frag_area *area);
|
||||
|
||||
VkResult
|
||||
tu_image_update_layout(struct tu_device *device, struct tu_image *image,
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ tu6_write_lrz_cntl(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
struct A6XX_GRAS_LRZ_CNTL cntl)
|
||||
{
|
||||
if (CHIP >= A7XX) {
|
||||
// A7XX split LRZ_CNTL into two separate registers.
|
||||
/* A7XX split LRZ_CNTL into two separate registers. */
|
||||
struct tu_reg_value cntl2 = A7XX_GRAS_LRZ_CNTL2(
|
||||
.disable_on_wrong_dir = cntl.disable_on_wrong_dir,
|
||||
.fc_enable = cntl.fc_enable,
|
||||
|
|
|
|||
|
|
@ -2548,44 +2548,49 @@ struct apply_viewport_state {
|
|||
bool share_scale;
|
||||
};
|
||||
|
||||
/* It's a hardware restriction that the window offset (i.e. bin.offset) must
|
||||
* be the same for all views. This means that GMEM coordinates cannot be a
|
||||
* simple scaling of framebuffer coordinates, because this would require us to
|
||||
* scale the window offset and the scale may be different per view. Instead we
|
||||
* have to apply a per-bin offset to the GMEM coordinate transform to make
|
||||
* sure that the window offset maps to itself. Specifically we need an offset
|
||||
* o to the transform:
|
||||
/* It's a hardware restriction that the window offset (i.e. common_bin_offset)
|
||||
* must be the same for all views. This means that GMEM coordinates cannot be
|
||||
* a simple scaling of framebuffer coordinates, because this would require us
|
||||
* to scale the window offset and the scale may be different per view. Instead
|
||||
* we have to apply a per-bin offset to the GMEM coordinate transform to make
|
||||
* sure that the window offset maps to the per-view bin coordinate, which will
|
||||
* be the same if there is no offset. Specifically we need an offset o to the
|
||||
* transform:
|
||||
*
|
||||
* x' = s * x + o
|
||||
*
|
||||
* so that when we plug in the bin start b_s:
|
||||
* so that when we plug in the per-view bin start b_s and the common window
|
||||
* offset b_cs:
|
||||
*
|
||||
* b_s = s * b_s + o
|
||||
* b_cs = s * b_s + o
|
||||
*
|
||||
* and we get:
|
||||
*
|
||||
* o = b_s - s * b_s
|
||||
* o = b_cs - s * b_s
|
||||
*
|
||||
* We use this form exactly, because we know the bin offset is a multiple of
|
||||
* We use this form exactly, because we know the bin start is a multiple of
|
||||
* the frag area so s * b_s is an integer and we can compute an exact result
|
||||
* easily.
|
||||
* easily. We also have to make sure that the bin offset is a multiple of the
|
||||
* frag area by restricting the frag area.
|
||||
*/
|
||||
|
||||
VkOffset2D
|
||||
tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin)
|
||||
tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin,
|
||||
VkOffset2D common_bin_offset)
|
||||
{
|
||||
assert(bin.offset.x % frag_area.width == 0);
|
||||
assert(bin.offset.y % frag_area.height == 0);
|
||||
|
||||
return (VkOffset2D) {
|
||||
bin.offset.x - bin.offset.x / frag_area.width,
|
||||
bin.offset.y - bin.offset.y / frag_area.height
|
||||
common_bin_offset.x - bin.offset.x / frag_area.width,
|
||||
common_bin_offset.y - bin.offset.y / frag_area.height
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
||||
VkRect2D bin, unsigned views, const VkExtent2D *frag_areas)
|
||||
VkOffset2D common_bin_offset, unsigned views,
|
||||
const VkExtent2D *frag_areas, const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_viewport_state *state =
|
||||
(const struct apply_viewport_state *)data;
|
||||
|
|
@ -2603,9 +2608,12 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
* replicate it across all viewports.
|
||||
*/
|
||||
VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
|
||||
VkRect2D bin = state->share_scale ? bins[0] : bins[i];
|
||||
VkViewport viewport =
|
||||
state->share_scale ? state->vp.viewports[i] : state->vp.viewports[0];
|
||||
if (frag_area.width == 1 && frag_area.height == 1) {
|
||||
if (frag_area.width == 1 && frag_area.height == 1 &&
|
||||
common_bin_offset.x == bin.offset.x &&
|
||||
common_bin_offset.y == bin.offset.y) {
|
||||
vp.viewports[i] = viewport;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2618,7 +2626,8 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
vp.viewports[i].width = viewport.width * scale_x;
|
||||
vp.viewports[i].height = viewport.height * scale_y;
|
||||
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
|
||||
common_bin_offset);
|
||||
|
||||
vp.viewports[i].x = scale_x * viewport.x + offset.x;
|
||||
vp.viewports[i].y = scale_y * viewport.y + offset.y;
|
||||
|
|
@ -2694,7 +2703,8 @@ tu6_emit_scissor(struct tu_cs *cs, const struct vk_viewport_state *vp)
|
|||
|
||||
static void
|
||||
fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
||||
VkRect2D bin, unsigned views, const VkExtent2D *frag_areas)
|
||||
VkOffset2D common_bin_offset, unsigned views,
|
||||
const VkExtent2D *frag_areas, const VkRect2D *bins)
|
||||
{
|
||||
const struct apply_viewport_state *state =
|
||||
(const struct apply_viewport_state *)data;
|
||||
|
|
@ -2703,12 +2713,9 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
|
||||
for (unsigned i = 0; i < vp.scissor_count; i++) {
|
||||
VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
|
||||
VkRect2D bin = state->share_scale ? bins[0] : bins[i];
|
||||
VkRect2D scissor =
|
||||
state->share_scale ? state->vp.scissors[i] : state->vp.scissors[0];
|
||||
if (frag_area.width == 1 && frag_area.height == 1) {
|
||||
vp.scissors[i] = scissor;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Transform the scissor following the viewport. It's unclear how this
|
||||
* is supposed to handle cases where the scissor isn't aligned to the
|
||||
|
|
@ -2716,7 +2723,8 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
* fragments if the scissor size equals the framebuffer size and it
|
||||
* isn't aligned to the fragment area.
|
||||
*/
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
|
||||
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin,
|
||||
common_bin_offset);
|
||||
VkOffset2D min = {
|
||||
scissor.offset.x / frag_area.width + offset.x,
|
||||
scissor.offset.y / frag_area.width + offset.y,
|
||||
|
|
@ -2731,12 +2739,12 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
|
|||
*/
|
||||
uint32_t scaled_width = bin.extent.width / frag_area.width;
|
||||
uint32_t scaled_height = bin.extent.height / frag_area.height;
|
||||
vp.scissors[i].offset.x = MAX2(min.x, bin.offset.x);
|
||||
vp.scissors[i].offset.y = MAX2(min.y, bin.offset.y);
|
||||
vp.scissors[i].offset.x = MAX2(min.x, common_bin_offset.x);
|
||||
vp.scissors[i].offset.y = MAX2(min.y, common_bin_offset.y);
|
||||
vp.scissors[i].extent.width =
|
||||
MIN2(max.x, bin.offset.x + scaled_width) - vp.scissors[i].offset.x;
|
||||
MIN2(max.x, common_bin_offset.x + scaled_width) - vp.scissors[i].offset.x;
|
||||
vp.scissors[i].extent.height =
|
||||
MIN2(max.y, bin.offset.y + scaled_height) - vp.scissors[i].offset.y;
|
||||
MIN2(max.y, common_bin_offset.y + scaled_height) - vp.scissors[i].offset.y;
|
||||
}
|
||||
|
||||
TU_CALLX(cs->device, tu6_emit_scissor)(cs, &vp);
|
||||
|
|
|
|||
|
|
@ -243,7 +243,8 @@ TU_DECL_PIPELINE_DOWNCAST(graphics, TU_PIPELINE_GRAPHICS)
|
|||
TU_DECL_PIPELINE_DOWNCAST(graphics_lib, TU_PIPELINE_GRAPHICS_LIB)
|
||||
TU_DECL_PIPELINE_DOWNCAST(compute, TU_PIPELINE_COMPUTE)
|
||||
|
||||
VkOffset2D tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin);
|
||||
VkOffset2D tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin,
|
||||
VkOffset2D common_bin_offset);
|
||||
|
||||
template <chip CHIP>
|
||||
uint32_t tu_emit_draw_state(struct tu_cmd_buffer *cmd);
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ static const struct debug_control tu_debug_options[] = {
|
|||
{ "dumpas", TU_DEBUG_DUMPAS },
|
||||
{ "nobinmerging", TU_DEBUG_NO_BIN_MERGING },
|
||||
{ "perfcraw", TU_DEBUG_PERFCRAW },
|
||||
{ "fdmoffset", TU_DEBUG_FDM_OFFSET },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
|
|
@ -454,6 +455,16 @@ tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
|
|||
tu_tiling_config_update_pipe_layout(vsc, device, pass->has_fdm);
|
||||
tu_tiling_config_update_pipes(vsc, device);
|
||||
tu_tiling_config_update_binning(vsc, device);
|
||||
|
||||
if (pass->has_fdm) {
|
||||
struct tu_vsc_config *fdm_offset_vsc = &tiling->fdm_offset_vsc;
|
||||
fdm_offset_vsc->tile_count = (VkExtent2D) {
|
||||
vsc->tile_count.width + 1, vsc->tile_count.height + 1
|
||||
};
|
||||
tu_tiling_config_update_pipe_layout(fdm_offset_vsc, device, true);
|
||||
tu_tiling_config_update_pipes(fdm_offset_vsc, device);
|
||||
tu_tiling_config_update_binning(fdm_offset_vsc, device);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ enum tu_debug_flags : uint64_t
|
|||
TU_DEBUG_DUMPAS = BITFIELD64_BIT(28),
|
||||
TU_DEBUG_NO_BIN_MERGING = BITFIELD64_BIT(29),
|
||||
TU_DEBUG_PERFCRAW = BITFIELD64_BIT(30),
|
||||
TU_DEBUG_FDM_OFFSET = BITFIELD64_BIT(31),
|
||||
};
|
||||
|
||||
struct tu_env {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue