freedreno/a6xx+tu: rename VSC_DATA/VSC_DATA2

These are the draw-stream and primitive-stream, so let's give them more
descriptive names.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>
This commit is contained in:
Rob Clark 2020-04-25 09:51:09 -07:00 committed by Marge Bot
parent 3ee3ad561a
commit a9c255d70c
8 changed files with 128 additions and 133 deletions

View file

@ -1741,9 +1741,9 @@ to upconvert to 32b float internally?
<bitfield name="WIDTH" low="0" high="7" shr="5" type="uint"/>
<bitfield name="HEIGHT" low="8" high="16" shr="4" type="uint"/>
</reg32>
<reg32 offset="0x0c03" name="VSC_SIZE_ADDRESS_LO"/>
<reg32 offset="0x0c04" name="VSC_SIZE_ADDRESS_HI"/>
<reg64 offset="0x0c03" name="VSC_SIZE_ADDRESS" type="waddress"/>
<reg32 offset="0x0c03" name="VSC_DRAW_STRM_SIZE_ADDRESS_LO"/>
<reg32 offset="0x0c04" name="VSC_DRAW_STRM_SIZE_ADDRESS_HI"/>
<reg64 offset="0x0c03" name="VSC_DRAW_STRM_SIZE_ADDRESS" type="waddress"/>
<reg32 offset="0x0c06" name="VSC_BIN_COUNT">
<bitfield name="NX" low="1" high="10" type="uint"/>
<bitfield name="NY" low="11" high="20" type="uint"/>
@ -1765,33 +1765,23 @@ to upconvert to 32b float internally?
</reg32>
</array>
<!--
compared to a5xx and earlier, we just program the address of the first
visibility stream and hw adds (pipe_num * VSC_PIPE_DATA_PITCH)
HW binning primitive & draw streams, which enable draws and primitives
within a draw to be skipped in the main tile pass. See:
https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
TODO now there seem to be two buffers of VSC data (both referenced by
CP_SET_BIN_DATA packet. Not sure what this new DATA2 one is, but seems
to have the larger pitch.
The "DATA2" buffer is probably actually the main visibility stream; it
is at least the larger of the two.
For VSC_DATA_PITCH, 0x20 actually seems to be sufficient (although blob
uses something somewhat larger) for many cases, although required value
can ramp up somewhat higher. Values less than 0x20 trigger GPU hangs
even with small amount of geometry (so possibly 0x20 is minimum
alignment or something like that). So far I can't seem to find any-
thing that needs values larger than 0x20
Compared to a5xx and earlier, we just program the address of the first
stream and hw adds (pipe_num * VSC_*_STRM_PITCH)
-->
<reg32 offset="0x0c30" name="VSC_PIPE_DATA2_ADDRESS_LO"/>
<reg32 offset="0x0c31" name="VSC_PIPE_DATA2_ADDRESS_HI"/>
<reg64 offset="0x0c30" name="VSC_PIPE_DATA2_ADDRESS" type="waddress"/>
<reg32 offset="0x0c32" name="VSC_PIPE_DATA2_PITCH"/>
<reg32 offset="0x0c33" name="VSC_PIPE_DATA2_ARRAY_PITCH" shr="4" type="uint"/>
<reg32 offset="0x0c34" name="VSC_PIPE_DATA_ADDRESS_LO"/>
<reg32 offset="0x0c35" name="VSC_PIPE_DATA_ADDRESS_HI"/>
<reg64 offset="0x0c34" name="VSC_PIPE_DATA_ADDRESS" type="waddress"/>
<reg32 offset="0x0c36" name="VSC_PIPE_DATA_PITCH"/>
<reg32 offset="0x0c37" name="VSC_PIPE_DATA_ARRAY_PITCH" shr="4" type="uint"/>
<reg32 offset="0x0c30" name="VSC_PRIM_STRM_ADDRESS_LO"/>
<reg32 offset="0x0c31" name="VSC_PRIM_STRM_ADDRESS_HI"/>
<reg64 offset="0x0c30" name="VSC_PRIM_STRM_ADDRESS" type="waddress"/>
<reg32 offset="0x0c32" name="VSC_PRIM_STRM_PITCH"/>
<reg32 offset="0x0c33" name="VSC_PRIM_STRM_ARRAY_PITCH" shr="4" type="uint"/>
<reg32 offset="0x0c34" name="VSC_DRAW_STRM_ADDRESS_LO"/>
<reg32 offset="0x0c35" name="VSC_DRAW_STRM_ADDRESS_HI"/>
<reg64 offset="0x0c34" name="VSC_DRAW_STRM_ADDRESS" type="waddress"/>
<reg32 offset="0x0c36" name="VSC_DRAW_STRM_PITCH"/>
<reg32 offset="0x0c37" name="VSC_DRAW_STRM_ARRAY_PITCH" shr="4" type="uint"/>
<array offset="0x0c38" name="VSC_STATE" stride="1" length="32">
<doc>
@ -1804,18 +1794,18 @@ to upconvert to 32b float internally?
<reg32 offset="0x0" name="REG"/>
</array>
<array offset="0x0c58" name="VSC_SIZE2" stride="1" length="32">
<array offset="0x0c58" name="VSC_PRIM_STRM_SIZE" stride="1" length="32">
<doc>
Has the size of data written to corresponding VSC_DATA2
Has the size of data written to corresponding VSC_PRIM_STRM
buffer.
</doc>
<reg32 offset="0x0" name="REG"/>
</array>
<array offset="0x0c78" name="VSC_SIZE" stride="1" length="32">
<array offset="0x0c78" name="VSC_DRAW_STRM_SIZE" stride="1" length="32">
<doc>
Has the size of data written to corresponding VSC pipe, ie.
same thing that is written out to VSC_SIZE_ADDRESS_LO/HI
same thing that is written out to VSC_DRAW_STRM_SIZE_ADDRESS_LO/HI
</doc>
<reg32 offset="0x0" name="REG"/>
</array>

View file

@ -832,12 +832,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="4" name="4">
<bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/>
</reg32>
<!-- what is this new address? -->
<!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: -->
<reg32 offset="5" name="5">
<bitfield name="BIN_DATA_ADDR2_LO" low="0" high="31"/>
<bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/>
</reg32>
<reg32 offset="6" name="6">
<bitfield name="BIN_DATA_ADDR2_HI" low="0" high="31"/>
<bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/>
</reg32>
</domain>

View file

@ -675,9 +675,9 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
tu_cs_emit_qw(cs, cmd->vsc_data.iova + tile->pipe * cmd->vsc_data_pitch);
tu_cs_emit_qw(cs, cmd->vsc_data.iova + (tile->pipe * 4) + (32 * cmd->vsc_data_pitch));
tu_cs_emit_qw(cs, cmd->vsc_data2.iova + (tile->pipe * cmd->vsc_data2_pitch));
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + tile->pipe * cmd->vsc_draw_strm_pitch);
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + (tile->pipe * 4) + (32 * cmd->vsc_draw_strm_pitch));
tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + (tile->pipe * cmd->vsc_prim_strm_pitch));
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x0);
@ -919,8 +919,8 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_SIZE(.width = tiling->tile0.extent.width,
.height = tiling->tile0.extent.height),
A6XX_VSC_SIZE_ADDRESS(.bo = &cmd->vsc_data,
.bo_offset = 32 * cmd->vsc_data_pitch));
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
.bo_offset = 32 * cmd->vsc_draw_strm_pitch));
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
@ -931,14 +931,14 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit(cs, tiling->pipe_config[i]);
tu_cs_emit_regs(cs,
A6XX_VSC_PIPE_DATA2_ADDRESS(.bo = &cmd->vsc_data2),
A6XX_VSC_PIPE_DATA2_PITCH(cmd->vsc_data2_pitch),
A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(cmd->vsc_data2.size));
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
A6XX_VSC_PRIM_STRM_PITCH(cmd->vsc_prim_strm_pitch),
A6XX_VSC_PRIM_STRM_ARRAY_PITCH(cmd->vsc_prim_strm.size));
tu_cs_emit_regs(cs,
A6XX_VSC_PIPE_DATA_ADDRESS(.bo = &cmd->vsc_data),
A6XX_VSC_PIPE_DATA_PITCH(cmd->vsc_data_pitch),
A6XX_VSC_PIPE_DATA_ARRAY_PITCH(cmd->vsc_data.size));
A6XX_VSC_DRAW_STRM_ADDRESS(.bo = &cmd->vsc_draw_strm),
A6XX_VSC_DRAW_STRM_PITCH(cmd->vsc_draw_strm_pitch),
A6XX_VSC_DRAW_STRM_ARRAY_PITCH(cmd->vsc_draw_strm.size));
}
static void
@ -958,22 +958,22 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_data_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_draw_strm_pitch));
tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data2_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_data2_pitch));
tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_prim_strm_pitch));
}
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
@ -995,7 +995,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* if (b0 set)..
*/
/* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
/* b0 will be set if VSC_DRAW_STRM or VSC_PRIM_STRM overflow: */
tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
A6XX_CP_REG_TEST_0_BIT(0) |
@ -1567,10 +1567,10 @@ tu_create_cmd_buffer(struct tu_device *device,
goto fail_scratch_bo;
/* TODO: resize on overflow */
cmd_buffer->vsc_data_pitch = device->vsc_data_pitch;
cmd_buffer->vsc_data2_pitch = device->vsc_data2_pitch;
cmd_buffer->vsc_data = device->vsc_data;
cmd_buffer->vsc_data2 = device->vsc_data2;
cmd_buffer->vsc_draw_strm_pitch = device->vsc_draw_strm_pitch;
cmd_buffer->vsc_prim_strm_pitch = device->vsc_prim_strm_pitch;
cmd_buffer->vsc_draw_strm = device->vsc_draw_strm;
cmd_buffer->vsc_prim_strm = device->vsc_prim_strm;
return VK_SUCCESS;
@ -1953,9 +1953,9 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
}
if (cmd_buffer->use_vsc_data) {
tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data,
tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_draw_strm,
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data2,
tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_prim_strm,
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
}

View file

@ -1217,17 +1217,17 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
if (!device->compiler)
goto fail_queues;
#define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_DRAW_STRM_SIZE */
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)
device->vsc_data_pitch = 0x440 * 4;
device->vsc_data2_pitch = 0x1040 * 4;
device->vsc_draw_strm_pitch = 0x440 * 4;
device->vsc_prim_strm_pitch = 0x1040 * 4;
result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
result = tu_bo_init_new(device, &device->vsc_draw_strm, VSC_DRAW_STRM_SIZE(device->vsc_draw_strm_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data;
result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
result = tu_bo_init_new(device, &device->vsc_prim_strm, VSC_PRIM_STRM_SIZE(device->vsc_prim_strm_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data2;
@ -1264,10 +1264,10 @@ fail_border_color_map:
tu_bo_finish(device, &device->border_color);
fail_border_color:
tu_bo_finish(device, &device->vsc_data2);
tu_bo_finish(device, &device->vsc_prim_strm);
fail_vsc_data2:
tu_bo_finish(device, &device->vsc_data);
tu_bo_finish(device, &device->vsc_draw_strm);
fail_vsc_data:
ralloc_free(device->compiler);
@ -1292,8 +1292,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
if (!device)
return;
tu_bo_finish(device, &device->vsc_data);
tu_bo_finish(device, &device->vsc_data2);
tu_bo_finish(device, &device->vsc_draw_strm);
tu_bo_finish(device, &device->vsc_prim_strm);
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)

View file

@ -503,10 +503,10 @@ struct tu_device
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
struct tu_bo vsc_data;
struct tu_bo vsc_data2;
uint32_t vsc_data_pitch;
uint32_t vsc_data2_pitch;
struct tu_bo vsc_draw_strm;
struct tu_bo vsc_prim_strm;
uint32_t vsc_draw_strm_pitch;
uint32_t vsc_prim_strm_pitch;
struct tu_bo border_color;
@ -1026,10 +1026,10 @@ struct tu_cmd_buffer
struct tu_bo scratch_bo;
uint32_t scratch_seqno;
struct tu_bo vsc_data;
struct tu_bo vsc_data2;
uint32_t vsc_data_pitch;
uint32_t vsc_data2_pitch;
struct tu_bo vsc_draw_strm;
struct tu_bo vsc_prim_strm;
uint32_t vsc_draw_strm_pitch;
uint32_t vsc_prim_strm_pitch;
bool use_vsc_data;
bool wait_for_idle;

View file

@ -50,10 +50,10 @@ fd6_context_destroy(struct pipe_context *pctx)
fd_context_destroy(pctx);
if (fd6_ctx->vsc_data)
fd_bo_del(fd6_ctx->vsc_data);
if (fd6_ctx->vsc_data2)
fd_bo_del(fd6_ctx->vsc_data2);
if (fd6_ctx->vsc_draw_strm)
fd_bo_del(fd6_ctx->vsc_draw_strm);
if (fd6_ctx->vsc_prim_strm)
fd_bo_del(fd6_ctx->vsc_prim_strm);
fd_bo_del(fd6_ctx->control_mem);
fd_context_cleanup_common_vbos(&fd6_ctx->base);
@ -176,8 +176,8 @@ PC_UNKNOWN_9805:
/* initial sizes for VSC buffers (or rather the per-pipe sizes
* which is used to derive entire buffer size:
*/
fd6_ctx->vsc_data_pitch = 0x440;
fd6_ctx->vsc_data2_pitch = 0x1040;
fd6_ctx->vsc_draw_strm_pitch = 0x440;
fd6_ctx->vsc_prim_strm_pitch = 0x1040;
fd6_ctx->control_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM, "control");

View file

@ -43,12 +43,12 @@ struct fd6_context {
* Compared to previous generations
* (1) we cannot specify individual buffers per VSC, instead
* just a pitch and base address
* (2) there is a second smaller buffer, for something.. we
* also stash VSC_BIN_SIZE at end of 2nd buffer.
* (2) there is a second smaller buffer.. we also stash
* VSC_BIN_SIZE at end of 2nd buffer.
*/
struct fd_bo *vsc_data, *vsc_data2;
struct fd_bo *vsc_draw_strm, *vsc_prim_strm;
unsigned vsc_data_pitch, vsc_data2_pitch;
unsigned vsc_draw_strm_pitch, vsc_prim_strm_pitch;
/* The 'control' mem BO is used for various housekeeping
* functions. See 'struct fd6_control'

View file

@ -313,8 +313,9 @@ update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, b
A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}
#define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)
static void
update_vsc_pipe(struct fd_batch *batch)
@ -326,21 +327,23 @@ update_vsc_pipe(struct fd_batch *batch)
int i;
if (!fd6_ctx->vsc_data) {
fd6_ctx->vsc_data = fd_bo_new(ctx->screen->dev,
VSC_DATA_SIZE(fd6_ctx->vsc_data_pitch),
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data");
if (!fd6_ctx->vsc_draw_strm) {
fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_draw_strm");
}
if (!fd6_ctx->vsc_data2) {
fd6_ctx->vsc_data2 = fd_bo_new(ctx->screen->dev,
VSC_DATA2_SIZE(fd6_ctx->vsc_data2_pitch),
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data2");
if (!fd6_ctx->vsc_prim_strm) {
fd6_ctx->vsc_prim_strm = fd_bo_new(ctx->screen->dev,
VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_prim_strm");
}
OUT_REG(ring,
A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
A6XX_VSC_SIZE_ADDRESS(.bo = fd6_ctx->vsc_data, .bo_offset = 32 * fd6_ctx->vsc_data_pitch));
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(
.bo = fd6_ctx->vsc_draw_strm,
.bo_offset = 32 * fd6_ctx->vsc_draw_strm_pitch));
OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x,
.ny = gmem->nbins_y));
@ -355,14 +358,14 @@ update_vsc_pipe(struct fd_batch *batch)
}
OUT_REG(ring,
A6XX_VSC_PIPE_DATA2_ADDRESS(.bo = fd6_ctx->vsc_data2),
A6XX_VSC_PIPE_DATA2_PITCH(.dword = fd6_ctx->vsc_data2_pitch),
A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_data2)));
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
A6XX_VSC_PRIM_STRM_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_prim_strm)));
OUT_REG(ring,
A6XX_VSC_PIPE_DATA_ADDRESS(.bo = fd6_ctx->vsc_data),
A6XX_VSC_PIPE_DATA_PITCH(.dword = fd6_ctx->vsc_data_pitch),
A6XX_VSC_PIPE_DATA_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_data)));
A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
A6XX_VSC_DRAW_STRM_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_draw_strm)));
}
/* TODO we probably have more than 8 scratch regs.. although the first
@ -372,8 +375,8 @@ update_vsc_pipe(struct fd_batch *batch)
#define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
/*
* If overflow is detected, either 0x1 (VSC_DATA overflow) or 0x3
* (VSC_DATA2 overflow) plus the size of the overflowed buffer is
* If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
* (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
* written to control->vsc_overflow. This allows the CPU to
* detect which buffer overflowed (and, since the current size is
* encoded as well, this protects against already-submitted but
@ -392,8 +395,8 @@ emit_vsc_overflow_test(struct fd_batch *batch)
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
debug_assert((fd6_ctx->vsc_data_pitch & 0x3) == 0);
debug_assert((fd6_ctx->vsc_data2_pitch & 0x3) == 0);
debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);
/* Clear vsc_scratch: */
OUT_PKT7(ring, CP_MEM_WRITE, 3);
@ -405,22 +408,22 @@ emit_vsc_overflow_test(struct fd_batch *batch)
OUT_PKT7(ring, CP_COND_WRITE5, 8);
OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_data_pitch));
OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch));
OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_scratch)); /* WRITE_ADDR_LO/HI */
OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_data_pitch));
OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));
OUT_PKT7(ring, CP_COND_WRITE5, 8);
OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_data2_pitch));
OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch));
OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_scratch)); /* WRITE_ADDR_LO/HI */
OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_data2_pitch));
OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
}
OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
@ -444,7 +447,7 @@ emit_vsc_overflow_test(struct fd_batch *batch)
BEGIN_RING(ring, 10); /* ensure if/else doesn't get split */
/* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
/* b0 will be set if VSC_DRAW_STRM or VSC_PRIM_STRM overflow: */
OUT_PKT7(ring, CP_REG_TEST, 1);
OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
A6XX_CP_REG_TEST_0_BIT(0) |
@ -492,9 +495,9 @@ check_vsc_overflow(struct fd_context *ctx)
unsigned size = vsc_overflow & ~0x3;
if (buffer == 0x1) {
/* VSC_PIPE_DATA overflow: */
/* VSC_DRAW_STRM overflow: */
if (size < fd6_ctx->vsc_data_pitch) {
if (size < fd6_ctx->vsc_draw_strm_pitch) {
/* we've already increased the size, this overflow is
* from a batch submitted before resize, but executed
* after
@ -502,25 +505,27 @@ check_vsc_overflow(struct fd_context *ctx)
return;
}
fd_bo_del(fd6_ctx->vsc_data);
fd6_ctx->vsc_data = NULL;
fd6_ctx->vsc_data_pitch *= 2;
fd_bo_del(fd6_ctx->vsc_draw_strm);
fd6_ctx->vsc_draw_strm = NULL;
fd6_ctx->vsc_draw_strm_pitch *= 2;
debug_printf("resized VSC_DATA_PITCH to: 0x%x\n", fd6_ctx->vsc_data_pitch);
debug_printf("resized VSC_DRAW_STRM_PITCH to: 0x%x\n",
fd6_ctx->vsc_draw_strm_pitch);
} else if (buffer == 0x3) {
/* VSC_PIPE_DATA2 overflow: */
/* VSC_PRIM_STRM overflow: */
if (size < fd6_ctx->vsc_data2_pitch) {
if (size < fd6_ctx->vsc_prim_strm_pitch) {
/* we've already increased the size */
return;
}
fd_bo_del(fd6_ctx->vsc_data2);
fd6_ctx->vsc_data2 = NULL;
fd6_ctx->vsc_data2_pitch *= 2;
fd_bo_del(fd6_ctx->vsc_prim_strm);
fd6_ctx->vsc_prim_strm = NULL;
fd6_ctx->vsc_prim_strm_pitch *= 2;
debug_printf("resized VSC_DATA2_PITCH to: 0x%x\n", fd6_ctx->vsc_data2_pitch);
debug_printf("resized VSC_PRIM_STRM_PITCH to: 0x%x\n",
fd6_ctx->vsc_prim_strm_pitch);
} else {
/* NOTE: it's possible, for example, for overflow to corrupt the
@ -865,12 +870,12 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
CP_SET_BIN_DATA5_0_VSC_N(tile->n));
OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_PIPE[p].DATA_ADDRESS */
(tile->p * fd6_ctx->vsc_data_pitch), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_SIZE_ADDRESS + (p * 4) */
(tile->p * 4) + (32 * fd6_ctx->vsc_data_pitch), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_data2,
(tile->p * fd6_ctx->vsc_data2_pitch), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
(tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_SIZE_ADDRESS + (p * 4) */
(tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
(tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
OUT_RING(ring, 0x0);