freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL

It turns out that this register clears CP_LOAD_STATE6 packets, including
disabling any pending loads for SS6_INDIRECT/SS6_BINDLESS (these loads
don't actually happen until the draw itself, and I'm not sure whether
they happen if the state is unused by the shader) and marking constants
and UBO descriptors loaded with SS6_DIRECT as invalid. It's used very
differently from HLSQ_UPDATE_CNTL on a4xx, whence the name came, and
unlike on a4xx it's not readable, so it probably doesn't line up with
HLSQ_UPDATE_CNTL on a4xx.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5877>
This commit is contained in:
Connor Abbott 2020-07-13 12:22:20 +02:00
parent dad042b15a
commit e1fa740c4c
9 changed files with 130 additions and 22 deletions

View file

@ -117,8 +117,15 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
const struct ir3_info *i = &v->info;
enum a3xx_threadsize thrsz = FOUR_QUADS;
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0xff);
OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1);
OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_HS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_DS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_GS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_FS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_CS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_CS_IBO |
A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO);
unsigned constlen = align(v->constlen, 4);
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);

View file

@ -3408,8 +3408,31 @@ to upconvert to 32b float internally?
<bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/>
</reg32>
<!-- probably: -->
<reg32 offset="0xbb08" name="HLSQ_UPDATE_CNTL"/>
<reg32 offset="0xbb08" name="HLSQ_INVALIDATE_CMD">
<doc>
This register clears pending loads queued up by
CP_LOAD_STATE6. Each bit resets one or more kinds of
CP_LOAD_STATE6 load.
</doc>
<!-- per-stage state: shader, non-bindless UBO, textures, and samplers -->
<bitfield name="VS_STATE" pos="0" type="boolean"/>
<bitfield name="HS_STATE" pos="1" type="boolean"/>
<bitfield name="DS_STATE" pos="2" type="boolean"/>
<bitfield name="GS_STATE" pos="3" type="boolean"/>
<bitfield name="FS_STATE" pos="4" type="boolean"/>
<bitfield name="CS_STATE" pos="5" type="boolean"/>
<bitfield name="CS_IBO" pos="6" type="boolean"/>
<bitfield name="GFX_IBO" pos="7" type="boolean"/>
<bitfield name="CS_SHARED_CONST" pos="19" type="boolean"/>
<bitfield name="GFX_SHARED_CONST" pos="8" type="boolean"/>
<!-- SS6_BINDLESS: one bit per bindless base -->
<bitfield name="CS_BINDLESS" low="9" high="13" type="hex"/>
<bitfield name="GFX_BINDLESS" low="14" high="18" type="hex"/>
</reg32>
<reg32 offset="0xbb10" name="HLSQ_FS_CNTL" type="a6xx_hlsq_xs_cntl"/>

View file

@ -428,7 +428,18 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
.const_state = &dummy_const_state,
};
tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
.gfx_shared_const = true,
.gfx_bindless = 0x1f,
.cs_bindless = 0x1f));
tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);

View file

@ -719,7 +719,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
.gfx_shared_const = true,
.cs_shared_const = true,
.gfx_bindless = 0x1f,
.cs_bindless = 0x1f));
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
@ -1684,7 +1696,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
}
assert(dyn_idx == dynamicOffsetCount);
uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value;
uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
uint64_t addr[MAX_SETS + 1] = {};
struct tu_cs cs;
@ -1709,7 +1721,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
hlsq_update_value = 0x7c000;
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
} else {
@ -1717,7 +1729,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
hlsq_update_value = 0x3e00;
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
}
@ -1728,7 +1740,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10);
tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value));
tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {

View file

@ -468,8 +468,15 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
const struct ir3_shader_variant *v,
uint32_t binary_iova)
{
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
tu_cs_emit(cs, 0xff);
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.cs_ibo = true,
.gfx_ibo = true));
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova);
@ -1355,8 +1362,15 @@ tu6_emit_program(struct tu_cs *cs,
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
tu_cs_emit(cs, 0xff); /* XXX */
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.cs_ibo = true,
.gfx_ibo = true));
/* Don't use the binning pass variant when GS is present because we don't
* support compiling correct binning pass variants with GS.

View file

@ -34,6 +34,7 @@
#include "fd6_const.h"
#include "fd6_context.h"
#include "fd6_emit.h"
#include "fd6_pack.h"
struct fd6_compute_stateobj {
struct ir3_shader *shader;
@ -78,8 +79,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
const struct ir3_info *i = &v->info;
enum a3xx_threadsize thrsz = FOUR_QUADS;
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0xff);
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
));
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |

View file

@ -356,8 +356,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass);
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x7ffff);
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
.gfx_shared_const = true,
.gfx_bindless = 0x1f,
.cs_bindless = 0x1f
));
emit_marker6(ring, 7);
OUT_PKT7(ring, CP_SET_MARKER, 1);

View file

@ -1130,8 +1130,20 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
fd6_cache_inv(batch, ring);
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0xfffff);
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
.gfx_shared_const = true,
.cs_shared_const = true,
.gfx_bindless = 0x1f,
.cs_bindless = 0x1f
));
OUT_WFI5(ring);

View file

@ -39,6 +39,7 @@
#include "fd6_emit.h"
#include "fd6_texture.h"
#include "fd6_format.h"
#include "fd6_pack.h"
void
fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
@ -225,8 +226,16 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian
static void
setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state)
{
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0xff); /* XXX */
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
.vs_state = true,
.hs_state = true,
.ds_state = true,
.gs_state = true,
.fs_state = true,
.cs_state = true,
.gfx_ibo = true,
.cs_ibo = true,
));
debug_assert(state->vs->constlen >= state->bs->constlen);