mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 05:58:05 +02:00
turnip: Allocate tess BOs as a function of draw size
To store tess outputs, the HS stg's into two buffers, one for per-vertex/per-patch output variables (tess_param) and one for TessLevelInner/Outer (tess_factor). The addresses of these buffers are uploaded as consts to the HS/DS and the tess_factor iova is written to REG_A6XX_PC_TESSFACTOR_ADDR. While the sizes of these buffers are a function of vertex count and patch count, allocation is relatively straightforward on freedreno: just keep track of the max required buffer size for the entire batch and allocate before batch submit. In Vulkan, however, a given pipeline can be bound multiple times across any number of command buffers, each drawing with a different number of vertices. One solution is to track the max buffer size for the entire command buffer (similar to fd_batch) and on vkEndCommandBuffer, allocate appropriately sized tess BOs. Since the tess BO addresses are emitted as part of the pipeline state setup (e.g. PKT4 to REG_A6XX_PC_TESSFACTOR_ADDR), we need to create a new state group independent of a specific pipeline and parameterize its IB with the command-buffer-specific tess BO iovas. Without a larger refactor, the simplest way to do this is just to emit per-draw call consts and leverage scratch_bo to re-use buffers. This way we won't have to store and rewrite earlier packets in the command stream on vkEndCommandBuffer. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5059>
This commit is contained in:
parent
eefdca2e2f
commit
f08a80dcd4
3 changed files with 234 additions and 17 deletions
|
|
@ -3027,6 +3027,121 @@ tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_tess_param_bo_size(const struct tu_pipeline *pipeline,
|
||||
const struct tu_draw_info *draw_info)
|
||||
{
|
||||
/* TODO: For indirect draws, we can't compute the BO size ahead of time.
|
||||
* Still not sure what to do here, so just allocate a reasonably large
|
||||
* BO and hope for the best for now.
|
||||
* (maxTessellationControlPerVertexOutputComponents * 2048 vertices +
|
||||
* maxTessellationControlPerPatchOutputComponents * 512 patches) */
|
||||
if (draw_info->indirect) {
|
||||
return ((128 * 2048) + (128 * 512)) * 4;
|
||||
}
|
||||
|
||||
/* For each patch, adreno lays out the tess param BO in memory as:
|
||||
* (v_input[0][0])...(v_input[i][j])(p_input[0])...(p_input[k]).
|
||||
* where i = # vertices per patch, j = # per-vertex outputs, and
|
||||
* k = # per-patch outputs.*/
|
||||
uint32_t verts_per_patch = pipeline->ia.primtype - DI_PT_PATCHES0;
|
||||
uint32_t num_patches = draw_info->count / verts_per_patch;
|
||||
return draw_info->count * pipeline->tess.per_vertex_output_size +
|
||||
pipeline->tess.per_patch_output_size * num_patches;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_tess_factor_bo_size(const struct tu_pipeline *pipeline,
|
||||
const struct tu_draw_info *draw_info)
|
||||
{
|
||||
/* TODO: For indirect draws, we can't compute the BO size ahead of time.
|
||||
* Still not sure what to do here, so just allocate a reasonably large
|
||||
* BO and hope for the best for now.
|
||||
* (quad factor stride * 512 patches) */
|
||||
if (draw_info->indirect) {
|
||||
return (28 * 512) * 4;
|
||||
}
|
||||
|
||||
/* Each distinct patch gets its own tess factor output. */
|
||||
uint32_t verts_per_patch = pipeline->ia.primtype - DI_PT_PATCHES0;
|
||||
uint32_t num_patches = draw_info->count / verts_per_patch;
|
||||
uint32_t factor_stride;
|
||||
switch (pipeline->tess.patch_type) {
|
||||
case IR3_TESS_ISOLINES:
|
||||
factor_stride = 12;
|
||||
break;
|
||||
case IR3_TESS_TRIANGLES:
|
||||
factor_stride = 20;
|
||||
break;
|
||||
case IR3_TESS_QUADS:
|
||||
factor_stride = 28;
|
||||
break;
|
||||
default:
|
||||
unreachable("bad tessmode");
|
||||
}
|
||||
return factor_stride * num_patches;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu6_emit_tess_consts(struct tu_cmd_buffer *cmd,
|
||||
const struct tu_draw_info *draw,
|
||||
const struct tu_pipeline *pipeline,
|
||||
struct tu_cs_entry *entry)
|
||||
{
|
||||
struct tu_cs cs;
|
||||
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 20, &cs);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
uint64_t tess_factor_size = get_tess_factor_bo_size(pipeline, draw);
|
||||
uint64_t tess_param_size = get_tess_param_bo_size(pipeline, draw);
|
||||
uint64_t tess_bo_size = tess_factor_size + tess_param_size;
|
||||
if (tess_bo_size > 0) {
|
||||
struct tu_bo *tess_bo;
|
||||
result = tu_get_scratch_bo(cmd->device, tess_bo_size, &tess_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, tess_bo,
|
||||
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
|
||||
uint64_t tess_factor_iova = tess_bo->iova;
|
||||
uint64_t tess_param_iova = tess_factor_iova + tess_factor_size;
|
||||
|
||||
tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(pipeline->tess.hs_bo_regid) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_HS_SHADER) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
tu_cs_emit_qw(&cs, tess_param_iova);
|
||||
tu_cs_emit_qw(&cs, tess_factor_iova);
|
||||
|
||||
tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(pipeline->tess.ds_bo_regid) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_DS_SHADER) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
tu_cs_emit_qw(&cs, tess_param_iova);
|
||||
tu_cs_emit_qw(&cs, tess_factor_iova);
|
||||
|
||||
tu_cs_emit_pkt4(&cs, REG_A6XX_PC_TESSFACTOR_ADDR_LO, 2);
|
||||
tu_cs_emit_qw(&cs, tess_factor_iova);
|
||||
|
||||
/* TODO: Without this WFI here, the hardware seems unable to read these
|
||||
* addresses we just emitted. Freedreno emits these consts as part of
|
||||
* IB1 instead of in a draw state which might make this WFI unnecessary,
|
||||
* but it requires a bit more indirection (SS6_INDIRECT for consts). */
|
||||
tu_cs_emit_wfi(&cs);
|
||||
}
|
||||
*entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
|
|
@ -3092,6 +3207,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
bool has_tess =
|
||||
pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
struct tu_cs_entry tess_consts = {};
|
||||
if (has_tess) {
|
||||
result = tu6_emit_tess_consts(cmd, draw, pipeline, &tess_consts);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* for the first draw in a renderpass, re-emit all the draw states
|
||||
*
|
||||
* and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
|
||||
|
|
@ -3107,6 +3231,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_TESS, tess_consts);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
|
||||
|
|
@ -3132,6 +3257,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
|
|||
* note we eventually don't want to have to emit anything here
|
||||
*/
|
||||
uint32_t draw_state_count =
|
||||
has_tess +
|
||||
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) +
|
||||
((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 1 : 0) +
|
||||
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
|
||||
|
|
@ -3139,6 +3265,10 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
|
||||
|
||||
/* We may need to re-emit tess consts if the current draw call is
|
||||
* sufficiently larger than the last draw call. */
|
||||
if (has_tess)
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_TESS, tess_consts);
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
|
||||
|
|
|
|||
|
|
@ -663,8 +663,8 @@ tu6_emit_link_map(struct tu_cs *cs,
|
|||
if (size <= 0)
|
||||
return;
|
||||
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0, size,
|
||||
patch_locs);
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0,
|
||||
size, patch_locs);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
|
|
@ -1129,24 +1129,65 @@ tu6_emit_fs_outputs(struct tu_cs *cs,
|
|||
}
|
||||
|
||||
static void
|
||||
tu6_emit_geometry_consts(struct tu_cs *cs,
|
||||
const struct ir3_shader_variant *vs,
|
||||
const struct ir3_shader_variant *gs) {
|
||||
unsigned num_vertices = gs->shader->nir->info.gs.vertices_in;
|
||||
tu6_emit_geom_tess_consts(struct tu_cs *cs,
|
||||
const struct ir3_shader_variant *vs,
|
||||
const struct ir3_shader_variant *hs,
|
||||
const struct ir3_shader_variant *ds,
|
||||
const struct ir3_shader_variant *gs,
|
||||
uint32_t cps_per_patch)
|
||||
{
|
||||
uint32_t num_vertices =
|
||||
hs ? cps_per_patch : gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t params[4] = {
|
||||
vs->output_size * num_vertices * 4, /* primitive stride */
|
||||
vs->output_size * 4, /* vertex stride */
|
||||
uint32_t vs_params[4] = {
|
||||
vs->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
vs->output_size * 4, /* vs vertex stride */
|
||||
0,
|
||||
0,
|
||||
};
|
||||
uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param;
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0,
|
||||
ARRAY_SIZE(params), params);
|
||||
ARRAY_SIZE(vs_params), vs_params);
|
||||
|
||||
uint32_t gs_base = ir3_const_state(gs)->offsets.primitive_param;
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
|
||||
ARRAY_SIZE(params), params);
|
||||
if (hs) {
|
||||
assert(ds->type != MESA_SHADER_NONE);
|
||||
uint32_t hs_params[4] = {
|
||||
vs->output_size * num_vertices * 4, /* hs primitive stride */
|
||||
vs->output_size * 4, /* hs vertex stride */
|
||||
hs->output_size,
|
||||
cps_per_patch,
|
||||
};
|
||||
|
||||
uint32_t hs_base = hs->const_state->offsets.primitive_param;
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
|
||||
ARRAY_SIZE(hs_params), hs_params);
|
||||
if (gs)
|
||||
num_vertices = gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t ds_params[4] = {
|
||||
ds->output_size * num_vertices * 4, /* ds primitive stride */
|
||||
ds->output_size * 4, /* ds vertex stride */
|
||||
hs->output_size, /* hs vertex stride (dwords) */
|
||||
hs->shader->nir->info.tess.tcs_vertices_out
|
||||
};
|
||||
|
||||
uint32_t ds_base = ds->const_state->offsets.primitive_param;
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0,
|
||||
ARRAY_SIZE(ds_params), ds_params);
|
||||
}
|
||||
|
||||
if (gs) {
|
||||
const struct ir3_shader_variant *prev = ds ? ds : vs;
|
||||
uint32_t gs_params[4] = {
|
||||
prev->output_size * num_vertices * 4, /* gs primitive stride */
|
||||
prev->output_size * 4, /* gs vertex stride */
|
||||
0,
|
||||
0,
|
||||
};
|
||||
uint32_t gs_base = gs->const_state->offsets.primitive_param;
|
||||
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
|
||||
ARRAY_SIZE(gs_params), gs_params);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1158,6 +1199,8 @@ tu6_emit_program(struct tu_cs *cs,
|
|||
{
|
||||
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
|
||||
const struct ir3_shader_variant *bs = builder->binning_variant;
|
||||
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
|
||||
const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
|
||||
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
|
||||
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
|
||||
gl_shader_stage stage = MESA_SHADER_VERTEX;
|
||||
|
|
@ -1207,8 +1250,11 @@ tu6_emit_program(struct tu_cs *cs,
|
|||
builder->render_components);
|
||||
}
|
||||
|
||||
if (gs)
|
||||
tu6_emit_geometry_consts(cs, vs, gs);
|
||||
if (gs || hs) {
|
||||
uint32_t cps_per_patch = builder->create_info->pTessellationState ?
|
||||
builder->create_info->pTessellationState->patchControlPoints : 0;
|
||||
tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1695,7 +1741,8 @@ tu6_get_tessmode(struct tu_shader* shader)
|
|||
}
|
||||
|
||||
static VkResult
|
||||
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
|
||||
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
|
||||
struct tu_pipeline *pipeline)
|
||||
{
|
||||
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
|
||||
NULL
|
||||
|
|
@ -1732,6 +1779,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
|
|||
builder->shaders[stage] = shader;
|
||||
}
|
||||
|
||||
pipeline->tess.patch_type = key.tessellation;
|
||||
|
||||
for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
|
||||
stage > MESA_SHADER_NONE; stage--) {
|
||||
if (!builder->shaders[stage])
|
||||
|
|
@ -1767,6 +1816,30 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
|
|||
sizeof(uint32_t) * variant->info.sizedwords;
|
||||
builder->binning_variant = variant;
|
||||
|
||||
if (builder->shaders[MESA_SHADER_TESS_CTRL]) {
|
||||
struct ir3_shader *hs =
|
||||
builder->shaders[MESA_SHADER_TESS_CTRL]->ir3_shader;
|
||||
assert(hs->type != MESA_SHADER_NONE);
|
||||
|
||||
/* Calculate and store the per-vertex and per-patch HS-output sizes. */
|
||||
uint32_t per_vertex_output_size = 0;
|
||||
uint32_t per_patch_output_size = 0;
|
||||
nir_foreach_variable (output, &hs->nir->outputs) {
|
||||
switch (output->data.location) {
|
||||
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
||||
case VARYING_SLOT_TESS_LEVEL_INNER:
|
||||
continue;
|
||||
}
|
||||
uint32_t size = glsl_count_attribute_slots(output->type, false) * 4;
|
||||
if (output->data.patch)
|
||||
per_patch_output_size += size;
|
||||
else
|
||||
per_vertex_output_size += size;
|
||||
}
|
||||
pipeline->tess.per_vertex_output_size = per_vertex_output_size;
|
||||
pipeline->tess.per_patch_output_size = per_patch_output_size;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -1942,6 +2015,10 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
|
|||
assert(pipeline->ia.primtype == DI_PT_PATCHES0);
|
||||
assert(tess_info->patchControlPoints <= 32);
|
||||
pipeline->ia.primtype += tess_info->patchControlPoints;
|
||||
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
|
||||
const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
|
||||
pipeline->tess.hs_bo_regid = hs->const_state->offsets.primitive_param + 1;
|
||||
pipeline->tess.ds_bo_regid = ds->const_state->offsets.primitive_param + 1;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2151,7 +2228,7 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
|
|||
(*pipeline)->layout = builder->layout;
|
||||
|
||||
/* compile and upload shaders */
|
||||
result = tu_pipeline_builder_compile_shaders(builder);
|
||||
result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
|
||||
if (result == VK_SUCCESS)
|
||||
result = tu_pipeline_builder_upload_shaders(builder, *pipeline);
|
||||
if (result != VK_SUCCESS) {
|
||||
|
|
|
|||
|
|
@ -427,6 +427,7 @@ enum tu_draw_state_group_id
|
|||
{
|
||||
TU_DRAW_STATE_PROGRAM,
|
||||
TU_DRAW_STATE_PROGRAM_BINNING,
|
||||
TU_DRAW_STATE_TESS,
|
||||
TU_DRAW_STATE_VB,
|
||||
TU_DRAW_STATE_VI,
|
||||
TU_DRAW_STATE_VI_BINNING,
|
||||
|
|
@ -1100,6 +1101,15 @@ struct tu_pipeline
|
|||
bool primitive_restart;
|
||||
} ia;
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t patch_type;
|
||||
uint32_t per_vertex_output_size;
|
||||
uint32_t per_patch_output_size;
|
||||
uint32_t hs_bo_regid;
|
||||
uint32_t ds_bo_regid;
|
||||
} tess;
|
||||
|
||||
struct
|
||||
{
|
||||
struct tu_cs_entry state_ib;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue