mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 22:00:13 +01:00
radeonsi: implement draw_vertex_state for lower display list overhead
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>
This commit is contained in:
parent
73f7142ace
commit
fb8f532ea1
7 changed files with 459 additions and 94 deletions
|
|
@ -164,6 +164,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_ATOMINC_WRAP:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_DRAW_VERTEX_STATE:
|
||||
return !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST));
|
||||
|
||||
case PIPE_CAP_GLSL_ZERO_INIT:
|
||||
return 2;
|
||||
|
||||
|
|
|
|||
|
|
@ -298,20 +298,34 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
|
|||
ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */
|
||||
}
|
||||
|
||||
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper)
|
||||
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper,
|
||||
pipe_draw_vertex_state_func vstate_wrapper)
|
||||
{
|
||||
if (wrapper) {
|
||||
if (wrapper != sctx->b.draw_vbo) {
|
||||
assert (!sctx->real_draw_vbo);
|
||||
assert(!sctx->real_draw_vbo);
|
||||
assert(!sctx->real_draw_vertex_state);
|
||||
sctx->real_draw_vbo = sctx->b.draw_vbo;
|
||||
sctx->real_draw_vertex_state = sctx->b.draw_vertex_state;
|
||||
sctx->b.draw_vbo = wrapper;
|
||||
sctx->b.draw_vertex_state = vstate_wrapper;
|
||||
}
|
||||
} else if (sctx->real_draw_vbo) {
|
||||
sctx->real_draw_vbo = NULL;
|
||||
sctx->real_draw_vertex_state = NULL;
|
||||
si_select_draw_vbo(sctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_tmz_preamble(struct si_context *sctx)
|
||||
{
|
||||
bool secure = si_gfx_resources_check_encrypted(sctx);
|
||||
if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) {
|
||||
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW |
|
||||
RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_draw_vbo_tmz_preamble(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
|
|
@ -320,15 +334,22 @@ static void si_draw_vbo_tmz_preamble(struct pipe_context *ctx,
|
|||
unsigned num_draws) {
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
bool secure = si_gfx_resources_check_encrypted(sctx);
|
||||
if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) {
|
||||
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW |
|
||||
RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
|
||||
}
|
||||
|
||||
si_tmz_preamble(sctx);
|
||||
sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
|
||||
}
|
||||
|
||||
static void si_draw_vstate_tmz_preamble(struct pipe_context *ctx,
|
||||
struct pipe_vertex_state *state,
|
||||
uint32_t partial_velem_mask,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws) {
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
si_tmz_preamble(sctx);
|
||||
sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws);
|
||||
}
|
||||
|
||||
void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
||||
{
|
||||
bool is_secure = false;
|
||||
|
|
@ -336,7 +357,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
if (unlikely(radeon_uses_secure_bos(ctx->ws))) {
|
||||
is_secure = ctx->ws->cs_is_secure(&ctx->gfx_cs);
|
||||
|
||||
si_install_draw_wrapper(ctx, si_draw_vbo_tmz_preamble);
|
||||
si_install_draw_wrapper(ctx, si_draw_vbo_tmz_preamble,
|
||||
si_draw_vstate_tmz_preamble);
|
||||
}
|
||||
|
||||
if (ctx->is_debug)
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ static const struct debug_named_value radeonsi_debug_options[] = {
|
|||
{"check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info."},
|
||||
{"reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context."},
|
||||
{"shadowregs", DBG(SHADOW_REGS), "Enable CP register shadowing."},
|
||||
{"nofastdlist", DBG(NO_FAST_DISPLAY_LIST), "Disable fast display lists"},
|
||||
|
||||
/* 3D engine options: */
|
||||
{"nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used."},
|
||||
|
|
@ -916,6 +917,7 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
|
|||
disk_cache_destroy(sscreen->disk_shader_cache);
|
||||
util_live_shader_cache_deinit(&sscreen->live_shader_cache);
|
||||
util_idalloc_mt_fini(&sscreen->buffer_ids);
|
||||
util_vertex_state_cache_deinit(&sscreen->vertex_state_cache);
|
||||
|
||||
sscreen->ws->destroy(sscreen->ws);
|
||||
FREE(sscreen);
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "util/u_idalloc.h"
|
||||
#include "util/u_suballoc.h"
|
||||
#include "util/u_threaded_context.h"
|
||||
#include "util/u_vertex_state_cache.h"
|
||||
#include "ac_sqtt.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -210,6 +211,7 @@ enum
|
|||
DBG_CHECK_VM,
|
||||
DBG_RESERVE_VMID,
|
||||
DBG_SHADOW_REGS,
|
||||
DBG_NO_FAST_DISPLAY_LIST,
|
||||
|
||||
/* 3D engine options: */
|
||||
DBG_NO_GFX,
|
||||
|
|
@ -659,6 +661,7 @@ struct si_screen {
|
|||
unsigned ngg_subgroup_size;
|
||||
|
||||
struct util_idalloc_mt buffer_ids;
|
||||
struct util_vertex_state_cache vertex_state_cache;
|
||||
};
|
||||
|
||||
struct si_sampler_view {
|
||||
|
|
@ -867,12 +870,24 @@ struct si_small_prim_cull_info {
|
|||
float small_prim_precision;
|
||||
};
|
||||
|
||||
/* radeonsi implementation of pipe_vertex_state.
 *
 * Objects are created by si_create_vertex_state() and handed out through the
 * embedded base struct.
 */
struct si_vertex_state {
|
||||
/* Base class; pointers to it are cast back to si_vertex_state. */
struct pipe_vertex_state b;
|
||||
/* Copy of the vertex elements state built from the input elements. */
struct si_vertex_elements velems;
|
||||
/* Precomputed vertex buffer descriptors, 4 dwords per vertex element. */
uint32_t descriptors[4 * SI_MAX_ATTRIBS];
|
||||
};
|
||||
|
||||
/* Function pointer type of a draw_vbo entry point.
 *
 * Used for the si_context::draw_vbo table, for si_context::real_draw_vbo and
 * for the wrappers passed to si_install_draw_wrapper().
 */
typedef void (*pipe_draw_vbo_func)(struct pipe_context *pipe,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws);
|
||||
/* Function pointer type of a draw_vertex_state entry point.
 *
 * Used for the si_context::draw_vertex_state table, for
 * si_context::real_draw_vertex_state and for the vertex-state wrappers passed
 * to si_install_draw_wrapper(). Note that "info" is passed by value.
 */
typedef void (*pipe_draw_vertex_state_func)(struct pipe_context *ctx,
|
||||
struct pipe_vertex_state *vstate,
|
||||
uint32_t partial_velem_mask,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws);
|
||||
|
||||
struct si_context {
|
||||
struct pipe_context b; /* base class */
|
||||
|
|
@ -1011,6 +1026,8 @@ struct si_context {
|
|||
struct si_vertex_elements *vertex_elements;
|
||||
unsigned num_vertex_elements;
|
||||
unsigned cs_max_waves_per_sh;
|
||||
bool uses_nontrivial_vs_prolog;
|
||||
bool force_trivial_vs_prolog;
|
||||
bool do_update_shaders;
|
||||
bool compute_shaderbuf_sgprs_dirty;
|
||||
bool compute_image_sgprs_dirty;
|
||||
|
|
@ -1219,8 +1236,10 @@ struct si_context {
|
|||
struct hash_table *dirty_implicit_resources;
|
||||
|
||||
pipe_draw_vbo_func draw_vbo[2][2][2];
|
||||
pipe_draw_vertex_state_func draw_vertex_state[2][2][2];
|
||||
/* When b.draw_vbo is a wrapper, real_draw_vbo is the real draw_vbo function */
|
||||
pipe_draw_vbo_func real_draw_vbo;
|
||||
pipe_draw_vertex_state_func real_draw_vertex_state;
|
||||
void (*emit_spi_map[33])(struct si_context *sctx);
|
||||
|
||||
/* SQTT */
|
||||
|
|
@ -1422,7 +1441,8 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs);
|
|||
/* Replace the sctx->b.draw_vbo function with a wrapper. This can be used to implement
|
||||
* optimizations without affecting the normal draw_vbo functions perf.
|
||||
*/
|
||||
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper);
|
||||
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper,
|
||||
pipe_draw_vertex_state_func vstate_wrapper);
|
||||
|
||||
/* si_gpu_load.c */
|
||||
void si_gpu_load_kill_thread(struct si_screen *sscreen);
|
||||
|
|
@ -1954,11 +1974,22 @@ static inline void si_select_draw_vbo(struct si_context *sctx)
|
|||
pipe_draw_vbo_func draw_vbo = sctx->draw_vbo[!!sctx->shader.tes.cso]
|
||||
[!!sctx->shader.gs.cso]
|
||||
[sctx->ngg];
|
||||
pipe_draw_vertex_state_func draw_vertex_state =
|
||||
sctx->draw_vertex_state[!!sctx->shader.tes.cso]
|
||||
[!!sctx->shader.gs.cso]
|
||||
[sctx->ngg];
|
||||
assert(draw_vbo);
|
||||
if (unlikely(sctx->real_draw_vbo))
|
||||
assert(draw_vertex_state);
|
||||
|
||||
if (unlikely(sctx->real_draw_vbo)) {
|
||||
assert(sctx->real_draw_vertex_state);
|
||||
sctx->real_draw_vbo = draw_vbo;
|
||||
else
|
||||
sctx->real_draw_vertex_state = draw_vertex_state;
|
||||
} else {
|
||||
assert(!sctx->real_draw_vertex_state);
|
||||
sctx->b.draw_vbo = draw_vbo;
|
||||
sctx->b.draw_vertex_state = draw_vertex_state;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the number of samples that the rasterizer uses. */
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
#include "util/format/u_format.h"
|
||||
#include "util/format/u_format_s3tc.h"
|
||||
#include "util/u_dual_blend.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_resource.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
|
|
@ -636,14 +637,8 @@ static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_b
|
|||
return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
|
||||
}
|
||||
|
||||
static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws) {
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
static bool si_check_blend_dst_sampler_noop(struct si_context *sctx)
|
||||
{
|
||||
if (sctx->framebuffer.state.nr_cbufs == 1) {
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
bool free_nir;
|
||||
|
|
@ -677,16 +672,44 @@ static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
|
|||
if (tex->is_depth &&
|
||||
tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) &&
|
||||
tex->depth_clear_value[0] == 1) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
/* TODO: handle color textures */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws) {
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
if (!si_check_blend_dst_sampler_noop(sctx))
|
||||
return;
|
||||
|
||||
sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
|
||||
}
|
||||
|
||||
static void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx,
|
||||
struct pipe_vertex_state *state,
|
||||
uint32_t partial_velem_mask,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws) {
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
if (!si_check_blend_dst_sampler_noop(sctx))
|
||||
return;
|
||||
|
||||
sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws);
|
||||
}
|
||||
|
||||
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
|
@ -731,9 +754,10 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
|||
|
||||
if (likely(!radeon_uses_secure_bos(sctx->ws))) {
|
||||
if (unlikely(blend->allows_noop_optimization)) {
|
||||
si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop);
|
||||
si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop,
|
||||
si_draw_vstate_blend_dst_sampler_noop);
|
||||
} else {
|
||||
si_install_draw_wrapper(sctx, NULL);
|
||||
si_install_draw_wrapper(sctx, NULL, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5011,6 +5035,78 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot,
|
|||
}
|
||||
}
|
||||
|
||||
static struct pipe_vertex_state *
|
||||
si_create_vertex_state(struct pipe_screen *screen,
|
||||
struct pipe_vertex_buffer *buffer,
|
||||
const struct pipe_vertex_element *elements,
|
||||
unsigned num_elements,
|
||||
struct pipe_resource *indexbuf,
|
||||
uint32_t full_velem_mask)
|
||||
{
|
||||
struct si_screen *sscreen = (struct si_screen *)screen;
|
||||
struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state);
|
||||
|
||||
util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask,
|
||||
&state->b);
|
||||
|
||||
/* Initialize the vertex element state in state->element.
|
||||
* Do it by creating a vertex element state object and copying it there.
|
||||
*/
|
||||
struct pipe_context ctx = {};
|
||||
ctx.screen = screen;
|
||||
struct si_vertex_elements *velems = si_create_vertex_elements(&ctx, num_elements, elements);
|
||||
state->velems = *velems;
|
||||
si_delete_vertex_element(&ctx, velems);
|
||||
|
||||
assert(!state->velems.instance_divisor_is_one);
|
||||
assert(!state->velems.instance_divisor_is_fetched);
|
||||
assert(!state->velems.fix_fetch_always);
|
||||
assert(buffer->stride % 4 == 0);
|
||||
assert(buffer->buffer_offset % 4 == 0);
|
||||
assert(!buffer->is_user_buffer);
|
||||
for (unsigned i = 0; i < num_elements; i++) {
|
||||
assert(elements[i].src_offset % 4 == 0);
|
||||
assert(!elements[i].dual_slot);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_elements; i++) {
|
||||
si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i,
|
||||
&state->descriptors[i * 4]);
|
||||
}
|
||||
|
||||
return &state->b;
|
||||
}
|
||||
|
||||
static void si_vertex_state_destroy(struct pipe_screen *screen,
|
||||
struct pipe_vertex_state *state)
|
||||
{
|
||||
pipe_vertex_buffer_unreference(&state->input.vbuffer);
|
||||
pipe_resource_reference(&state->input.indexbuf, NULL);
|
||||
FREE(state);
|
||||
}
|
||||
|
||||
static struct pipe_vertex_state *
|
||||
si_pipe_create_vertex_state(struct pipe_screen *screen,
|
||||
struct pipe_vertex_buffer *buffer,
|
||||
const struct pipe_vertex_element *elements,
|
||||
unsigned num_elements,
|
||||
struct pipe_resource *indexbuf,
|
||||
uint32_t full_velem_mask)
|
||||
{
|
||||
struct si_screen *sscreen = (struct si_screen *)screen;
|
||||
|
||||
return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf,
|
||||
full_velem_mask, &sscreen->vertex_state_cache);
|
||||
}
|
||||
|
||||
static void si_pipe_vertex_state_destroy(struct pipe_screen *screen,
|
||||
struct pipe_vertex_state *state)
|
||||
{
|
||||
struct si_screen *sscreen = (struct si_screen *)screen;
|
||||
|
||||
util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state);
|
||||
}
|
||||
|
||||
/*
|
||||
* Misc
|
||||
*/
|
||||
|
|
@ -5177,12 +5273,17 @@ void si_init_state_functions(struct si_context *sctx)
|
|||
void si_init_screen_state_functions(struct si_screen *sscreen)
|
||||
{
|
||||
sscreen->b.is_format_supported = si_is_format_supported;
|
||||
sscreen->b.create_vertex_state = si_pipe_create_vertex_state;
|
||||
sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy;
|
||||
|
||||
if (sscreen->info.chip_class >= GFX10) {
|
||||
sscreen->make_texture_descriptor = gfx10_make_texture_descriptor;
|
||||
} else {
|
||||
sscreen->make_texture_descriptor = si_make_texture_descriptor;
|
||||
}
|
||||
|
||||
util_vertex_state_cache_init(&sscreen->vertex_state_cache,
|
||||
si_create_vertex_state, si_vertex_state_destroy);
|
||||
}
|
||||
|
||||
static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include "ac_exp_param.h"
|
||||
#include "ac_sqtt.h"
|
||||
#include "si_build_pm4.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_index_modify.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
|
|
@ -944,6 +945,12 @@ static bool si_is_line_stipple_enabled(struct si_context *sctx)
|
|||
(rs->polygon_mode_is_lines || util_prim_is_lines(sctx->current_rast_prim));
|
||||
}
|
||||
|
||||
/* Template parameter type (IS_DRAW_VERTEX_STATE) used by the draw helpers in
 * this file to specialize code for the draw_vertex_state path.
 */
enum si_is_draw_vertex_state {
|
||||
/* Not the draw_vertex_state path. */
DRAW_VERTEX_STATE_OFF,
|
||||
/* The draw_vertex_state path. */
DRAW_VERTEX_STATE_ON,
|
||||
};
|
||||
|
||||
template <si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *indirect,
|
||||
enum pipe_prim_type prim,
|
||||
unsigned min_vertex_count,
|
||||
|
|
@ -951,6 +958,9 @@ static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *
|
|||
unsigned num_prims,
|
||||
ubyte vertices_per_patch)
|
||||
{
|
||||
if (IS_DRAW_VERTEX_STATE)
|
||||
return 0;
|
||||
|
||||
if (indirect) {
|
||||
return indirect->buffer ||
|
||||
(instance_count > 1 && indirect->count_from_stream_output);
|
||||
|
|
@ -960,7 +970,8 @@ static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *
|
|||
}
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS> ALWAYS_INLINE
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
enum pipe_prim_type prim, unsigned num_patches,
|
||||
|
|
@ -980,12 +991,15 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
}
|
||||
|
||||
key.u.prim = prim;
|
||||
key.u.uses_instancing = (indirect && indirect->buffer) || instance_count > 1;
|
||||
key.u.uses_instancing = !IS_DRAW_VERTEX_STATE &&
|
||||
((indirect && indirect->buffer) || instance_count > 1);
|
||||
key.u.multi_instances_smaller_than_primgroup =
|
||||
num_instanced_prims_less_than(indirect, prim, min_vertex_count, instance_count,
|
||||
primgroup_size, sctx->patch_vertices);
|
||||
key.u.primitive_restart = primitive_restart;
|
||||
key.u.count_from_stream_output = indirect && indirect->count_from_stream_output;
|
||||
num_instanced_prims_less_than<IS_DRAW_VERTEX_STATE>(indirect, prim, min_vertex_count,
|
||||
instance_count, primgroup_size,
|
||||
sctx->patch_vertices);
|
||||
key.u.primitive_restart = !IS_DRAW_VERTEX_STATE && primitive_restart;
|
||||
key.u.count_from_stream_output = !IS_DRAW_VERTEX_STATE && indirect &&
|
||||
indirect->count_from_stream_output;
|
||||
key.u.line_stipple_enabled = si_is_line_stipple_enabled(sctx);
|
||||
|
||||
ia_multi_vgt_param =
|
||||
|
|
@ -1003,8 +1017,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
*/
|
||||
if (GFX_VERSION == GFX7 &&
|
||||
sctx->family == CHIP_HAWAII && G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
|
||||
num_instanced_prims_less_than(indirect, prim, min_vertex_count, instance_count, 2,
|
||||
sctx->patch_vertices))
|
||||
num_instanced_prims_less_than<IS_DRAW_VERTEX_STATE>(indirect, prim, min_vertex_count,
|
||||
instance_count, 2, sctx->patch_vertices))
|
||||
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
}
|
||||
|
||||
|
|
@ -1089,11 +1103,11 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
|||
}
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG>
|
||||
ALWAYS_INLINE
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
|
||||
{
|
||||
if (sctx->num_vs_blit_sgprs) {
|
||||
if (!IS_DRAW_VERTEX_STATE && sctx->num_vs_blit_sgprs) {
|
||||
/* Re-emit the state after we leave u_blitter. */
|
||||
sctx->last_vs_state = ~0;
|
||||
return;
|
||||
|
|
@ -1143,7 +1157,8 @@ static bool si_prim_restart_index_changed(struct si_context *sctx, bool primitiv
|
|||
sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN);
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS> ALWAYS_INLINE
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
enum pipe_prim_type prim, unsigned num_patches,
|
||||
|
|
@ -1154,7 +1169,7 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
|
|||
unsigned ia_multi_vgt_param;
|
||||
|
||||
ia_multi_vgt_param =
|
||||
si_get_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS>
|
||||
si_get_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, indirect, prim, num_patches, instance_count, primitive_restart,
|
||||
min_vertex_count);
|
||||
|
||||
|
|
@ -1225,7 +1240,8 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
|
|||
}
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static void si_emit_draw_registers(struct si_context *sctx,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
enum pipe_prim_type prim, unsigned num_patches,
|
||||
|
|
@ -1234,10 +1250,13 @@ static void si_emit_draw_registers(struct si_context *sctx,
|
|||
{
|
||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
|
||||
if (IS_DRAW_VERTEX_STATE)
|
||||
primitive_restart = false;
|
||||
|
||||
if (GFX_VERSION >= GFX10)
|
||||
gfx10_emit_ge_cntl<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx, num_patches);
|
||||
else
|
||||
si_emit_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS>
|
||||
si_emit_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, indirect, prim, num_patches, instance_count, primitive_restart,
|
||||
min_vertex_count);
|
||||
|
||||
|
|
@ -1284,7 +1303,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_ngg NGG>
|
||||
template <chip_class GFX_VERSION, si_has_ngg NGG, si_is_draw_vertex_state IS_DRAW_VERTEX_STATE>
|
||||
ALWAYS_INLINE
|
||||
static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info,
|
||||
unsigned drawid_base,
|
||||
|
|
@ -1304,7 +1323,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
|
||||
uint32_t use_opaque = 0;
|
||||
|
||||
if (indirect && indirect->count_from_stream_output) {
|
||||
if (!IS_DRAW_VERTEX_STATE && indirect && indirect->count_from_stream_output) {
|
||||
struct si_streamout_target *t = (struct si_streamout_target *)indirect->count_from_stream_output;
|
||||
|
||||
radeon_begin(cs);
|
||||
|
|
@ -1379,7 +1398,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
unsigned sh_base_reg = sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX];
|
||||
bool render_cond_bit = sctx->render_cond_enabled;
|
||||
|
||||
if (indirect) {
|
||||
if (!IS_DRAW_VERTEX_STATE && indirect) {
|
||||
assert(num_draws == 1);
|
||||
uint64_t indirect_va = si_resource(indirect->buffer)->gpu_address;
|
||||
|
||||
|
|
@ -1454,10 +1473,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
/* Base vertex and start instance. */
|
||||
int base_vertex = original_index_size ? draws[0].index_bias : draws[0].start;
|
||||
|
||||
bool set_draw_id = sctx->vs_uses_draw_id;
|
||||
bool set_draw_id = !IS_DRAW_VERTEX_STATE && sctx->vs_uses_draw_id;
|
||||
bool set_base_instance = sctx->vs_uses_base_instance;
|
||||
|
||||
if (sctx->num_vs_blit_sgprs) {
|
||||
if (!IS_DRAW_VERTEX_STATE && sctx->num_vs_blit_sgprs) {
|
||||
/* Re-emit draw constants after we leave u_blitter. */
|
||||
si_invalidate_draw_sh_constants(sctx);
|
||||
|
||||
|
|
@ -1496,7 +1515,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
}
|
||||
|
||||
/* Don't update draw_id in the following code if it doesn't increment. */
|
||||
bool increment_draw_id = num_draws > 1 && set_draw_id && info->increment_draw_id;
|
||||
bool increment_draw_id = !IS_DRAW_VERTEX_STATE && num_draws > 1 &&
|
||||
set_draw_id && info->increment_draw_id;
|
||||
|
||||
if (index_size) {
|
||||
/* NOT_EOP allows merging multiple draws into 1 wave, but only user VGPRs
|
||||
|
|
@ -1514,7 +1534,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
* else for (all draws);
|
||||
*
|
||||
*/
|
||||
bool index_bias_varies = num_draws > 1 && info->index_bias_varies;
|
||||
bool index_bias_varies = !IS_DRAW_VERTEX_STATE && num_draws > 1 &&
|
||||
info->index_bias_varies;
|
||||
|
||||
if (increment_draw_id) {
|
||||
if (index_bias_varies) {
|
||||
|
|
@ -1655,7 +1676,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
radeon_emit(draws[i].count);
|
||||
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
|
||||
}
|
||||
if (num_draws > 1 && !sctx->num_vs_blit_sgprs)
|
||||
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs))
|
||||
sctx->last_base_vertex = draws[num_draws - 1].start;
|
||||
}
|
||||
}
|
||||
|
|
@ -1743,20 +1764,56 @@ void si_set_vertex_buffer_descriptor(struct si_screen *sscreen, struct si_vertex
|
|||
|
||||
#endif
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
|
||||
static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
||||
/* util_bitcount has large measurable overhead (~2% difference in viewperf), so we use
|
||||
* the POPCNT x86 instruction via inline assembly if the CPU supports it.
|
||||
*/
|
||||
/* Template parameter type (POPCNT) selecting how bitcount_asm() counts bits. */
enum si_has_popcnt {
|
||||
/* bitcount_asm() falls back to util_bitcount(). */
POPCNT_NO,
|
||||
/* bitcount_asm() uses util_popcnt_inline_asm(). */
POPCNT_YES,
|
||||
};
|
||||
|
||||
template<si_has_popcnt POPCNT>
|
||||
unsigned bitcount_asm(unsigned n)
|
||||
{
|
||||
unsigned count = sctx->num_vertex_elements;
|
||||
if (POPCNT == POPCNT_YES)
|
||||
return util_popcnt_inline_asm(n);
|
||||
else
|
||||
return util_bitcount(n);
|
||||
}
|
||||
|
||||
template<si_has_popcnt POPCNT>
|
||||
static ALWAYS_INLINE unsigned get_next_vertex_state_elem(struct pipe_vertex_state *state,
|
||||
uint32_t *partial_velem_mask)
|
||||
{
|
||||
unsigned semantic_index = u_bit_scan(partial_velem_mask);
|
||||
assert(state->input.full_velem_mask & BITFIELD_BIT(semantic_index));
|
||||
/* A prefix mask of the full mask gives us the index in pipe_vertex_state. */
|
||||
return bitcount_asm<POPCNT>(state->input.full_velem_mask & BITFIELD_MASK(semantic_index));
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_popcnt POPCNT> ALWAYS_INLINE
|
||||
static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx,
|
||||
struct pipe_vertex_state *state,
|
||||
uint32_t partial_velem_mask)
|
||||
{
|
||||
struct si_vertex_state *vstate = (struct si_vertex_state *)state;
|
||||
unsigned count = IS_DRAW_VERTEX_STATE ? bitcount_asm<POPCNT>(partial_velem_mask) :
|
||||
sctx->num_vertex_elements;
|
||||
unsigned sh_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
|
||||
PIPE_SHADER_VERTEX);
|
||||
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs_inline(GFX_VERSION);
|
||||
bool pointer_dirty, user_sgprs_dirty;
|
||||
|
||||
assert(count <= SI_MAX_ATTRIBS);
|
||||
|
||||
if (sctx->vertex_buffers_dirty) {
|
||||
if (sctx->vertex_buffers_dirty || IS_DRAW_VERTEX_STATE) {
|
||||
assert(count);
|
||||
|
||||
struct si_vertex_elements *velems = sctx->vertex_elements;
|
||||
unsigned alloc_size = velems->vb_desc_list_alloc_size;
|
||||
unsigned alloc_size = IS_DRAW_VERTEX_STATE ?
|
||||
vstate->velems.vb_desc_list_alloc_size :
|
||||
velems->vb_desc_list_alloc_size;
|
||||
uint32_t *ptr;
|
||||
|
||||
if (alloc_size) {
|
||||
|
|
@ -1783,27 +1840,64 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
|||
si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
|
||||
}
|
||||
|
||||
unsigned first_vb_use_mask = velems->first_vb_use_mask;
|
||||
if (IS_DRAW_VERTEX_STATE) {
|
||||
unsigned partial_count = bitcount_asm<POPCNT>(partial_velem_mask);
|
||||
unsigned i = 0;
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
unsigned vbo_index = velems->vertex_buffer_index[i];
|
||||
struct pipe_vertex_buffer *vb = &sctx->vertex_buffer[vbo_index];
|
||||
uint32_t *desc = i < num_vbos_in_user_sgprs ? &sctx->vb_descriptor_user_sgprs[i * 4]
|
||||
: &ptr[(i - num_vbos_in_user_sgprs) * 4];
|
||||
if (num_vbos_in_user_sgprs) {
|
||||
unsigned num_vb_sgprs = MIN2(partial_count, num_vbos_in_user_sgprs) * 4;
|
||||
|
||||
if (!si_set_vb_descriptor<GFX_VERSION>(velems, vb, i, desc))
|
||||
continue;
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
radeon_set_sh_reg_seq(sh_base + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4, num_vb_sgprs);
|
||||
|
||||
if (first_vb_use_mask & (1 << i)) {
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(vb->buffer.resource),
|
||||
for (; partial_velem_mask && i < num_vbos_in_user_sgprs; i++) {
|
||||
unsigned velem_index = get_next_vertex_state_elem<POPCNT>(state, &partial_velem_mask);
|
||||
|
||||
radeon_emit_array(&vstate->descriptors[velem_index * 4], 4);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
for (; partial_velem_mask; i++) {
|
||||
unsigned velem_index = get_next_vertex_state_elem<POPCNT>(state, &partial_velem_mask);
|
||||
uint32_t *desc = &ptr[(i - num_vbos_in_user_sgprs) * 4];
|
||||
|
||||
memcpy(desc, &vstate->descriptors[velem_index * 4], 16);
|
||||
}
|
||||
|
||||
if (vstate->b.input.vbuffer.buffer.resource != vstate->b.input.indexbuf) {
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs,
|
||||
si_resource(vstate->b.input.vbuffer.buffer.resource),
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
|
||||
}
|
||||
|
||||
/* The next draw_vbo should recompute and rebind vertex buffer descriptors. */
|
||||
sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
|
||||
|
||||
user_sgprs_dirty = false; /* We just set them above. */
|
||||
pointer_dirty = count > num_vbos_in_user_sgprs;
|
||||
} else {
|
||||
unsigned first_vb_use_mask = velems->first_vb_use_mask;
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
unsigned vbo_index = velems->vertex_buffer_index[i];
|
||||
struct pipe_vertex_buffer *vb = &sctx->vertex_buffer[vbo_index];
|
||||
uint32_t *desc = i < num_vbos_in_user_sgprs ? &sctx->vb_descriptor_user_sgprs[i * 4]
|
||||
: &ptr[(i - num_vbos_in_user_sgprs) * 4];
|
||||
|
||||
if (!si_set_vb_descriptor<GFX_VERSION>(velems, vb, i, desc))
|
||||
continue;
|
||||
|
||||
if (first_vb_use_mask & (1 << i)) {
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(vb->buffer.resource),
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
|
||||
}
|
||||
}
|
||||
|
||||
sctx->vertex_buffers_dirty = false;
|
||||
user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
|
||||
pointer_dirty = alloc_size != 0;
|
||||
}
|
||||
|
||||
sctx->vertex_buffers_dirty = false;
|
||||
|
||||
pointer_dirty = alloc_size != 0;
|
||||
user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
|
||||
} else {
|
||||
pointer_dirty = sctx->vertex_buffer_pointer_dirty;
|
||||
user_sgprs_dirty = sctx->vertex_buffer_user_sgprs_dirty;
|
||||
|
|
@ -1811,8 +1905,6 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx)
|
|||
|
||||
if (pointer_dirty || user_sgprs_dirty) {
|
||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
unsigned sh_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
|
||||
PIPE_SHADER_VERTEX);
|
||||
assert(count);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
|
@ -1922,8 +2014,8 @@ static void si_get_draw_start_count(struct si_context *sctx, const struct pipe_d
|
|||
}
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG>
|
||||
ALWAYS_INLINE
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
|
||||
static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
enum pipe_prim_type prim, unsigned instance_count,
|
||||
|
|
@ -1964,8 +2056,8 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
|
|||
}
|
||||
|
||||
/* Emit draw states. */
|
||||
si_emit_vs_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx, info->index_size);
|
||||
si_emit_draw_registers<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
|
||||
si_emit_vs_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE>(sctx, info->index_size);
|
||||
si_emit_draw_registers<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, indirect, prim, num_patches, instance_count, primitive_restart,
|
||||
info->restart_index, min_vertex_count);
|
||||
}
|
||||
|
|
@ -1975,13 +2067,16 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
|
|||
pipe_resource_reference(&indexbuf, NULL); \
|
||||
} while (0)
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG>
|
||||
static void si_draw_vbo(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws)
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_popcnt POPCNT> ALWAYS_INLINE
|
||||
static void si_draw(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws,
|
||||
struct pipe_vertex_state *state,
|
||||
uint32_t partial_velem_mask)
|
||||
{
|
||||
/* Keep code that uses the least number of local variables as close to the beginning
|
||||
* of this function as possible to minimize register pressure.
|
||||
|
|
@ -2052,11 +2147,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
* 'instance_count == 0' seems to be problematic on Renoir chips (#4866),
|
||||
* so simplify the condition and drop these draws for all <= GFX9 chips.
|
||||
*/
|
||||
if (GFX_VERSION <= GFX9 && unlikely(!indirect && !instance_count))
|
||||
if (GFX_VERSION <= GFX9 && unlikely(!IS_DRAW_VERTEX_STATE && !indirect && !instance_count))
|
||||
return;
|
||||
|
||||
struct si_shader_selector *vs = sctx->shader.vs.cso;
|
||||
if (unlikely(!vs || sctx->num_vertex_elements < vs->num_vs_inputs ||
|
||||
struct si_vertex_state *vstate = (struct si_vertex_state *)state;
|
||||
if (unlikely(!vs ||
|
||||
(!IS_DRAW_VERTEX_STATE && sctx->num_vertex_elements < vs->num_vs_inputs) ||
|
||||
(IS_DRAW_VERTEX_STATE && vstate->velems.count < vs->num_vs_inputs) ||
|
||||
!sctx->shader.ps.cso || (HAS_TESS != (prim == PIPE_PRIM_PATCHES)))) {
|
||||
assert(0);
|
||||
return;
|
||||
|
|
@ -2084,7 +2182,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
if (index_size) {
|
||||
/* Translate or upload, if needed. */
|
||||
/* 8-bit indices are supported on GFX8. */
|
||||
if (GFX_VERSION <= GFX7 && index_size == 1) {
|
||||
if (!IS_DRAW_VERTEX_STATE && GFX_VERSION <= GFX7 && index_size == 1) {
|
||||
unsigned start, count, start_offset, size, offset;
|
||||
void *ptr;
|
||||
|
||||
|
|
@ -2103,7 +2201,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
/* info->start will be added by the drawing code */
|
||||
index_offset = offset - start_offset;
|
||||
index_size = 2;
|
||||
} else if (info->has_user_indices) {
|
||||
} else if (!IS_DRAW_VERTEX_STATE && info->has_user_indices) {
|
||||
unsigned start_offset;
|
||||
|
||||
assert(!indirect);
|
||||
|
|
@ -2130,7 +2228,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
unsigned min_direct_count = 0;
|
||||
unsigned total_direct_count = 0;
|
||||
|
||||
if (indirect) {
|
||||
if (!IS_DRAW_VERTEX_STATE && indirect) {
|
||||
/* Add the buffer size for memory checking in need_cs_space. */
|
||||
if (indirect->buffer)
|
||||
si_context_add_resource_size(sctx, indirect->buffer);
|
||||
|
|
@ -2192,6 +2290,32 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
if (IS_DRAW_VERTEX_STATE) {
|
||||
/* draw_vertex_state doesn't use the current vertex buffers and vertex elements,
|
||||
* so disable any non-trivial VS prolog that is based on them, such as vertex
|
||||
* format lowering.
|
||||
*/
|
||||
if (!sctx->force_trivial_vs_prolog) {
|
||||
sctx->force_trivial_vs_prolog = true;
|
||||
|
||||
/* Update shaders to disable the non-trivial VS prolog. */
|
||||
if (sctx->uses_nontrivial_vs_prolog) {
|
||||
si_vs_key_update_inputs(sctx);
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (sctx->force_trivial_vs_prolog) {
|
||||
sctx->force_trivial_vs_prolog = false;
|
||||
|
||||
/* Update shaders to enable the non-trivial VS prolog. */
|
||||
if (sctx->uses_nontrivial_vs_prolog) {
|
||||
si_vs_key_update_inputs(sctx);
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Update NGG culling settings. */
|
||||
uint8_t old_ngg_culling = sctx->ngg_culling;
|
||||
if (GFX_VERSION >= GFX10) {
|
||||
|
|
@ -2314,7 +2438,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
|
||||
gfx9_scissor_bug = true;
|
||||
|
||||
if ((indirect && indirect->count_from_stream_output) ||
|
||||
if ((!IS_DRAW_VERTEX_STATE && indirect && indirect->count_from_stream_output) ||
|
||||
sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
|
||||
sctx->dirty_states & si_states_that_always_roll_context())
|
||||
sctx->context_roll = true;
|
||||
|
|
@ -2333,7 +2457,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
|
||||
|
||||
/* Emit all states except possibly render condition. */
|
||||
si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
|
||||
si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, info, indirect, prim, instance_count, min_direct_count,
|
||||
primitive_restart, masked_atoms);
|
||||
sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
|
||||
|
|
@ -2342,7 +2466,9 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
/* This uploads VBO descriptors, sets user SGPRs, and executes the L2 prefetch.
|
||||
* It should be done after cache flushing.
|
||||
*/
|
||||
if (unlikely((!si_upload_and_prefetch_VB_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) {
|
||||
if (unlikely((!si_upload_and_prefetch_VB_descriptors
|
||||
<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE, POPCNT>
|
||||
(sctx, state, partial_velem_mask)))) {
|
||||
DRAW_CLEANUP;
|
||||
return;
|
||||
}
|
||||
|
|
@ -2359,7 +2485,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
}
|
||||
assert(sctx->dirty_atoms == 0);
|
||||
|
||||
si_emit_draw_packets<GFX_VERSION, NGG>
|
||||
si_emit_draw_packets<GFX_VERSION, NGG, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, info, drawid_offset, indirect, draws, num_draws, total_direct_count, indexbuf,
|
||||
index_size, index_offset, instance_count, original_index_size);
|
||||
/* <-- CUs are busy here. */
|
||||
|
|
@ -2381,12 +2507,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
/* This uploads VBO descriptors, sets user SGPRs, and executes the L2 prefetch.
|
||||
* It should be done after cache flushing and after the VS prefetch.
|
||||
*/
|
||||
if (unlikely((!si_upload_and_prefetch_VB_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) {
|
||||
if (unlikely((!si_upload_and_prefetch_VB_descriptors
|
||||
<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE, POPCNT>
|
||||
(sctx, state, partial_velem_mask)))) {
|
||||
DRAW_CLEANUP;
|
||||
return;
|
||||
}
|
||||
|
||||
si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
|
||||
si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, info, indirect, prim, instance_count, min_direct_count,
|
||||
primitive_restart, masked_atoms);
|
||||
|
||||
|
|
@ -2397,7 +2525,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
}
|
||||
assert(sctx->dirty_atoms == 0);
|
||||
|
||||
si_emit_draw_packets<GFX_VERSION, NGG>
|
||||
si_emit_draw_packets<GFX_VERSION, NGG, IS_DRAW_VERTEX_STATE>
|
||||
(sctx, info, drawid_offset, indirect, draws, num_draws, total_direct_count, indexbuf,
|
||||
index_size, index_offset, instance_count, original_index_size);
|
||||
|
||||
|
|
@ -2428,9 +2556,9 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
if (unlikely(sctx->decompression_enabled)) {
|
||||
sctx->num_decompress_calls++;
|
||||
} else {
|
||||
sctx->num_draw_calls++;
|
||||
sctx->num_draw_calls += num_draws;
|
||||
if (primitive_restart)
|
||||
sctx->num_prim_restart_calls++;
|
||||
sctx->num_prim_restart_calls += num_draws;
|
||||
}
|
||||
|
||||
if (!sctx->blitter_running && sctx->framebuffer.state.zsbuf) {
|
||||
|
|
@ -2441,6 +2569,39 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
DRAW_CLEANUP;
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG>
|
||||
static void si_draw_vbo(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws)
|
||||
{
|
||||
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_OFF, POPCNT_NO>
|
||||
(ctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
|
||||
si_has_popcnt POPCNT>
|
||||
static void si_draw_vertex_state(struct pipe_context *ctx,
|
||||
struct pipe_vertex_state *vstate,
|
||||
uint32_t partial_velem_mask,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws)
|
||||
{
|
||||
struct si_vertex_state *state = (struct si_vertex_state *)vstate;
|
||||
struct pipe_draw_info dinfo = {};
|
||||
|
||||
dinfo.mode = info.mode;
|
||||
dinfo.index_size = 4;
|
||||
dinfo.instance_count = 1;
|
||||
dinfo.index.resource = state->b.input.indexbuf;
|
||||
|
||||
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_ON, POPCNT>
|
||||
(ctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
|
||||
}
|
||||
|
||||
static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elements_cso,
|
||||
blitter_get_vs_func get_vs, int x1, int y1, int x2, int y2,
|
||||
float depth, unsigned num_instances, enum blitter_attrib_type type,
|
||||
|
|
@ -2492,6 +2653,14 @@ static void si_init_draw_vbo(struct si_context *sctx)
|
|||
|
||||
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
|
||||
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG>;
|
||||
|
||||
if (util_get_cpu_caps()->has_popcnt) {
|
||||
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
|
||||
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, POPCNT_YES>;
|
||||
} else {
|
||||
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
|
||||
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, POPCNT_NO>;
|
||||
}
|
||||
}
|
||||
|
||||
template <chip_class GFX_VERSION>
|
||||
|
|
@ -2517,6 +2686,16 @@ static void si_invalid_draw_vbo(struct pipe_context *pipe,
|
|||
unreachable("vertex shader not bound");
|
||||
}
|
||||
|
||||
static void si_invalid_draw_vertex_state(struct pipe_context *ctx,
|
||||
struct pipe_vertex_state *vstate,
|
||||
uint32_t partial_velem_mask,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
unsigned num_draws)
|
||||
{
|
||||
unreachable("vertex shader not bound");
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void GFX(si_init_draw_functions_)(struct si_context *sctx)
|
||||
{
|
||||
|
|
@ -2528,6 +2707,7 @@ void GFX(si_init_draw_functions_)(struct si_context *sctx)
|
|||
* initialization of callbacks in upper layers (such as u_threaded_context).
|
||||
*/
|
||||
sctx->b.draw_vbo = si_invalid_draw_vbo;
|
||||
sctx->b.draw_vertex_state = si_invalid_draw_vertex_state;
|
||||
sctx->blitter->draw_rectangle = si_draw_rectangle;
|
||||
|
||||
si_init_ia_multi_vgt_param_table(sctx);
|
||||
|
|
|
|||
|
|
@ -1816,9 +1816,15 @@ void si_vs_key_update_inputs(struct si_context *sctx)
|
|||
if (vs->info.base.vs.blit_sgprs_amd) {
|
||||
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
|
||||
key->opt.prefer_mono = 0;
|
||||
sctx->uses_nontrivial_vs_prolog = false;
|
||||
return;
|
||||
}
|
||||
|
||||
bool uses_nontrivial_vs_prolog = false;
|
||||
|
||||
if (elts->instance_divisor_is_one || elts->instance_divisor_is_fetched)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
|
||||
key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
|
||||
key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
|
||||
key->opt.prefer_mono = elts->instance_divisor_is_fetched;
|
||||
|
|
@ -1846,9 +1852,29 @@ void si_vs_key_update_inputs(struct si_context *sctx)
|
|||
|
||||
while (fix) {
|
||||
unsigned i = u_bit_scan(&fix);
|
||||
key->mono.vs_fix_fetch[i].bits = elts->fix_fetch[i];
|
||||
uint8_t fix_fetch = elts->fix_fetch[i];
|
||||
|
||||
key->mono.vs_fix_fetch[i].bits = fix_fetch;
|
||||
if (fix_fetch)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
}
|
||||
key->mono.vs_fetch_opencode = opencode;
|
||||
if (opencode)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
|
||||
sctx->uses_nontrivial_vs_prolog = uses_nontrivial_vs_prolog;
|
||||
|
||||
/* draw_vertex_state (display lists) requires a trivial VS prolog that ignores
|
||||
* the current vertex buffers and vertex elements.
|
||||
*
|
||||
* We just computed the prolog key because we needed to set uses_nontrivial_vs_prolog,
|
||||
* so that we know whether the VS prolog should be updated when we switch from
|
||||
* draw_vertex_state to draw_vbo. Now clear the VS prolog for draw_vertex_state.
|
||||
* This should happen rarely because the VS prolog should be trivial in most
|
||||
* cases.
|
||||
*/
|
||||
if (uses_nontrivial_vs_prolog && sctx->force_trivial_vs_prolog)
|
||||
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
|
||||
}
|
||||
|
||||
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue