broadcom: move double-buffer heuristic helpers to the compiler

This avoids pulling a dependency on the NIR headers into
libbroadcom_v3d.

Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32240>
This commit is contained in:
Iago Toral Quiroga 2024-11-20 08:06:53 +01:00 committed by Marge Bot
parent 653808c209
commit f988a2f336
5 changed files with 51 additions and 52 deletions

View file

@ -268,23 +268,3 @@ v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
return (tile_width * bpp) / 2;
}
/* Returns a cost estimate for a compiled program: the number of 64-bit
 * words in qpu_size (the instruction count) plus a weighted count of the
 * program's TMU operations (regular accesses, spills and fills).
 */
static inline uint32_t
compute_prog_score(struct v3d_prog_data *p, uint32_t qpu_size)
{
        /* Each TMU operation counts as much as this many instructions. */
        enum { TMU_OP_WEIGHT = 4 };

        const uint32_t tmu_ops = p->tmu_count + p->tmu_spills + p->tmu_fills;
        const uint32_t num_insts = qpu_size / sizeof(uint64_t);

        return num_insts + TMU_OP_WEIGHT * tmu_ops;
}
/* Accumulates the double-buffer heuristic scores into *score.
 *
 * score->geom grows by vertex_count times the program score computed from
 * vs and vs_qpu_size; score->render grows by the program score computed
 * from fs and fs_qpu_size.
 *
 * Both accumulators saturate at UINT32_MAX instead of wrapping around, so
 * an extremely heavy workload can never wrap to a small value and be
 * mistaken for a light one by the threshold checks.
 */
void
v3d_update_double_buffer_score(uint32_t vertex_count,
                               uint32_t vs_qpu_size,
                               uint32_t fs_qpu_size,
                               struct v3d_prog_data *vs,
                               struct v3d_prog_data *fs,
                               struct v3d_double_buffer_score *score)
{
        /* Compute in 64 bits: a 32-bit accumulator plus the product of two
         * 32-bit values always fits in a uint64_t.
         */
        const uint64_t geom = (uint64_t)score->geom +
                (uint64_t)vertex_count * compute_prog_score(vs, vs_qpu_size);
        const uint64_t render = (uint64_t)score->render +
                compute_prog_score(fs, fs_qpu_size);

        score->geom = geom > UINT32_MAX ? UINT32_MAX : (uint32_t)geom;
        score->render = render > UINT32_MAX ? UINT32_MAX : (uint32_t)render;
}

View file

@ -27,7 +27,6 @@
#include "util/macros.h"
#include "common/v3d_device_info.h"
#include "compiler/shader_enums.h"
#include "broadcom/compiler/v3d_compiler.h"
#include "util/format/u_formats.h"
uint32_t
@ -81,34 +80,4 @@ uint32_t
v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
uint32_t bpp);
/* Accumulated scores consumed by the double-buffer heuristic
 * (see v3d_double_buffer_score_ok()).
 */
struct v3d_double_buffer_score {
/* Geometry workload; the heuristic rejects values above an upper limit. */
uint32_t geom;
/* Rendering workload; the heuristic rejects values below a lower limit. */
uint32_t render;
};
/* Accumulates into *score: score->geom is increased by vertex_count times
 * the program score derived from vs and vs_qpu_size, and score->render is
 * increased by the program score derived from fs and fs_qpu_size.
 */
void
v3d_update_double_buffer_score(uint32_t vertex_count,
uint32_t vs_qpu_size,
uint32_t fs_qpu_size,
struct v3d_prog_data *vs,
struct v3d_prog_data *fs,
struct v3d_double_buffer_score *score);
/* Returns true when the accumulated scores suggest that double-buffer mode
 * is worthwhile: the geometry load must not be too high and the rendering
 * load must not be too low.
 */
static inline bool
v3d_double_buffer_score_ok(struct v3d_double_buffer_score *score)
{
        /* Double buffer shrinks the tile size, which raises the number of
         * VS invocations, so heavy geometry makes it a bad trade.
         */
        const bool geom_ok = score->geom <= 200000;

        /* There must be enough rendering work to hide the latency of the
         * tile stores.
         */
        const bool render_ok = score->render >= 200;

        return geom_ok && render_ok;
}
#endif

View file

@ -1547,6 +1547,36 @@ vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
return vir_emit_nondef(c, vir_branch_inst(c, cond));
}
/* Accumulated scores consumed by the double-buffer heuristic
 * (see v3d_double_buffer_score_ok()).
 */
struct v3d_double_buffer_score {
/* Geometry workload; the heuristic rejects values above an upper limit. */
uint32_t geom;
/* Rendering workload; the heuristic rejects values below a lower limit. */
uint32_t render;
};
/* Accumulates into *score: score->geom is increased by vertex_count times
 * the program score derived from vs and vs_qpu_size, and score->render is
 * increased by the program score derived from fs and fs_qpu_size.
 */
void
v3d_update_double_buffer_score(uint32_t vertex_count,
uint32_t vs_qpu_size,
uint32_t fs_qpu_size,
struct v3d_prog_data *vs,
struct v3d_prog_data *fs,
struct v3d_double_buffer_score *score);
/* Returns true when the accumulated scores suggest that double-buffer mode
 * is worthwhile: the geometry load must not be too high and the rendering
 * load must not be too low.
 */
static inline bool
v3d_double_buffer_score_ok(struct v3d_double_buffer_score *score)
{
        /* Double buffer shrinks the tile size, which raises the number of
         * VS invocations, so heavy geometry makes it a bad trade.
         */
        const bool geom_ok = score->geom <= 200000;

        /* There must be enough rendering work to hide the latency of the
         * tile stores.
         */
        const bool render_ok = score->render >= 200;

        return geom_ok && render_ok;
}
/* Iterates over every struct qblock linked into c->blocks through its
 * "link" member. The c argument is parenthesized so that any pointer
 * expression can be passed safely; block names the iteration variable and
 * must be a plain identifier.
 */
#define vir_for_each_block(block, c)                                    \
        list_for_each_entry(struct qblock, block, &(c)->blocks, link)

View file

@ -2523,3 +2523,23 @@ v3d_compute_vpm_config(struct v3d_device_info *devinfo,
return true;
}
/* Returns a cost estimate for a compiled program: the number of 64-bit
 * words in qpu_size (the instruction count) plus a weighted count of the
 * program's TMU operations (regular accesses, spills and fills).
 */
static inline uint32_t
compute_prog_score(struct v3d_prog_data *p, uint32_t qpu_size)
{
        /* Each TMU operation counts as much as this many instructions. */
        enum { TMU_OP_WEIGHT = 4 };

        const uint32_t tmu_ops = p->tmu_count + p->tmu_spills + p->tmu_fills;
        const uint32_t num_insts = qpu_size / sizeof(uint64_t);

        return num_insts + TMU_OP_WEIGHT * tmu_ops;
}
/* Accumulates the double-buffer heuristic scores into *score.
 *
 * score->geom grows by vertex_count times the program score computed from
 * vs and vs_qpu_size; score->render grows by the program score computed
 * from fs and fs_qpu_size.
 *
 * Both accumulators saturate at UINT32_MAX instead of wrapping around, so
 * an extremely heavy workload can never wrap to a small value and be
 * mistaken for a light one by the threshold checks.
 */
void
v3d_update_double_buffer_score(uint32_t vertex_count,
                               uint32_t vs_qpu_size,
                               uint32_t fs_qpu_size,
                               struct v3d_prog_data *vs,
                               struct v3d_prog_data *fs,
                               struct v3d_double_buffer_score *score)
{
        /* Compute in 64 bits: a 32-bit accumulator plus the product of two
         * 32-bit values always fits in a uint64_t.
         */
        const uint64_t geom = (uint64_t)score->geom +
                (uint64_t)vertex_count * compute_prog_score(vs, vs_qpu_size);
        const uint64_t render = (uint64_t)score->render +
                compute_prog_score(fs, fs_qpu_size);

        score->geom = geom > UINT32_MAX ? UINT32_MAX : (uint32_t)geom;
        score->render = render > UINT32_MAX ? UINT32_MAX : (uint32_t)render;
}

View file

@ -70,7 +70,7 @@ libbroadcom_v3d = static_library(
link_whole : v3d_libs + per_version_libs,
link_with: [libv3d_neon],
build_by_default : false,
dependencies: [dep_valgrind, dep_thread, idep_mesautil, idep_nir_headers],
dependencies: [dep_valgrind, dep_thread, idep_mesautil],
)
if with_broadcom_vk