diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 7a43c72cc36..61d0c5ccdc8 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -715,6 +715,11 @@ struct v3d_compile {
         bool disable_constant_ubo_load_sorting;
         bool sorted_any_ubo_loads;
 
+        /* Moves UBO/SSBO loads right before their first user (nir_opt_move).
+         * This can reduce register pressure.
+         */
+        bool move_buffer_loads;
+
         /* Emits ldunif for each new uniform, even if the uniform was already
          * emitted in the same block. Useful to compile shaders with high
          * register pressure or to disable the optimization during uniform
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 707c96911e8..5355645085d 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -549,6 +549,7 @@ struct v3d_compiler_strategy {
         bool disable_gcm;
         bool disable_loop_unrolling;
         bool disable_ubo_load_sorting;
+        bool move_buffer_loads;
         bool disable_tmu_pipelining;
         uint32_t max_tmu_spills;
 };
@@ -583,6 +584,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
         c->disable_general_tmu_sched = strategy->disable_general_tmu_sched;
         c->disable_tmu_pipelining = strategy->disable_tmu_pipelining;
         c->disable_constant_ubo_load_sorting = strategy->disable_ubo_load_sorting;
+        c->move_buffer_loads = strategy->move_buffer_loads;
         c->disable_gcm = strategy->disable_gcm;
         c->disable_loop_unrolling = V3D_DBG(NO_LOOP_UNROLL)
                 ? true : strategy->disable_loop_unrolling;
@@ -1649,8 +1651,11 @@ v3d_attempt_compile(struct v3d_compile *c)
         if (!c->disable_constant_ubo_load_sorting)
                 NIR_PASS(_, c->s, v3d_nir_sort_constant_ubo_loads, c);
 
+        const nir_move_options buffer_opts = c->move_buffer_loads ?
+                (nir_move_load_ubo | nir_move_load_ssbo) : 0;
         NIR_PASS(_, c->s, nir_opt_move, nir_move_load_uniform |
-                                        nir_move_const_undef);
+                                        nir_move_const_undef |
+                                        buffer_opts);
 
         v3d_nir_to_vir(c);
 }
@@ -1710,19 +1715,19 @@ int v3d_shaderdb_dump(struct v3d_compile *c,
  * because v3d_nir_to_vir will cap this to the actual minimum.
  */
 static const struct v3d_compiler_strategy strategies[] = {
-        /*0*/  { "default",                        4, 4, false, false, false, false, false,  0 },
-        /*1*/  { "disable general TMU sched",      4, 4, true,  false, false, false, false,  0 },
-        /*2*/  { "disable gcm",                    4, 4, true,  true,  false, false, false,  0 },
-        /*3*/  { "disable loop unrolling",         4, 4, true,  true,  true,  false, false,  0 },
-        /*4*/  { "disable UBO load sorting",       4, 4, true,  true,  true,  true,  false,  0 },
-        /*5*/  { "disable TMU pipelining",         4, 4, true,  true,  true,  true,  true,   0 },
-        /*6*/  { "lower thread count",             2, 1, false, false, false, false, false, -1 },
-        /*7*/  { "disable general TMU sched (2t)", 2, 1, true,  false, false, false, false, -1 },
-        /*8*/  { "disable gcm (2t)",               2, 1, true,  true,  false, false, false, -1 },
-        /*9*/  { "disable loop unrolling (2t)",    2, 1, true,  true,  true,  false, false, -1 },
-        /*10*/ { "disable UBO load sorting (2t)",  2, 1, true,  true,  true,  true,  false, -1 },
-        /*11*/ { "disable TMU pipelining (2t)",    2, 1, true,  true,  true,  true,  true,  -1 },
-        /*12*/ { "fallback scheduler",             2, 1, true,  true,  true,  true,  true,  -1 }
+        /*0*/  { "default",                        4, 4, false, false, false, false, false, false,  0 },
+        /*1*/  { "disable general TMU sched",      4, 4, true,  false, false, false, false, false,  0 },
+        /*2*/  { "disable gcm",                    4, 4, true,  true,  false, false, false, false,  0 },
+        /*3*/  { "disable loop unrolling",         4, 4, true,  true,  true,  false, false, false,  0 },
+        /*4*/  { "disable UBO load sorting",       4, 4, true,  true,  true,  true,  false, false,  0 },
+        /*5*/  { "disable TMU pipelining",         4, 4, true,  true,  true,  true,  false, true,   0 },
+        /*6*/  { "lower thread count",             2, 1, false, false, false, false, false, false, -1 },
+        /*7*/  { "disable general TMU sched (2t)", 2, 1, true,  false, false, false, false, false, -1 },
+        /*8*/  { "disable gcm (2t)",               2, 1, true,  true,  false, false, false, false, -1 },
+        /*9*/  { "disable loop unrolling (2t)",    2, 1, true,  true,  true,  false, false, false, -1 },
+        /*10*/ { "Move buffer loads (2t)",         2, 1, true,  true,  true,  true,  true,  false, -1 },
+        /*11*/ { "disable TMU pipelining (2t)",    2, 1, true,  true,  true,  true,  true,  true,  -1 },
+        /*12*/ { "fallback scheduler",             2, 1, true,  true,  true,  true,  true,  true,  -1 }
 };
 
 /**
@@ -1763,8 +1768,15 @@ skip_compile_strategy(struct v3d_compile *c, uint32_t idx)
                 return !c->unrolled_any_loops;
         /* UBO load sorting: skip if we didn't sort any loads */
         case 4:
-        case 10:
                 return !c->sorted_any_ubo_loads;
+        /* Move buffer loads: we assume any shader with difficult RA
+         * most likely has UBO / SSBO loads so we never try to skip.
+         * For now, we only try this for 2-thread compiles since it
+         * is expected to impact instruction counts and latency.
+         */
+        case 10:
+                assert(c->threads < 4);
+                return false;
         /* TMU pipelining: skip if we didn't pipeline any TMU ops */
         case 5:
         case 11:
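
A note on the nir_opt_move change above: sinking a UBO/SSBO load from wherever GCM or an earlier pass hoisted it down to just before its first user shortens the live range of the load's result, and shorter live ranges are what lower register pressure. The following standalone sketch (toy code for illustration, not Mesa code; every name in it is hypothetical) makes the effect concrete by computing peak liveness for a toy block before and after sinking four loads:

/* Toy "IR": each value is a live range [def, last_use]; peak pressure is
 * the largest number of ranges overlapping any instruction slot.
 */
#include <stdio.h>

struct live_range { int def, last_use; };

static int peak_pressure(const struct live_range *r, int n, int len)
{
        int peak = 0;
        for (int i = 0; i < len; i++) {
                int live = 0;
                for (int j = 0; j < n; j++) {
                        if (r[j].def <= i && i <= r[j].last_use)
                                live++;
                }
                if (live > peak)
                        peak = live;
        }
        return peak;
}

int main(void)
{
        /* Four loads hoisted to the top of a 10-instruction block, each
         * first used near the end: all four results are live at once.
         */
        const struct live_range hoisted[] = {
                { 0, 6 }, { 1, 7 }, { 2, 8 }, { 3, 9 },
        };
        /* The same loads sunk right before their first user, which is the
         * placement nir_opt_move with nir_move_load_ubo | nir_move_load_ssbo
         * aims for: at most two results are live at any point.
         */
        const struct live_range sunk[] = {
                { 5, 6 }, { 6, 7 }, { 7, 8 }, { 8, 9 },
        };

        printf("peak pressure, hoisted: %d\n", peak_pressure(hoisted, 4, 10));
        printf("peak pressure, sunk:    %d\n", peak_pressure(sunk, 4, 10));
        return 0;
}

The flip side is what the skip_compile_strategy comment warns about: a load issued right before its only user leaves no independent instructions to hide TMU latency behind, which is why the diff enables this only as a fallback strategy for 2-thread compiles rather than by default.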
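
On how the strategies[] table and skip_compile_strategy fit together: the driver tries the strategies in order until one compiles within its spill budget, consulting the skip predicate so it does not waste an attempt whose knob provably changed nothing on the previous try (entry 10 is never skipped because, per the comment, a shader with difficult RA most likely contains buffer loads). Here is a minimal sketch of that loop, with hypothetical names and stub results standing in for the real v3d entry points:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct strategy { const char *name; int32_t max_tmu_spills; };
struct result { bool ok; uint32_t spills; };

/* Stub for v3d_attempt_compile: pretend only the last strategy succeeds. */
static struct result try_compile(uint32_t idx)
{
        struct result r = { .ok = idx >= 2, .spills = idx >= 2 ? 1 : 100 };
        return r;
}

/* Stub for skip_compile_strategy: strategy 1 is known to be useless here. */
static bool should_skip(uint32_t idx)
{
        return idx == 1;
}

int main(void)
{
        static const struct strategy strategies[] = {
                { "default",            0 },   /* no spills allowed */
                { "disable opt X",      0 },
                { "fallback scheduler", -1 },  /* negative: no spill cap */
        };

        for (uint32_t i = 0; i < 3; i++) {
                if (i > 0 && should_skip(i)) {
                        printf("skipping '%s'\n", strategies[i].name);
                        continue;
                }
                struct result r = try_compile(i);
                bool fits = strategies[i].max_tmu_spills < 0 ||
                            r.spills <= (uint32_t)strategies[i].max_tmu_spills;
                if (r.ok && fits) {
                        printf("compiled with '%s' (%u spills)\n",
                               strategies[i].name, r.spills);
                        return 0;
                }
        }
        return 1;
}

Note the monotone shape of the real table: each fallback entry keeps every knob set by the entries before it, so /*11*/ and /*12*/ carry both move_buffer_loads and disable_tmu_pipelining as true, and each attempt is strictly more conservative than the last.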