mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 13:00:21 +01:00
ir3: allow shared srcs for ldc
This works just fine and opens up a lot more opportunities for early
preamble. Note that I haven't seen actual cases where the index is large
enough to need a register, but I verified in computerator that it works.
Totals:
MaxWaves: 2377396 -> 2377428 (+0.00%); split: +0.00%, -0.00%
Instrs: 48194946 -> 48173991 (-0.04%); split: -0.10%, +0.06%
CodeSize: 101878832 -> 101892290 (+0.01%); split: -0.06%, +0.07%
NOPs: 8383282 -> 8362061 (-0.25%); split: -0.46%, +0.20%
MOVs: 1468620 -> 1464570 (-0.28%); split: -1.14%, +0.86%
COVs: 823514 -> 823506 (-0.00%); split: -0.00%, +0.00%
Full: 1716474 -> 1714338 (-0.12%); split: -0.13%, +0.01%
(ss): 1112861 -> 1117802 (+0.44%); split: -0.52%, +0.96%
(sy): 552143 -> 553148 (+0.18%); split: -0.31%, +0.49%
(ss)-stall: 4011510 -> 4018364 (+0.17%); split: -0.26%, +0.43%
(sy)-stall: 16736200 -> 16766871 (+0.18%); split: -0.39%, +0.57%
STPs: 18895 -> 18887 (-0.04%)
LDPs: 23853 -> 23845 (-0.03%)
Preamble Instrs: 11502184 -> 11461058 (-0.36%); split: -0.48%, +0.12%
Early Preamble: 121333 -> 125474 (+3.41%)
Last helper: 11683394 -> 11675574 (-0.07%); split: -0.14%, +0.07%
Subgroup size: 12951168 -> 12952320 (+0.01%)
Cat0: 9238171 -> 9217761 (-0.22%); split: -0.42%, +0.20%
Cat1: 2352968 -> 2348920 (-0.17%); split: -0.71%, +0.54%
Cat2: 17464877 -> 17468941 (+0.02%); split: -0.00%, +0.03%
Cat6: 515664 -> 515648 (-0.00%)
Cat7: 1636736 -> 1636191 (-0.03%); split: -0.33%, +0.29%
Totals from 12861 (7.81% of 164705) affected shaders:
MaxWaves: 141814 -> 141846 (+0.02%); split: +0.07%, -0.05%
Instrs: 12731084 -> 12710129 (-0.16%); split: -0.39%, +0.22%
CodeSize: 24749138 -> 24762596 (+0.05%); split: -0.23%, +0.29%
NOPs: 2744093 -> 2722872 (-0.77%); split: -1.40%, +0.62%
MOVs: 492373 -> 488323 (-0.82%); split: -3.39%, +2.57%
COVs: 170074 -> 170066 (-0.00%); split: -0.01%, +0.00%
Full: 224044 -> 221908 (-0.95%); split: -1.00%, +0.05%
(ss): 325836 -> 330777 (+1.52%); split: -1.76%, +3.28%
(sy): 143661 -> 144666 (+0.70%); split: -1.20%, +1.89%
(ss)-stall: 1397335 -> 1404189 (+0.49%); split: -0.75%, +1.24%
(sy)-stall: 4286193 -> 4316864 (+0.72%); split: -1.52%, +2.24%
STPs: 1628 -> 1620 (-0.49%)
LDPs: 2183 -> 2175 (-0.37%)
Preamble Instrs: 2486870 -> 2445744 (-1.65%); split: -2.23%, +0.57%
Early Preamble: 170 -> 4311 (+2435.88%)
Last helper: 3053311 -> 3045491 (-0.26%); split: -0.52%, +0.26%
Subgroup size: 991296 -> 992448 (+0.12%)
Cat0: 3031604 -> 3011194 (-0.67%); split: -1.29%, +0.61%
Cat1: 667377 -> 663329 (-0.61%); split: -2.50%, +1.90%
Cat2: 4485219 -> 4489283 (+0.09%); split: -0.01%, +0.10%
Cat6: 87365 -> 87349 (-0.02%)
Cat7: 731126 -> 730581 (-0.07%); split: -0.73%, +0.65%
Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36674>
This commit is contained in:
parent
c579b8eff6
commit
6d779e900c
1 changed file with 6 additions and 4 deletions
|
|
@@ -1177,8 +1177,11 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
assert(nir_intrinsic_base(intr) == 0);
|
||||
|
||||
unsigned ncomp = intr->num_components;
|
||||
struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0];
|
||||
struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
bool use_shared = !intr->def.divergent && ctx->compiler->has_scalar_alu;
|
||||
struct ir3_instruction *offset =
|
||||
ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0];
|
||||
struct ir3_instruction *idx =
|
||||
ir3_get_src_shared(ctx, &intr->src[0], use_shared)[0];
|
||||
struct ir3_instruction *ldc = ir3_LDC(b, idx, 0, offset, 0);
|
||||
ldc->dsts[0]->wrmask = MASK(ncomp);
|
||||
ldc->cat6.iim_val = ncomp;
|
||||
|
|
@@ -1190,8 +1193,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
ctx->so->bindless_ubo = true;
|
||||
ir3_handle_nonuniform(ldc, intr);
|
||||
|
||||
if (!intr->def.divergent &&
|
||||
ctx->compiler->has_scalar_alu) {
|
||||
if (use_shared) {
|
||||
ldc->dsts[0]->flags |= IR3_REG_SHARED;
|
||||
ldc->flags |= IR3_INSTR_U;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue