From 5d377f435b4e64762ce706f6082005e974b894ee Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 12 Apr 2022 21:04:35 +0300 Subject: [PATCH] freedreno/a6xx: Add EARLYPREAMBLE flag to all a6xx_sp_xs_ctrl_reg0 Each shader stage has its own "early preamble" flag. Early preamble is likely an optimization to hide some of the latency when loading UBOs into consts in the preamble. Early preamble has the following limitations: - Only shared, a1, and consts regs could be used (accessing other regs would result in GPU fault); - No cat5/cat6, only stc/ldc variants are working; - Values written to shared regs are not accessible by the rest of the shader; - Instructions before shps are also considered to be a part of early preamble. Note, for all shaders from d3d11 games blob produced preambles compatible with early preamble mode. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/computerator/a6xx.c | 1 + .../computerator/examples/early_preamble.asm | 25 ++++++++++++ src/freedreno/ir3/ir3_assembler.h | 1 + src/freedreno/ir3/ir3_lexer.l | 1 + src/freedreno/ir3/ir3_parser.y | 4 ++ src/freedreno/registers/adreno/a6xx.xml | 40 +++++++++++-------- 6 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 src/freedreno/computerator/examples/early_preamble.asm diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index a0ce6f986da..59c3ebfc196 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -153,6 +153,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | + COND(ir3_kernel->info.early_preamble, A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE) | A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); diff --git 
a/src/freedreno/computerator/examples/early_preamble.asm b/src/freedreno/computerator/examples/early_preamble.asm new file mode 100644 index 00000000000..717b374deb6 --- /dev/null +++ b/src/freedreno/computerator/examples/early_preamble.asm @@ -0,0 +1,25 @@ +@localsize 1, 1, 1 +@buf 4 ; g[0] +@invocationid(r0.x) ; r0.xyz +@const(c0.x) 0.0, 0.0, 0.0, 0.0 +@earlypreamble + +shps #l_preamble_end +getone #l_preamble_end + +mov.u32u32 r48.x, 1 +mov.u32u32 r48.y, 2 +mov.u32u32 r48.z, 3 +mov.u32u32 r48.w, 4 +(rpt5)nop +stc.u32 c[0], r48.x, 4 + +(sy)(ss)shpe + +l_preamble_end: +(jp)nop + +(rpt3)mov.u32u32 r1.x, (r)c0.x +(rpt5)nop +stib.b.untyped.1d.u32.4.imm r1.x, r0.x, 0 +end diff --git a/src/freedreno/ir3/ir3_assembler.h b/src/freedreno/ir3/ir3_assembler.h index 5ff28242c80..328915e1ae9 100644 --- a/src/freedreno/ir3/ir3_assembler.h +++ b/src/freedreno/ir3/ir3_assembler.h @@ -37,6 +37,7 @@ struct ir3_kernel_info { /* driver-param / replaced uniforms: */ unsigned numwg; unsigned wgid; + unsigned early_preamble; }; struct ir3_shader; diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index 52b97789645..07843a84881 100644 --- a/src/freedreno/ir3/ir3_lexer.l +++ b/src/freedreno/ir3/ir3_lexer.l @@ -107,6 +107,7 @@ static int parse_reg(const char *str) "@out" return TOKEN(T_A_OUT); "@tex" return TOKEN(T_A_TEX); "@pvtmem" return TOKEN(T_A_PVTMEM); +"@earlypreamble" return TOKEN(T_A_EARLYPREAMBLE); "(sy)" return TOKEN(T_SY); "(ss)" return TOKEN(T_SS); "(absneg)" return TOKEN(T_ABSNEG); diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index c306f9a7a6f..d2fb5753d7a 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -334,6 +334,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token T_A_OUT %token T_A_TEX %token T_A_PVTMEM +%token T_A_EARLYPREAMBLE /* todo, re-add @sampler/@uniform/@varying if needed someday */ /* src register flags */ @@ -701,6 +702,7 @@ header: localsize_header 
| out_header | tex_header | pvtmem_header +| earlypreamble_header const_val: T_FLOAT { $$ = fui($1); } | T_INT { $$ = $1; } @@ -767,6 +769,8 @@ branchstack_header: T_A_BRANCHSTACK const_val { variant->branchstack = $2; } pvtmem_header: T_A_PVTMEM const_val { variant->pvtmem_size = $2; } +earlypreamble_header: T_A_EARLYPREAMBLE { info->early_preamble = 1; } + /* Stubs for now */ in_header: T_A_IN '(' T_REGISTER ')' T_IDENTIFIER '(' T_IDENTIFIER '=' integer ')' { } diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 22a9961755d..e8e6f8accee 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -2887,8 +2887,22 @@ to upconvert to 32b float internally? GS must have the same mergedregs setting as VS. --> - - + + @@ -3001,11 +3015,8 @@ to upconvert to 32b float internally? - - + + - + @@ -3064,11 +3075,8 @@ to upconvert to 32b float internally? - - + + @@ -3137,7 +3145,8 @@ to upconvert to 32b float internally? - + + @@ -3249,8 +3258,7 @@ to upconvert to 32b float internally? - - +