diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index bb43377bfb6..7d378857377 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -881,7 +881,7 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, v static void si_destroy_screen(struct pipe_screen *pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; - struct si_shader_part *parts[] = {sscreen->vs_prologs, sscreen->tcs_epilogs, sscreen->gs_prologs, + struct si_shader_part *parts[] = {sscreen->vs_prologs, sscreen->tcs_epilogs, sscreen->ps_prologs, sscreen->ps_epilogs}; unsigned i; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b6fe302e7d6..72e5e7e5c14 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -622,7 +622,6 @@ struct si_screen { simple_mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; struct si_shader_part *tcs_epilogs; - struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 655ffecea8b..c8892a31b53 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1207,8 +1207,7 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) key->ge.part.gs.es->info.stage == MESA_SHADER_VERTEX) { si_dump_shader_key_vs(key, &key->ge.part.gs.vs_prolog, "part.gs.vs_prolog", f); } - fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n", - key->ge.part.gs.prolog.tri_strip_adj_fix); + fprintf(f, " mono.u.gs_tri_strip_adj_fix = %u\n", key->ge.mono.u.gs_tri_strip_adj_fix); fprintf(f, " as_ngg = %u\n", key->ge.as_ngg); break; @@ -1593,10 +1592,6 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list, assert(!prolog); shader.key.ge.part.tcs.epilog = key->tcs_epilog.states; break; - case MESA_SHADER_GEOMETRY: - assert(prolog); - shader.key.ge.as_ngg = key->gs_prolog.as_ngg; - break; case MESA_SHADER_FRAGMENT: if (prolog) shader.key.ps.part.prolog = key->ps_prolog.states; @@ -1719,18 +1714,7 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_ shader->previous_stage = es_main_part; } - if (!shader->key.ge.part.gs.prolog.tri_strip_adj_fix) - return true; - - union si_shader_part_key prolog_key; - memset(&prolog_key, 0, sizeof(prolog_key)); - prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; - prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg; - - shader->prolog2 = - si_get_shader_part(sscreen, &sscreen->gs_prologs, MESA_SHADER_GEOMETRY, true, &prolog_key, - compiler, debug, si_llvm_build_gs_prolog, "Geometry Shader Prolog"); - return shader->prolog2 != NULL; + return true; } /** diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ae3e8b1f515..118c37ee5a7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -544,10 +544,6 @@ struct si_tcs_epilog_bits { unsigned tes_reads_tess_factors : 1; }; -struct si_gs_prolog_bits { - unsigned tri_strip_adj_fix : 1; -}; - /* Common PS bits between the shader key and the prolog key. */ struct si_ps_prolog_bits { unsigned color_two_side : 1; @@ -591,10 +587,6 @@ union si_shader_part_key { struct { struct si_tcs_epilog_bits states; } tcs_epilog; - struct { - struct si_gs_prolog_bits states; - unsigned as_ngg : 1; - } gs_prolog; struct { struct si_ps_prolog_bits states; unsigned num_input_sgprs : 6; @@ -633,7 +625,6 @@ struct si_shader_key_ge { struct { struct si_vs_prolog_bits vs_prolog; /* for merged ES-GS */ struct si_shader_selector *es; /* for merged ES-GS */ - struct si_gs_prolog_bits prolog; } gs; } part; @@ -654,9 +645,10 @@ struct si_shader_key_ge { union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS]; union { - uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */ + uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */ /* When PS needs PrimID and GS is disabled. */ - unsigned vs_export_prim_id : 1; + unsigned vs_export_prim_id : 1; /* VS and TES only */ + unsigned gs_tri_strip_adj_fix : 1; /* GS only */ } u; } mono; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 106abde1c97..b99ded02a04 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -140,6 +140,9 @@ struct si_shader_context { struct ac_llvm_compiler *compiler; + /* GS vertex offsets unpacked with the gfx6-9 tristrip_adj bug workaround. */ + LLVMValueRef gs_vtx_offset[6]; + /* Preloaded descriptors. */ LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; @@ -236,7 +239,6 @@ LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx); void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi); void si_preload_esgs_ring(struct si_shader_context *ctx); void si_preload_gs_rings(struct si_shader_context *ctx); -void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key); void si_llvm_init_gs_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_tess.c */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index bd3fe0ea326..dd944e7f8b5 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -443,7 +443,32 @@ static void si_llvm_declare_compute_memory(struct si_shader_context *ctx) static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) { - if (nir->info.stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_GEOMETRY) { + /* Unpack GS vertex offsets. */ + for (unsigned i = 0; i < 6; i++) { + if (ctx->screen->info.chip_class >= GFX9) { + ctx->gs_vtx_offset[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16); + } else { + ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[i]); + } + } + + /* Apply the hw bug workaround for triangle strips with adjacency. */ + if (ctx->screen->info.chip_class <= GFX9 && + ctx->shader->key.ge.mono.u.gs_tri_strip_adj_fix) { + LLVMValueRef prim_id = ac_get_arg(&ctx->ac, ctx->args.gs_prim_id); + /* Remap GS vertex offsets for every other primitive. */ + LLVMValueRef rotate = LLVMBuildTrunc(ctx->ac.builder, prim_id, ctx->ac.i1, ""); + LLVMValueRef fixed[6]; + + for (unsigned i = 0; i < 6; i++) { + fixed[i] = LLVMBuildSelect(ctx->ac.builder, rotate, + ctx->gs_vtx_offset[(i + 4) % 6], + ctx->gs_vtx_offset[i], ""); + } + memcpy(ctx->gs_vtx_offset, fixed, sizeof(fixed)); + } + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { unsigned colors_read = ctx->shader->selector->info.colors_read; LLVMValueRef main_fn = ctx->main_fn; @@ -1205,17 +1230,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * struct si_shader_selector *es = shader->key.ge.part.gs.es; LLVMValueRef es_prolog = NULL; LLVMValueRef es_main = NULL; - LLVMValueRef gs_prolog = NULL; LLVMValueRef gs_main = ctx.main_fn; - /* GS prolog */ - union si_shader_part_key gs_prolog_key; - memset(&gs_prolog_key, 0, sizeof(gs_prolog_key)); - gs_prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; - gs_prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg; - si_llvm_build_gs_prolog(&ctx, &gs_prolog_key); - gs_prolog = ctx.main_fn; - /* ES main part */ struct si_shader shader_es = {}; shader_es.selector = es; @@ -1253,28 +1269,17 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* Prepare the array of shader parts. */ LLVMValueRef parts[4]; - unsigned num_parts = 0, main_part, next_first_part; + unsigned num_parts = 0, main_part; if (es_prolog) parts[num_parts++] = es_prolog; parts[main_part = num_parts++] = es_main; - parts[next_first_part = num_parts++] = gs_prolog; parts[num_parts++] = gs_main; - si_build_wrapper_function(&ctx, parts, num_parts, main_part, next_first_part, false); + si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, false); } else { - LLVMValueRef parts[2]; - union si_shader_part_key prolog_key; - - parts[1] = ctx.main_fn; - - memset(&prolog_key, 0, sizeof(prolog_key)); - prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; - si_llvm_build_gs_prolog(&ctx, &prolog_key); - parts[0] = ctx.main_fn; - - si_build_wrapper_function(&ctx, parts, 2, 1, 0, false); + /* Nothing to do for gfx6-8. The shader has only 1 part and it's ctx.main_fn. */ } } else if (shader->is_monolithic && ctx.stage == MESA_SHADER_FRAGMENT) { si_llvm_build_monolithic_ps(&ctx, shader); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 4a711c80539..0a9f503ddb4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -56,13 +56,10 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in /* GFX9 has the ESGS ring in LDS. */ if (ctx->screen->info.chip_class >= GFX9) { - unsigned index = vtx_offset_param; - vtx_offset = - si_unpack_param(ctx, ctx->args.gs_vtx_offset[index / 2], (index & 1) * 16, 16); - unsigned offset = param * 4 + swizzle; - vtx_offset = - LLVMBuildAdd(ctx->ac.builder, vtx_offset, LLVMConstInt(ctx->ac.i32, offset, false), ""); + + vtx_offset = LLVMBuildAdd(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], + LLVMConstInt(ctx->ac.i32, offset, false), ""); LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset); LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, ""); @@ -71,9 +68,8 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in /* GFX6: input load from the ESGS ring in memory. */ /* Get the vertex offset parameter on GFX6. */ - LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[vtx_offset_param]); - - vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); + vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], + LLVMConstInt(ctx->ac.i32, 4, 0), ""); soffset = LLVMConstInt(ctx->ac.i32, (param * 4 + swizzle) * 256, 0); @@ -545,113 +541,6 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, return shader; } -/** - * Build the GS prolog function. Rotate the input vertices for triangle strips - * with adjacency. - */ -void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key) -{ - unsigned num_sgprs, num_vgprs; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMTypeRef returns[AC_MAX_ARGS]; - LLVMValueRef func, ret; - - memset(&ctx->args, 0, sizeof(ctx->args)); - - if (ctx->screen->info.chip_class >= GFX9) { - /* Other user SGPRs are not needed by GS. */ - num_sgprs = 8 + SI_NUM_VS_STATE_RESOURCE_SGPRS; - num_vgprs = 5; /* ES inputs are not needed by GS */ - } else { - num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; - num_vgprs = 8; - } - - for (unsigned i = 0; i < num_sgprs; ++i) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - returns[i] = ctx->ac.i32; - } - - for (unsigned i = 0; i < num_vgprs; ++i) { - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); - returns[num_sgprs + i] = ctx->ac.f32; - } - - /* Create the function. */ - si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0); - func = ctx->main_fn; - - /* Copy inputs to outputs. This should be no-op, as the registers match, - * but it will prevent the compiler from overwriting them unintentionally. - */ - ret = ctx->return_value; - for (unsigned i = 0; i < num_sgprs; i++) { - LLVMValueRef p = LLVMGetParam(func, i); - ret = LLVMBuildInsertValue(builder, ret, p, i, ""); - } - for (unsigned i = 0; i < num_vgprs; i++) { - LLVMValueRef p = LLVMGetParam(func, num_sgprs + i); - p = ac_to_float(&ctx->ac, p); - ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, ""); - } - - if (key->gs_prolog.states.tri_strip_adj_fix) { - /* Remap the input vertices for every other primitive. */ - const struct ac_arg gfx6_vtx_params[6] = { - {.used = true, .arg_index = num_sgprs}, {.used = true, .arg_index = num_sgprs + 1}, - {.used = true, .arg_index = num_sgprs + 3}, {.used = true, .arg_index = num_sgprs + 4}, - {.used = true, .arg_index = num_sgprs + 5}, {.used = true, .arg_index = num_sgprs + 6}, - }; - const struct ac_arg gfx9_vtx_params[3] = { - {.used = true, .arg_index = num_sgprs}, - {.used = true, .arg_index = num_sgprs + 1}, - {.used = true, .arg_index = num_sgprs + 4}, - }; - LLVMValueRef vtx_in[6], vtx_out[6]; - LLVMValueRef prim_id, rotate; - - if (ctx->screen->info.chip_class >= GFX9) { - for (unsigned i = 0; i < 3; i++) { - vtx_in[i * 2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16); - vtx_in[i * 2 + 1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16); - } - } else { - for (unsigned i = 0; i < 6; i++) - vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]); - } - - prim_id = LLVMGetParam(func, num_sgprs + 2); - rotate = LLVMBuildTrunc(builder, prim_id, ctx->ac.i1, ""); - - for (unsigned i = 0; i < 6; ++i) { - LLVMValueRef base, rotated; - base = vtx_in[i]; - rotated = vtx_in[(i + 4) % 6]; - vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, ""); - } - - if (ctx->screen->info.chip_class >= GFX9) { - for (unsigned i = 0; i < 3; i++) { - LLVMValueRef hi, out; - - hi = LLVMBuildShl(builder, vtx_out[i * 2 + 1], LLVMConstInt(ctx->ac.i32, 16, 0), ""); - out = LLVMBuildOr(builder, vtx_out[i * 2], hi, ""); - out = ac_to_float(&ctx->ac, out); - ret = LLVMBuildInsertValue(builder, ret, out, gfx9_vtx_params[i].arg_index, ""); - } - } else { - for (unsigned i = 0; i < 6; i++) { - LLVMValueRef out; - - out = ac_to_float(&ctx->ac, vtx_out[i]); - ret = LLVMBuildInsertValue(builder, ret, out, gfx6_vtx_params[i].arg_index, ""); - } - } - } - - LLVMBuildRet(builder, ret); -} - void si_llvm_init_gs_callbacks(struct si_shader_context *ctx) { ctx->abi.load_inputs = si_nir_load_input_gs; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index fd863e26338..896abcf9e80 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2133,8 +2133,8 @@ static void si_draw(struct pipe_context *ctx, bool gs_tri_strip_adj_fix = !HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY; - if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix) { - sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix; + if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.mono.u.gs_tri_strip_adj_fix) { + sctx->shader.gs.key.ge.mono.u.gs_tri_strip_adj_fix = gs_tri_strip_adj_fix; sctx->do_update_shaders = true; } }