radv/gfx10: adjust the GS NGG scratch size for streamout

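NGG streamout needs more scratch space: in addition to the primitive counts and streamout offsets, it stores primitive offsets for each of the four streams.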

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Samuel Pitoiset 2019-09-09 10:29:22 +02:00
parent e1dc3ab753
commit 5ebc76471c
2 changed files with 19 additions and 3 deletions

View file

@ -4208,9 +4208,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
}
/* TODO: streamout */
unsigned scratch_size = 8;
if (ctx.shader_info->so.num_outputs)
scratch_size = 44;
LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8);
LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
ctx.gs_ngg_scratch =
LLVMAddGlobalInAddressSpace(ctx.ac.module,
ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);

View file

@ -900,9 +900,23 @@ radv_shader_variant_create(struct radv_device *device,
* size randomly hangs with CTS. Just use the maximum
* possible LDS size for now.
*/
unsigned ngg_scratch_size = 8 * 4;
if (binary->info.so.num_outputs) {
/* Memory layout of NGG streamout scratch (dword indices):
* [0-3]: number of generated primitives
* [4-7]: number of emitted primitives
* [8-11]: streamout offsets
* [12-19]: primitive offsets for stream 0
* [20-27]: primitive offsets for stream 1
* [28-35]: primitive offsets for stream 2
* [36-43]: primitive offsets for stream 3
*/
ngg_scratch_size = 44 * 4;
}
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - 32; /* 32 is NGG scratch */
sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - ngg_scratch_size;
sym->align = 64 * 1024;
}