diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 2e5dff73f83..ab83b464ec2 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -349,14 +349,18 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage); break; case nir_intrinsic_load_lshs_vertex_stride_amd: - if (stage == MESA_SHADER_VERTEX) + if (stage == MESA_SHADER_VERTEX) { replacement = nir_imm_int(b, sel->info.lshs_vertex_stride); - else if (stage == MESA_SHADER_TESS_CTRL) - replacement = sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic ? - nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride) : - nir_ishl_imm(b, GET_FIELD_NIR(VS_STATE_LS_OUT_VERTEX_SIZE), 2); - else + } else if (stage == MESA_SHADER_TESS_CTRL) { + if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) { + replacement = nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride); + } else { + nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6); + replacement = nir_iadd_imm_nuw(b, nir_ishl_imm(b, num_ls_out, 4), 4); + } + } else { unreachable("no nir_load_lshs_vertex_stride_amd"); + } break; case nir_intrinsic_load_esgs_vertex_stride_amd: assert(sel->screen->info.gfx_level >= GFX9); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index f74416f113f..a778237ea0c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -57,8 +57,10 @@ struct si_shader_args { * [6:10] = the number of output vertices per patch - 1, max = 31 * # 5 bits * [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only) - * # 7 bits - * [16:22] = reserved for future use + * # 1 bit + * [16] = reserved for future use + * # 6 bits + * [17:22] = the number of LS outputs, max = 63 * # 6 bits * [23:28] = the number of HS per-vertex outputs, max = 63 * # 2 bits diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 7a62910c32d..18a9ba63122 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4486,6 +4486,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written); unsigned input_vertex_size = ls->info.lshs_vertex_stride; + unsigned num_vs_outputs = (input_vertex_size - 4) / 16; unsigned output_vertex_size = num_tcs_outputs * 16; unsigned input_patch_size; @@ -4595,6 +4596,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); assert(num_patches <= 64); + assert(num_vs_outputs <= 63); assert(num_tcs_outputs <= 63); assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0); @@ -4608,7 +4610,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) sctx->tcs_offchip_layout &= 0xe0000000; sctx->tcs_offchip_layout |= (num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) | - (num_tcs_outputs << 23); + (num_vs_outputs << 17) | (num_tcs_outputs << 23); /* Compute the LDS size. */ unsigned lds_size = lds_per_patch * num_patches;