diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index ab83b464ec2..537056c043f 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -338,9 +338,9 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } case nir_intrinsic_load_patch_vertices_in: if (stage == MESA_SHADER_TESS_CTRL) - replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 11, 5); + replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5); else if (stage == MESA_SHADER_TESS_EVAL) { - replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5); + replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5); } else unreachable("no nir_load_patch_vertices_in"); replacement = nir_iadd_imm(b, replacement, 1); @@ -372,7 +372,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } break; case nir_intrinsic_load_tcs_num_patches_amd: { - nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6); + nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7); replacement = nir_iadd_imm(b, tmp, 1); break; } @@ -387,12 +387,12 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } else { nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6); nir_def *out_vtx_size = nir_ishl_imm(b, num_hs_out, 4); - nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5); + nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5); nir_def *out_vtx_per_patch = nir_iadd_imm_nuw(b, o, 1); per_vtx_out_patch_size = nir_imul(b, out_vtx_per_patch, out_vtx_size); } - nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6); + nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7); nir_def *num_patches = nir_iadd_imm_nuw(b, p, 1); replacement = nir_imul(b, per_vtx_out_patch_size, num_patches); break; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index a778237ea0c..8e989fe862b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -51,14 +51,12 @@ struct si_shader_args { /* API TCS & TES */ /* Layout of TCS outputs in the offchip buffer - * # 6 bits - * [0:5] = the number of patches per threadgroup - 1, max = 63 + * # 7 bits + * [0:6] = the number of patches per threadgroup - 1, max = 127 * # 5 bits - * [6:10] = the number of output vertices per patch - 1, max = 31 + * [7:11] = the number of output vertices per patch - 1, max = 31 * # 5 bits - * [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only) - * # 1 bit - * [16] = reserved for future use + * [12:16] = the number of input vertices per patch - 1, max = 31 (TCS only) * # 6 bits * [17:22] = the number of LS outputs, max = 63 * # 6 bits diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 7aa25138bef..38bc0b3f444 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4587,18 +4587,12 @@ void si_update_tess_io_layout_state(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state); } - unsigned output_patch0_offset = input_patch_size * num_patches; - unsigned perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; - /* Compute userdata SGPRs. */ - assert(((input_vertex_size / 4) & ~0xff) == 0); - assert(((perpatch_output_offset / 4) & ~0xffff) == 0); assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); - assert(num_patches <= 64); + assert(num_patches <= 128); assert(num_vs_outputs <= 63); assert(num_tcs_outputs <= 63); - assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0); uint64_t ring_va = sctx->ws->cs_is_secure(&sctx->gfx_cs) ? @@ -4609,7 +4603,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) sctx->tes_offchip_ring_va_sgpr = ring_va; sctx->tcs_offchip_layout &= 0xe0000000; sctx->tcs_offchip_layout |= - (num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) | + (num_patches - 1) | ((num_tcs_output_cp - 1) << 7) | ((num_tcs_input_cp - 1) << 12) | (num_vs_outputs << 17) | (num_tcs_outputs << 23); /* Compute the LDS size. */