diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index af3688212b3..0ef0ed2da77 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -1022,6 +1022,8 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu num_patches = MIN2(num_patches, one_wave); } + /* This is the maximum number that fits into tcs_offchip_layout. */ + assert(num_patches <= 127); return num_patches; } diff --git a/src/amd/vulkan/nir/radv_nir_lower_abi.c b/src/amd/vulkan/nir/radv_nir_lower_abi.c index 87905c3e67a..3d1e80b50fd 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_abi.c +++ b/src/amd/vulkan/nir/radv_nir_lower_abi.c @@ -80,8 +80,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) if (s->info->num_tess_patches) { replacement = nir_imm_int(b, s->info->num_tess_patches); } else { - nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); - replacement = nir_iadd_imm_nuw(b, n, 1); + replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); } break; case nir_intrinsic_load_tcs_tess_levels_to_tes_amd: @@ -249,8 +248,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) if (s->info->num_tess_patches) { num_patches = nir_imm_int(b, s->info->num_tess_patches); } else { - nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); - num_patches = nir_iadd_imm_nuw(b, n, 1); + num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); } if (stage == MESA_SHADER_TESS_CTRL) { diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 69a09eae418..cc2642db5b2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -10654,7 +10654,7 @@ radv_emit_tess_state(struct radv_cmd_buffer *cmd_buffer) const uint32_t tcs_offchip_layout_offset = radv_get_user_sgpr_loc(tcs, AC_UD_TCS_OFFCHIP_LAYOUT); const uint32_t tes_offchip_layout_offset = radv_get_user_sgpr_loc(tes, AC_UD_TCS_OFFCHIP_LAYOUT); if (tcs_offchip_layout_offset) { - uint32_t tmp = SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches - 1) | + uint32_t tmp = SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches) | SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS, vs->info.vs.num_linked_outputs) | SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS, tcs->info.tcs.num_linked_outputs) | SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_TES_READS_TF, tes->info.tes.reads_tess_factors) | diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index 2a3c3766d88..8060c26f5e8 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -107,7 +107,7 @@ struct radv_shader_args { struct ac_arg sample_mask; /* TCS */ - /* # [0:6] = the number of tessellation patches minus one, max = 127 + /* # [0:6] = the number of tessellation patches, max = 127 * # [7:11] = TCS: the number of input patch control points minus one, max = 31 * TES: the number of output patch control points minus one, max = 31 * # [12:16] = (unused) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 30b9bf84767..0de1c7dad2e 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -299,13 +299,11 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } break; case nir_intrinsic_load_tcs_num_patches_amd: { - nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7); - replacement = nir_iadd_imm(b, tmp, 1); + replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7); break; } case nir_intrinsic_load_hs_out_patch_data_offset_amd: { - nir_def *tcs_num_patches = - nir_iadd_imm_nuw(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7), 1); + nir_def *tcs_num_patches = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7); nir_def *tcs_out_vertices, *num_tcs_mem_outputs; if (stage == MESA_SHADER_TESS_CTRL) { diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index e4c3c1ef026..700743860f9 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -48,7 +48,7 @@ struct si_shader_args { /* API TCS & TES */ /* Layout of TCS outputs in the offchip buffer - * [0:6] (7 bits) = the number of patches per threadgroup - 1, max = 127 + * [0:6] (7 bits) = the number of patches per threadgroup, max = 127 * [7:11] (5 bits) = patch_vertices_in - 1, different for TCS and TES, max = 31 * [12:16] (5 bits) = (unused) * [17:22] (6 bits) = the number of LS outputs in LDS, max = 63 diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 5eabec2733a..63811ee21b9 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4836,7 +4836,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) assert(ls_current->config.lds_size == 0); assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); - assert(num_patches <= 128); + assert(num_patches <= 127); assert(num_lds_vs_outputs <= 63); assert(num_mem_tcs_outputs <= 63); @@ -4846,7 +4846,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) si_resource(sctx->screen->tess_rings)->gpu_address; assert((ring_va & BITFIELD_MASK(19)) == 0); - unsigned shared_fields = (num_patches - 1) | (num_lds_vs_outputs << 17) | (num_mem_tcs_outputs << 23); + unsigned shared_fields = num_patches | (num_lds_vs_outputs << 17) | (num_mem_tcs_outputs << 23); sctx->tes_offchip_ring_va_sgpr = ring_va; sctx->tcs_offchip_layout = (sctx->tcs_offchip_layout & 0xe0000000) |