diff --git a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c index f833542dd2a..e2bb74eca8c 100644 --- a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c +++ b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c @@ -111,6 +111,7 @@ typedef struct { /* Which hardware generation we're dealing with */ enum amd_gfx_level gfx_level; + unsigned wave_size; nir_tcs_info tcs_info; ac_nir_tess_io_info io_info; @@ -942,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, nir_if *thread0 = nir_push_if(&top_b, nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0), - nir_inverse_ballot(&top_b, 1, nir_imm_ivec4(&top_b, 0x1, 0, 0, 0)))); + nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size)))); { /* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */ nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0), @@ -1069,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out; assert(tcs_vertices_out <= 32); nir_def *is_first_active_lane = - nir_inverse_ballot(b, 1, nir_imm_ivec4(b, BITFIELD_MASK(tcs_vertices_out), 0, 0, 0)); + nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size)); /* Only the first active invocation in each subgroup performs the AND reduction through LDS. */ nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane); @@ -1093,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, /* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */ nir_def *lds_result; - nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_ivec4(b, 0x1, 0, 0, 0))); + nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size))); if_lane0->control = nir_selection_control_divergent_always_taken; { lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4); @@ -1601,6 +1602,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info, lower_tess_io_state state = { .gfx_level = gfx_level, + .wave_size = wave_size, .tcs_info = *info, .io_info = *io_info, .tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,