diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 2628746a302..1ee29568790 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -433,17 +433,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) break; case MESA_SHADER_TESS_CTRL: NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation); - NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler); + NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so); progress = true; break; case MESA_SHADER_TESS_EVAL: - NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation); + NIR_PASS_V(s, ir3_nir_lower_tess_eval, so, so->key.tessellation); if (so->key.has_gs) NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation); progress = true; break; case MESA_SHADER_GEOMETRY: - NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler); + NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so); progress = true; break; default: @@ -694,12 +694,12 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, constoff = align(constoff - 1, 4) + 3; const_state->offsets.primitive_param = constoff; const_state->offsets.primitive_map = constoff + 5; - constoff += 5 + DIV_ROUND_UP(nir->num_inputs, 4); + constoff += 5 + DIV_ROUND_UP(v->input_size, 4); break; case MESA_SHADER_GEOMETRY: const_state->offsets.primitive_param = constoff; const_state->offsets.primitive_map = constoff + 1; - constoff += 1 + DIV_ROUND_UP(nir->num_inputs, 4); + constoff += 1 + DIV_ROUND_UP(v->input_size, 4); break; default: break; diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index e9fe495767b..a6ec1440e9b 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -46,9 +46,9 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader); void ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology); -void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler); +void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v); void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology); -void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology); +void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology); void ir3_nir_lower_gs(nir_shader *shader); const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); @@ -65,10 +65,6 @@ bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v); nir_ssa_def * ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift); -uint32_t ir3_link_geometry_stages(const struct ir3_shader_variant *producer, - const struct ir3_shader_variant *consumer, - uint32_t *locs); - static inline nir_intrinsic_instr * ir3_bindless_resource(nir_src src) { diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index 44b2921dcb7..d54f9a4c928 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -30,7 +30,6 @@ struct state { struct primitive_map { unsigned loc[32]; - unsigned size[32]; unsigned stride; } map; @@ -73,45 +72,65 @@ build_local_primitive_id(nir_builder *b, struct state *state) return bitfield_extract(b, state->header, state->local_primitive_id_start, 63); } -static nir_variable * -get_var(nir_shader *shader, nir_variable_mode mode, int driver_location) +static bool +is_tess_levels(gl_varying_slot slot) { - nir_foreach_variable_with_modes (v, shader, mode) { - if (v->data.driver_location == driver_location) { - return v; - } - } - - return NULL; + return (slot == VARYING_SLOT_TESS_LEVEL_OUTER || + slot == VARYING_SLOT_TESS_LEVEL_INNER); } -static bool -is_tess_levels(nir_variable *var) +/* Return a deterministic index for varyings. We can't rely on driver_location + * to be correct without linking the different stages first, so we create + * "primitive maps" where the producer decides on the location of each varying + * slot and then exports a per-slot array to the consumer. This compacts the + * gl_varying_slot space down a bit so that the primitive maps aren't too + * large. + * + * Note: per-patch varyings are currently handled separately, without any + * compacting. + * + * TODO: We could probably use the driver_location's directly in the non-SSO + * (Vulkan) case. + */ + +static unsigned +shader_io_get_unique_index(gl_varying_slot slot) { - return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER || - var->data.location == VARYING_SLOT_TESS_LEVEL_INNER); + if (slot == VARYING_SLOT_POS) + return 0; + if (slot == VARYING_SLOT_PSIZ) + return 1; + if (slot == VARYING_SLOT_CLIP_DIST0) + return 2; + if (slot == VARYING_SLOT_CLIP_DIST1) + return 3; + if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) + return 4 + (slot - VARYING_SLOT_VAR0); + unreachable("illegal slot in get unique index\n"); } static nir_ssa_def * build_local_offset(nir_builder *b, struct state *state, - nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset) + nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset) { nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b); nir_ssa_def *primitive_offset = nir_imul24(b, build_local_primitive_id(b, state), primitive_stride); nir_ssa_def *attr_offset; nir_ssa_def *vertex_stride; + unsigned index = shader_io_get_unique_index(location); switch (b->shader->info.stage) { case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_EVAL: vertex_stride = nir_imm_int(b, state->map.stride * 4); - attr_offset = nir_imm_int(b, state->map.loc[base] * 4); + attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp); break; case MESA_SHADER_TESS_CTRL: case MESA_SHADER_GEOMETRY: vertex_stride = nir_load_vs_vertex_stride_ir3(b); - attr_offset = nir_load_primitive_location_ir3(b, base); + attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index), + nir_imm_int(b, comp * 4)); break; default: unreachable("bad shader stage"); @@ -120,7 +139,7 @@ build_local_offset(nir_builder *b, struct state *state, nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride); return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset), - nir_iadd(b, attr_offset, offset)); + nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4)))); } static nir_intrinsic_instr * @@ -153,37 +172,58 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, } static void -build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map) +build_primitive_map(nir_shader *shader, struct primitive_map *map) { - nir_foreach_variable_with_modes (var, shader, mode) { - switch (var->data.location) { - case VARYING_SLOT_TESS_LEVEL_OUTER: - case VARYING_SLOT_TESS_LEVEL_INNER: - continue; - } - - unsigned size = glsl_count_attribute_slots(var->type, false) * 4; - - assert(var->data.driver_location < ARRAY_SIZE(map->size)); - map->size[var->data.driver_location] = - MAX2(map->size[var->data.driver_location], size); + /* All interfaces except the TCS <-> TES interface use ldlw, which takes + * an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses + * ldg, which takes an offset in dwords, but each per-vertex slot has + * space for every vertex, and there's space at the beginning for + * per-patch varyings. + */ + unsigned slot_size = 16, start = 0; + if (shader->info.stage == MESA_SHADER_TESS_CTRL) { + slot_size = shader->info.tess.tcs_vertices_out * 4; + start = util_last_bit(shader->info.patch_outputs_written) * 4; } - unsigned loc = 0; - for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) { - if (map->size[i] == 0) - continue; - nir_variable *var = get_var(shader, mode, i); - map->loc[i] = loc; - loc += map->size[i]; + uint64_t mask = shader->info.outputs_written; + unsigned loc = start; + while (mask) { + int location = u_bit_scan64(&mask); + if (is_tess_levels(location)) + continue; - if (var->data.patch) - map->size[i] = 0; - else - map->size[i] = map->size[i] / glsl_get_length(var->type); + unsigned index = shader_io_get_unique_index(location); + map->loc[index] = loc; + loc += slot_size; } map->stride = loc; + /* Use units of dwords for the stride. */ + if (shader->info.stage != MESA_SHADER_TESS_CTRL) + map->stride /= 4; +} + +/* For shader stages that receive a primitive map, calculate how big it should + * be. + */ + +static unsigned +calc_primitive_map_size(nir_shader *shader) +{ + uint64_t mask = shader->info.inputs_read; + unsigned max_index = 0; + while (mask) { + int location = u_bit_scan64(&mask); + + if (is_tess_levels(location)) + continue; + + unsigned index = shader_io_get_unique_index(location); + max_index = MAX2(max_index, index + 1); + } + + return max_index; } static void @@ -209,7 +249,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s b->cursor = nir_instr_remove(&intr->instr); nir_ssa_def *vertex_id = build_vertex_id(b, state); - nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr), + nir_ssa_def *offset = build_local_offset(b, state, vertex_id, + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), intr->src[1].ssa); nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3); @@ -240,7 +282,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant * { struct state state = { }; - build_primitive_map(shader, nir_var_shader_out, &state.map); + build_primitive_map(shader, &state.map); memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc)); nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -282,7 +324,8 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st nir_ssa_def *offset = build_local_offset(b, state, intr->src[0].ssa, // this is typically gl_InvocationID - nir_intrinsic_base(intr), + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), intr->src[1].ssa); replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL); @@ -305,14 +348,14 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st } void -ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler) +ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v) { struct state state = { }; /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS, * HS uses a different primitive id, which starts at bit 16 in the header */ - if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared) + if (shader->info.stage == MESA_SHADER_TESS_CTRL && v->shader->compiler->tess_use_shared) state.local_primitive_id_start = 16; nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -329,43 +372,74 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compile nir_foreach_block_safe (block, impl) lower_block_to_explicit_input(block, &b, &state); + + v->input_size = calc_primitive_map_size(shader); } +static nir_ssa_def * +build_tcs_out_vertices(nir_builder *b) +{ + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) + return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out); + else + return nir_load_patch_vertices_in(b); +} static nir_ssa_def * build_per_vertex_offset(nir_builder *b, struct state *state, - nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var) + nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset) { nir_ssa_def *primitive_id = nir_load_primitive_id(b); nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b); nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride); nir_ssa_def *attr_offset; - int loc = var->data.driver_location; - switch (b->shader->info.stage) { - case MESA_SHADER_TESS_CTRL: - attr_offset = nir_imm_int(b, state->map.loc[loc]); - break; - case MESA_SHADER_TESS_EVAL: - attr_offset = nir_load_primitive_location_ir3(b, loc); - break; - default: - unreachable("bad shader state"); + if (nir_src_is_const(nir_src_for_ssa(offset))) { + location += nir_src_as_uint(nir_src_for_ssa(offset)); + offset = nir_imm_int(b, 0); + } else { + /* Offset is in vec4's, but we need it in unit of components for the + * load/store_global_ir3 offset. + */ + offset = nir_ishl(b, offset, nir_imm_int(b, 2)); } - nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]); - nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride); + nir_ssa_def *vertex_offset; + if (vertex) { + unsigned index = shader_io_get_unique_index(location); + switch (b->shader->info.stage) { + case MESA_SHADER_TESS_CTRL: + attr_offset = nir_imm_int(b, state->map.loc[index] + comp); + break; + case MESA_SHADER_TESS_EVAL: + attr_offset = + nir_iadd(b, nir_load_primitive_location_ir3(b, index), + nir_imm_int(b, comp)); + break; + default: + unreachable("bad shader state"); + } - return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), - nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2)))); + attr_offset = nir_iadd(b, attr_offset, + nir_imul24(b, offset, + build_tcs_out_vertices(b))); + vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2)); + } else { + assert(location >= VARYING_SLOT_PATCH0 && + location <= VARYING_SLOT_TESS_MAX); + unsigned index = location - VARYING_SLOT_PATCH0; + attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset); + vertex_offset = nir_imm_int(b, 0); + } + + return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset); } static nir_ssa_def * -build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var) +build_patch_offset(nir_builder *b, struct state *state, + uint32_t base, uint32_t comp, nir_ssa_def *offset) { - debug_assert(var && var->data.patch); - - return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var); + return build_per_vertex_offset(b, state, NULL, base, comp, offset); } static void @@ -444,9 +518,11 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) b->cursor = nir_before_instr(&intr->instr); nir_ssa_def *address = nir_load_tess_param_base_ir3(b); - nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr)); nir_ssa_def *offset = build_per_vertex_offset(b, state, - intr->src[0].ssa, intr->src[1].ssa, var); + intr->src[0].ssa, + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), + intr->src[1].ssa); replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL); break; @@ -462,12 +538,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) nir_ssa_def *value = intr->src[0].ssa; nir_ssa_def *address = nir_load_tess_param_base_ir3(b); - nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr)); nir_ssa_def *offset = build_per_vertex_offset(b, state, - intr->src[1].ssa, intr->src[2].ssa, var); + intr->src[1].ssa, + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), + intr->src[2].ssa); - replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address, - nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)))); + replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address, offset); break; } @@ -475,8 +552,6 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) case nir_intrinsic_load_output: { // src[] = { offset }. - nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr)); - b->cursor = nir_before_instr(&intr->instr); nir_ssa_def *address, *offset; @@ -486,13 +561,17 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) * are never used. most likely some issue with (sy) not properly * syncing with values coming from a second memory transaction. */ - if (is_tess_levels(var)) { + gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; + if (is_tess_levels(location)) { assert(intr->dest.ssa.num_components == 1); address = nir_load_tess_factor_base_ir3(b); - offset = build_tessfactor_base(b, var->data.location, state); + offset = build_tessfactor_base(b, location, state); } else { address = nir_load_tess_param_base_ir3(b); - offset = build_patch_offset(b, state, intr->src[0].ssa, var); + offset = build_patch_offset(b, state, + location, + nir_intrinsic_component(intr), + intr->src[0].ssa); } replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL); @@ -504,14 +583,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) /* write patch output to bo */ - nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr)); - b->cursor = nir_before_instr(&intr->instr); /* sparse writemask not supported */ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)); - if (is_tess_levels(var)) { + gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; + if (is_tess_levels(location)) { /* with tess levels are defined as float[4] and float[2], * but tess factor BO has smaller sizes for tris/isolines, * so we have to discard any writes beyond the number of @@ -519,7 +597,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) uint32_t inner_levels, outer_levels, levels; tess_level_components(state, &inner_levels, &outer_levels); - if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) + if (location == VARYING_SLOT_TESS_LEVEL_OUTER) levels = outer_levels; else levels = inner_levels; @@ -534,12 +612,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa, nir_load_tess_factor_base_ir3(b), - nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state))); + nir_iadd(b, offset, build_tessfactor_base(b, location, state))); nir_pop_if(b, nif); } else { nir_ssa_def *address = nir_load_tess_param_base_ir3(b); - nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var); + nir_ssa_def *offset = build_patch_offset(b, state, + location, + nir_intrinsic_component(intr), + intr->src[1].ssa); debug_assert(nir_intrinsic_component(intr) == 0); @@ -580,7 +661,7 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, nir_print_shader(shader, stderr); } - build_primitive_map(shader, nir_var_shader_out, &state.map); + build_primitive_map(shader, &state.map); memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc)); v->output_size = state.map.stride; @@ -672,9 +753,11 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) b->cursor = nir_before_instr(&intr->instr); nir_ssa_def *address = nir_load_tess_param_base_ir3(b); - nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr)); nir_ssa_def *offset = build_per_vertex_offset(b, state, - intr->src[0].ssa, intr->src[1].ssa, var); + intr->src[0].ssa, + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), + intr->src[1].ssa); replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL); break; @@ -683,10 +766,6 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) case nir_intrinsic_load_input: { // src[] = { offset }. - nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr)); - - debug_assert(var->data.patch); - b->cursor = nir_before_instr(&intr->instr); nir_ssa_def *address, *offset; @@ -696,13 +775,17 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) * are never used. most likely some issue with (sy) not properly * syncing with values coming from a second memory transaction. */ - if (is_tess_levels(var)) { + gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; + if (is_tess_levels(location)) { assert(intr->dest.ssa.num_components == 1); address = nir_load_tess_factor_base_ir3(b); - offset = build_tessfactor_base(b, var->data.location, state); + offset = build_tessfactor_base(b, location, state); } else { address = nir_load_tess_param_base_ir3(b); - offset = build_patch_offset(b, state, intr->src[0].ssa, var); + offset = build_patch_offset(b, state, + location, + nir_intrinsic_component(intr), + intr->src[0].ssa); } offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))); @@ -718,7 +801,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) } void -ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology) +ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology) { struct state state = { .topology = topology }; @@ -728,9 +811,6 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology) nir_print_shader(shader, stderr); } - /* Build map of inputs so we have the sizes. */ - build_primitive_map(shader, nir_var_shader_in, &state.map); - nir_function_impl *impl = nir_shader_get_entrypoint(shader); assert(impl); @@ -740,6 +820,8 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology) nir_foreach_block_safe (block, impl) lower_tess_eval_block(block, &b, &state); + v->input_size = calc_primitive_map_size(shader); + nir_metadata_preserve(impl, 0); } @@ -804,8 +886,6 @@ ir3_nir_lower_gs(nir_shader *shader) nir_print_shader(shader, stderr); } - build_primitive_map(shader, nir_var_shader_in, &state.map); - /* Create an output var for vertex_flags. This will be shadowed below, * same way regular outputs get shadowed, and this variable will become a * temporary. @@ -914,38 +994,3 @@ ir3_nir_lower_gs(nir_shader *shader) } } -uint32_t -ir3_link_geometry_stages(const struct ir3_shader_variant *producer, - const struct ir3_shader_variant *consumer, - uint32_t *locs) -{ - uint32_t num_loc = 0, factor; - - switch (consumer->type) { - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_GEOMETRY: - /* These stages load with ldlw, which expects byte offsets. */ - factor = 4; - break; - case MESA_SHADER_TESS_EVAL: - /* The tess eval shader uses ldg, which takes dword offsets. */ - factor = 1; - break; - default: - unreachable("bad shader stage"); - } - - nir_foreach_shader_in_variable(in_var, consumer->shader->nir) { - nir_foreach_shader_out_variable(out_var, producer->shader->nir) { - if (in_var->data.location == out_var->data.location) { - locs[in_var->data.driver_location] = - producer->output_loc[out_var->data.driver_location] * factor; - - debug_assert(num_loc <= in_var->data.driver_location + 1); - num_loc = in_var->data.driver_location + 1; - } - } - } - - return num_loc; -} diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 39870066956..f78d8026e2a 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -570,7 +570,13 @@ struct ir3_shader_variant { /* Size in dwords of all outputs for VS, size of entire patch for HS. */ uint32_t output_size; - /* Map from driver_location to byte offset in per-primitive storage */ + /* Expected size of incoming output_loc for HS, DS, and GS */ + uint32_t input_size; + + /* Map from location to offset in per-primitive storage. In dwords for + * HS, where varyings are read in the next stage via ldg with a dword + * offset, and in bytes for all other stages. + */ unsigned output_loc[32]; /* attributes (VS) / varyings (FS): diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index c2cc4dbd009..c1e6000c138 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -686,7 +686,7 @@ tu6_setup_streamout(struct tu_cs *cs, static void tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base, enum a6xx_state_block block, uint32_t offset, - uint32_t size, uint32_t *dwords) { + uint32_t size, const uint32_t *dwords) { assert(size % 4 == 0); tu_cs_emit_pkt7(cs, opcode, 3 + size); @@ -711,16 +711,14 @@ tu6_emit_link_map(struct tu_cs *cs, { const struct ir3_const_state *const_state = ir3_const_state(consumer); uint32_t base = const_state->offsets.primitive_map; - uint32_t patch_locs[MAX_VARYING] = { }, num_loc; - num_loc = ir3_link_geometry_stages(producer, consumer, patch_locs); - int size = DIV_ROUND_UP(num_loc, 4); + int size = DIV_ROUND_UP(consumer->input_size, 4); size = (MIN2(size + base, consumer->constlen) - base) * 4; if (size <= 0) return; tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size, - patch_locs); + producer->output_loc); } static uint16_t diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 5e79661c5bf..4784ac673bc 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -308,11 +308,7 @@ ir3_emit_link_map(struct fd_screen *screen, { const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t base = const_state->offsets.primitive_map; - uint32_t patch_locs[MAX_VARYING] = { }, num_loc; - - num_loc = ir3_link_geometry_stages(producer, v, patch_locs); - - int size = DIV_ROUND_UP(num_loc, 4); + int size = DIV_ROUND_UP(v->input_size, 4); /* truncate size to avoid writing constants that shader * does not use: @@ -324,7 +320,7 @@ ir3_emit_link_map(struct fd_screen *screen, size *= 4; if (size > 0) - emit_const_user(ring, v, base, size, patch_locs); + emit_const_user(ring, v, base, size, producer->output_loc); } /* emit stream-out buffers: */