diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index ebd4b0da962..8a2a7a004a1 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -116,9 +116,9 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) const struct ir3_compiler *compiler = v->shader->compiler; /* If the user forced a particular wavesize respect that. */ - if (v->shader->real_wavesize == IR3_SINGLE_ONLY) + if (v->real_wavesize == IR3_SINGLE_ONLY) return false; - if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) + if (v->real_wavesize == IR3_DOUBLE_ONLY) return true; /* We can't support more than compiler->branchstack_size diverging threads @@ -220,9 +220,9 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, */ if (v->has_barrier && (max_waves < waves_per_wg)) { mesa_loge( - "Compute shader (%s:%s) which has workgroup barrier cannot be used " + "Compute shader (%s) which has workgroup barrier cannot be used " "because it's impossible to have enough concurrent waves.", - v->shader->nir->info.name, v->shader->nir->info.label); + v->name); exit(1); } } @@ -381,7 +381,7 @@ ir3_collect_info(struct ir3_shader_variant *v) unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves( compiler, regs_count, info->double_threadsize); info->max_waves = MIN2(reg_independent_max_waves, reg_dependent_max_waves); - assert(info->max_waves <= v->shader->compiler->max_waves); + assert(info->max_waves <= v->compiler->max_waves); } static struct ir3_register * diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index d8d0bcd85b7..3319b9f8113 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2071,7 +2071,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) */ ctx->so->constlen = MAX2(ctx->so->constlen, - ctx->so->shader->num_reserved_user_consts + + ctx->so->num_reserved_user_consts + const_state->ubo_state.size / 16); } break; diff --git 
a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 24fead918b0..4fb4c0921f9 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -155,7 +155,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so) if (shader_debug_enabled(so->type)) { mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so), - so->shader->nir->info.name); + so->name); nir_log_shaderi(ctx->s); } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index ebda680e2d3..6a3bce58687 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -848,7 +848,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, const_state->num_ubos = nir->info.num_ubos; debug_assert((const_state->ubo_state.size % 16) == 0); - unsigned constoff = v->shader->num_reserved_user_consts + + unsigned constoff = v->num_reserved_user_consts + const_state->ubo_state.size / 16 + const_state->preamble_size; unsigned ptrsz = ir3_pointer_size(compiler); @@ -866,7 +866,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, if (v->type == MESA_SHADER_KERNEL) { const_state->offsets.kernel_params = constoff; - constoff += align(v->shader->cs.req_input_mem, 4) / 4; + constoff += align(v->cs.req_input_mem, 4) / 4; } if (const_state->num_driver_params > 0) { diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 113e0ee96e9..e7a2bc1ba02 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -449,7 +449,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) * first. 
*/ - uint32_t offset = v->shader->num_reserved_user_consts * 16; + uint32_t offset = v->num_reserved_user_consts * 16; for (uint32_t i = 0; i < state->num_enabled; i++) { uint32_t range_size = state->range[i].end - state->range[i].start; @@ -458,7 +458,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) assert(offset <= max_upload); offset += range_size; } - state->size = offset - v->shader->num_reserved_user_consts * 16; + state->size = offset - v->num_reserved_user_consts * 16; } bool diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 7c5c60c78d9..a36c14130bf 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -305,7 +305,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v) /* First, lower load/store_preamble. */ const struct ir3_const_state *const_state = ir3_const_state(v); - unsigned preamble_base = v->shader->num_reserved_user_consts * 4 + + unsigned preamble_base = v->num_reserved_user_consts * 4 + const_state->ubo_state.size / 4; unsigned preamble_size = const_state->preamble_size * 4; diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index b9b735f530d..96f944d0444 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -2572,7 +2572,7 @@ ir3_ra(struct ir3_shader_variant *v) * because on some gens the register file is not big enough to hold a * double-size wave with all 48 registers in use. 
*/ - if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) { + if (v->real_wavesize == IR3_DOUBLE_ONLY) { limit_pressure.full = MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16); } diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index bb8d67d8a0c..5aec6a77a28 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -271,7 +271,7 @@ assemble_variant(struct ir3_shader_variant *v) fprintf(stream, "Native code%s for unnamed %s shader %s with sha1 %s:\n", shader_overridden ? " (overridden)" : "", ir3_shader_stage(v), - v->shader->nir->info.name, sha1buf); + v->name, sha1buf); if (v->shader->type == MESA_SHADER_FRAGMENT) fprintf(stream, "SIMD0\n"); ir3_shader_disasm(v, v->bin, stream); @@ -328,6 +328,7 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, return NULL; v->id = ++shader->variant_count; + v->shader_id = shader->id; v->shader = shader; v->binning_pass = !!nonbinning; v->nonbinning = nonbinning; @@ -336,6 +337,48 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, v->mergedregs = shader->compiler->gen >= 6; v->stream_output = shader->stream_output; + v->name = ralloc_strdup(v, shader->nir->info.name); + + struct shader_info *info = &shader->nir->info; + switch (v->type) { + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + v->tess.primitive_mode = info->tess._primitive_mode; + v->tess.tcs_vertices_out = info->tess.tcs_vertices_out; + v->tess.spacing = info->tess.spacing; + v->tess.ccw = info->tess.ccw; + v->tess.point_mode = info->tess.point_mode; + break; + + case MESA_SHADER_GEOMETRY: + v->gs.output_primitive = info->gs.output_primitive; + v->gs.vertices_out = info->gs.vertices_out; + v->gs.invocations = info->gs.invocations; + v->gs.vertices_in = info->gs.vertices_in; + break; + + case MESA_SHADER_FRAGMENT: + v->fs.early_fragment_tests = info->fs.early_fragment_tests; + v->fs.color_is_dual_source = info->fs.color_is_dual_source; + 
break; + + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + /* Kernels size and upload their params from v->cs.req_input_mem too. */ + v->cs.req_input_mem = shader->cs.req_input_mem; + v->cs.req_local_mem = shader->cs.req_local_mem; + break; + + default: + break; + } + + v->num_ssbos = info->num_ssbos; + v->num_ibos = info->num_ssbos + info->num_images; + v->num_reserved_user_consts = shader->num_reserved_user_consts; + v->api_wavesize = shader->api_wavesize; + v->real_wavesize = shader->real_wavesize; + if (!v->binning_pass) v->const_state = rzalloc_size(v, sizeof(*v->const_state)); @@ -779,19 +822,19 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) fprintf( out, "; %s prog %d/%d: %u instr, %u nops, %u non-nops, %u mov, %u cov, %u dwords\n", - type, so->shader->id, so->id, so->info.instrs_count, so->info.nops_count, + type, so->shader_id, so->id, so->info.instrs_count, so->info.nops_count, so->info.instrs_count - so->info.nops_count, so->info.mov_count, so->info.cov_count, so->info.sizedwords); fprintf(out, "; %s prog %d/%d: %u last-baryf, %d half, %d full, %u constlen\n", - type, so->shader->id, so->id, so->info.last_baryf, + type, so->shader_id, so->id, so->info.last_baryf, so->info.max_half_reg + 1, so->info.max_reg + 1, so->constlen); fprintf( out, "; %s prog %d/%d: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, \n", - type, so->shader->id, so->id, so->info.instrs_per_cat[0], + type, so->shader_id, so->id, so->info.instrs_per_cat[0], so->info.instrs_per_cat[1], so->info.instrs_per_cat[2], so->info.instrs_per_cat[3], so->info.instrs_per_cat[4], so->info.instrs_per_cat[5], so->info.instrs_per_cat[6], @@ -800,7 +843,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) fprintf( out, "; %s prog %d/%d: %u sstall, %u (ss), %u systall, %u (sy), %d loops\n", - type, so->shader->id, so->id, so->info.sstall, so->info.ss, + type, so->shader_id, so->id, so->info.sstall, so->info.ss, so->info.systall, so->info.sy, so->loops); /* print shader type specific info: */ diff --git 
a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index a28f48902ad..8ab5c4a01bb 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -508,6 +508,9 @@ struct ir3_shader_variant { /* variant id (for debug) */ uint32_t id; + /* id of the shader the variant came from (for debug) */ + uint32_t shader_id; + struct ir3_shader_key key; /* vertex shaders can have an extra version for hwbinning pass, @@ -528,6 +531,8 @@ struct ir3_shader_variant { gl_shader_stage type; struct ir3_shader *shader; + char *name; + /* variant's copy of nir->constant_data (since we don't track the NIR in * the variant, and shader->nir is before the opt pass). Moves to v->bin * after assembly. @@ -731,6 +736,51 @@ struct ir3_shader_variant { /* Important for compute shader to determine max reg footprint */ bool has_barrier; + /* The offset where images start in the IBO array. */ + unsigned num_ssbos; + + /* The total number of SSBOs and images, i.e. the number of hardware IBOs. */ + unsigned num_ibos; + + unsigned num_reserved_user_consts; + + union { + struct { + enum tess_primitive_mode primitive_mode; + + /** The number of vertices in the TCS output patch. */ + uint8_t tcs_vertices_out; + unsigned spacing:2; /*gl_tess_spacing*/ + + /** Is the vertex order counterclockwise? */ + bool ccw:1; + bool point_mode:1; + } tess; + struct { + /** The output primitive type */ + uint16_t output_primitive; + + /** The maximum number of vertices the geometry shader might write. */ + uint16_t vertices_out; + + /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ + uint8_t invocations; + + /** The number of vertices received per input primitive (max. 
6) */ + uint8_t vertices_in:3; + } gs; + struct { + bool early_fragment_tests : 1; + bool color_is_dual_source : 1; + } fs; + struct { + unsigned req_input_mem; + unsigned req_local_mem; + } cs; + }; + + enum ir3_wavesize_option api_wavesize, real_wavesize; + /* For when we don't have a shader, variant's copy of streamout state */ struct ir3_stream_output_info stream_output; @@ -1094,11 +1144,7 @@ ir3_shader_halfregs(const struct ir3_shader_variant *v) static inline uint32_t ir3_shader_nibo(const struct ir3_shader_variant *v) { - /* The dummy variant used in binning mode won't have an actual shader. */ - if (!v->shader) - return 0; - - return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images; + return v->num_ibos; } static inline uint32_t diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 81655b37393..6b4946e28cc 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1181,10 +1181,8 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_VPC_CNTL_0_VIEWIDLOC(linkage.viewid_loc)); if (hs) { - shader_info *hs_info = &hs->shader->nir->info; - tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1); - tu_cs_emit(cs, hs_info->tess.tcs_vertices_out); + tu_cs_emit(cs, hs->tess.tcs_vertices_out); /* Total attribute slots in HS incoming patch. */ tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1); @@ -1194,10 +1192,10 @@ tu6_emit_vpc(struct tu_cs *cs, const uint32_t max_wave_input_size = 64; /* note: if HS is really just the VS extended, then this - * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out) + * should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out) * however that doesn't match the blob, and fails some dEQP tests. 
*/ - uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out; + uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out; uint32_t max_prims_per_wave = max_wave_input_size * wavesize / (vs->output_size * patch_control_points); prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave); @@ -1211,22 +1209,21 @@ tu6_emit_vpc(struct tu_cs *cs, /* In SPIR-V generated from GLSL, the tessellation primitive params are * are specified in the tess eval shader, but in SPIR-V generated from * HLSL, they are specified in the tess control shader. */ - shader_info *tess_info = - ds->shader->nir->info.tess.spacing == TESS_SPACING_UNSPECIFIED ? - &hs->shader->nir->info : &ds->shader->nir->info; + const struct ir3_shader_variant *tess = + ds->tess.spacing == TESS_SPACING_UNSPECIFIED ? hs : ds; tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_CNTL, 1); uint32_t output; - if (tess_info->tess.point_mode) + if (tess->tess.point_mode) output = TESS_POINTS; - else if (tess_info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) + else if (tess->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES) output = TESS_LINES; - else if (tess_info->tess.ccw) + else if (tess->tess.ccw) output = TESS_CCW_TRIS; else output = TESS_CW_TRIS; enum a6xx_tess_spacing spacing; - switch (tess_info->tess.spacing) { + switch (tess->tess.spacing) { case TESS_SPACING_EQUAL: spacing = TESS_EQUAL; break; @@ -1257,11 +1254,11 @@ tu6_emit_vpc(struct tu_cs *cs, } else { tu6_emit_link_map(cs, vs, gs, SB6_GS_SHADER); } - vertices_out = gs->shader->nir->info.gs.vertices_out - 1; - output = primitive_to_tess(gs->shader->nir->info.gs.output_primitive); - invocations = gs->shader->nir->info.gs.invocations - 1; + vertices_out = gs->gs.vertices_out - 1; + output = primitive_to_tess(gs->gs.output_primitive); + invocations = gs->gs.invocations - 1; /* Size of per-primitive alloction in ldlw memory in vec4s. 
*/ - vec4_size = gs->shader->nir->info.gs.vertices_in * + vec4_size = gs->gs.vertices_in * DIV_ROUND_UP(prev_stage_output_size, 4); tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1); @@ -1578,9 +1575,9 @@ tu6_emit_fs_outputs(struct tu_cs *cs, if (pipeline) { pipeline->lrz.fs_has_kill = fs->has_kill; - pipeline->lrz.early_fragment_tests = fs->shader->nir->info.fs.early_fragment_tests; + pipeline->lrz.early_fragment_tests = fs->fs.early_fragment_tests; - if ((fs->shader && !fs->shader->nir->info.fs.early_fragment_tests) && + if (!fs->fs.early_fragment_tests && (fs->no_earlyz || fs->has_kill || fs->writes_pos || fs->writes_stencilref || no_earlyz || fs->writes_smask)) { pipeline->lrz.force_late_z = true; } @@ -1601,7 +1598,7 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs, struct tu_device *dev = cs->device; uint32_t num_vertices = - hs ? cps_per_patch : gs->shader->nir->info.gs.vertices_in; + hs ? cps_per_patch : gs->gs.vertices_in; uint32_t vs_params[4] = { vs->output_size * num_vertices * 4, /* vs primitive stride */ @@ -1641,13 +1638,13 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs, tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0, hs_param_dwords, hs_params); if (gs) - num_vertices = gs->shader->nir->info.gs.vertices_in; + num_vertices = gs->gs.vertices_in; uint32_t ds_params[8] = { ds->output_size * num_vertices * 4, /* ds primitive stride */ ds->output_size * 4, /* ds vertex stride */ hs->output_size, /* hs vertex stride (dwords) */ - hs->shader->nir->info.tess.tcs_vertices_out, + hs->tess.tcs_vertices_out, tess_param_iova, tess_param_iova >> 32, tess_factor_iova, diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 0d001d1a15e..7349b415fe7 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -710,7 +710,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | 
FD_DIRTY_PROG)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); bool fragz = fp->no_earlyz || fp->has_kill || fp->writes_pos; - bool latez = fp->shader && !fp->shader->nir->info.fs.early_fragment_tests && fragz; + bool latez = !fp->fs.early_fragment_tests && fragz; bool clamp = !ctx->rasterizer->depth_clip_near; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_image.c b/src/gallium/drivers/freedreno/a4xx/fd4_image.c index 20e41907efa..12b38b6cdb1 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_image.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_image.c @@ -247,6 +247,6 @@ fd4_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, if (m->image_to_tex[index] != IBO_INVALID) emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader); - emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader); + emit_image_ssbo(ring, v->num_ssbos + index, &img, shader); } } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c index 8690b1651b1..f94a98370d7 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c @@ -226,7 +226,7 @@ fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, translate_image(&img, &so->si[index]); emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader); - emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, + emit_image_ssbo(ring, v->num_ssbos + index, &img, shader); } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index bd8e2500ef2..f8f5648b2d2 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -56,8 +56,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | - 
A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos + - v->shader->nir->info.num_images) | + A6XX_SP_CS_CONFIG_NIBO(ir3_shader_nibo(v)) | A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ @@ -70,7 +69,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); - uint32_t shared_size = MAX2(((int)v->shader->cs.req_local_mem - 1) / 1024, 1); + uint32_t shared_size = MAX2(((int)v->cs.req_local_mem - 1) / 1024, 1); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) | A6XX_SP_CS_UNKNOWN_A9B1_UNK6); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index b45bc293767..284803b21af 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -131,7 +131,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) */ unsigned num_vertices = emit->hs ? 
emit->patch_vertices - : emit->gs->shader->nir->info.gs.vertices_in; + : emit->gs->gs.vertices_in; uint32_t vs_params[4] = { emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ @@ -150,13 +150,13 @@ fd6_build_tess_consts(struct fd6_emit *emit) ARRAY_SIZE(hs_params)); if (emit->gs) - num_vertices = emit->gs->shader->nir->info.gs.vertices_in; + num_vertices = emit->gs->gs.vertices_in; uint32_t ds_params[4] = { emit->ds->output_size * num_vertices * 4, /* ds primitive stride */ emit->ds->output_size * 4, /* ds vertex stride */ emit->hs->output_size, /* hs vertex stride (dwords) */ - emit->hs->shader->nir->info.tess.tcs_vertices_out}; + emit->hs->tess.tcs_vertices_out}; emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params)); @@ -176,7 +176,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) 0, }; - num_vertices = emit->gs->shader->nir->info.gs.vertices_in; + num_vertices = emit->gs->gs.vertices_in; emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params)); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index ba894290e08..8a087015b41 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -586,7 +586,7 @@ compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); const struct ir3_shader_variant *fs = emit->fs; - if (fs->shader->nir->info.fs.early_fragment_tests) + if (fs->fs.early_fragment_tests) return A6XX_EARLY_Z; if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled || diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index 3a778b916fb..23c39a3424f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -156,20 +156,19 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, 
struct fd_ringbuffer *state = fd_submit_new_ringbuffer( ctx->batch->submit, - (v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images) * 16 * - 4, + ir3_shader_nibo(v) * 16 * 4, FD_RINGBUFFER_STREAMING); assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); uint32_t descriptor[FDL6_TEX_CONST_DWORDS]; - for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) { + for (unsigned i = 0; i < v->num_ssbos; i++) { fd6_ssbo_descriptor(ctx, &bufso->sb[i], descriptor); fd6_emit_single_plane_descriptor(state, bufso->sb[i].buffer, descriptor); } - for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) { - fd6_emit_image_descriptor(ctx, state, &imgso->si[i], true); + for (unsigned i = v->num_ssbos; i < v->num_ibos; i++) { + fd6_emit_image_descriptor(ctx, state, &imgso->si[i - v->num_ssbos], true); } return state; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 68cf39e1b59..351d5dc525b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -98,7 +98,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, #ifdef DEBUG /* Name should generally match what you get with MESA_SHADER_CAPTURE_PATH: */ - const char *name = so->shader->nir->info.name; + const char *name = so->name; if (name) fd_emit_string5(ring, name, strlen(name)); #endif @@ -552,7 +552,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4); bool fs_has_dual_src_color = - !binning_pass && fs->shader->nir->info.fs.color_is_dual_source; + !binning_pass && fs->fs.color_is_dual_source; OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1); OUT_RING(ring, @@ -719,15 +719,14 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, fd6_emit_immediates(ctx->screen, ds, ring); fd6_emit_link_map(ctx->screen, hs, ds, ring); - shader_info *hs_info = 
&hs->shader->nir->info; OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1); - OUT_RING(ring, hs_info->tess.tcs_vertices_out); + OUT_RING(ring, hs->tess.tcs_vertices_out); if (ctx->screen->info->a6xx.tess_use_shared) { unsigned hs_input_size = 6 + (3 * (vs->output_size - 1)); unsigned wave_input_size = MIN2(64, DIV_ROUND_UP(hs_input_size * 4, - hs_info->tess.tcs_vertices_out)); + hs->tess.tcs_vertices_out)); OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1); OUT_RING(ring, hs_input_size); @@ -736,7 +735,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, wave_input_size); } else { uint32_t hs_input_size = - hs_info->tess.tcs_vertices_out * vs->output_size / 4; + hs->tess.tcs_vertices_out * vs->output_size / 4; /* Total attribute slots in HS incoming patch. */ OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1); @@ -744,13 +743,13 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, const uint32_t wavesize = 64; const uint32_t max_wave_input_size = 64; - const uint32_t patch_control_points = hs_info->tess.tcs_vertices_out; + const uint32_t patch_control_points = hs->tess.tcs_vertices_out; /* note: if HS is really just the VS extended, then this - * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out) + * should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out) * however that doesn't match the blob, and fails some dEQP tests. 
*/ - uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out; + uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out; uint32_t max_prims_per_wave = max_wave_input_size * wavesize / (vs->output_size * patch_control_points); prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave); @@ -763,20 +762,19 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, wave_input_size); } - shader_info *ds_info = &ds->shader->nir->info; OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1); uint32_t output; - if (ds_info->tess.point_mode) + if (ds->tess.point_mode) output = TESS_POINTS; - else if (ds_info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) + else if (ds->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES) output = TESS_LINES; - else if (ds_info->tess.ccw) + else if (ds->tess.ccw) output = TESS_CCW_TRIS; else output = TESS_CW_TRIS; OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING( - fd6_gl2spacing(ds_info->tess.spacing)) | + fd6_gl2spacing(ds->tess.spacing)) | A6XX_PC_TESS_CNTL_OUTPUT(output)); OUT_PKT4(ring, REG_A6XX_VPC_DS_CLIP_CNTL, 1); @@ -1006,7 +1004,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); uint32_t output; - switch (gs->shader->nir->info.gs.output_primitive) { + switch (gs->gs.output_primitive) { case SHADER_PRIM_POINTS: output = TESS_POINTS; break; @@ -1021,10 +1019,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, } OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1); OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT( - gs->shader->nir->info.gs.vertices_out - 1) | + gs->gs.vertices_out - 1) | A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) | A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS( - gs->shader->nir->info.gs.invocations - 1)); + gs->gs.invocations - 1)); OUT_PKT4(ring, REG_A6XX_GRAS_GS_CL_CNTL, 1); OUT_RING(ring, A6XX_GRAS_GS_CL_CNTL_CLIP_MASK(clip_mask) | @@ -1041,7 +1039,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct 
fd_context *ctx, const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs; /* Size of per-primitive alloction in ldlw memory in vec4s. */ - uint32_t vec4_size = gs->shader->nir->info.gs.vertices_in * + uint32_t vec4_size = gs->gs.vertices_in * DIV_ROUND_UP(prev->output_size, 4); OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 2b2a870135a..5f2402c5677 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -444,7 +444,7 @@ emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v, if (v->constlen > offset) { ring_wfi(ctx->batch, ring); emit_const_user(ring, v, offset * 4, - align(v->shader->cs.req_input_mem, 4), + align(v->cs.req_input_mem, 4), info->input); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 8625625c835..c7c844274ee 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -102,14 +102,13 @@ dump_shader_info(struct ir3_shader_variant *v, static void upload_shader_variant(struct ir3_shader_variant *v) { - struct shader_info *info = &v->shader->nir->info; struct ir3_compiler *compiler = v->shader->compiler; assert(!v->bo); v->bo = fd_bo_new(compiler->dev, v->info.size, FD_BO_NOMAP, - "%s:%s", ir3_shader_stage(v), info->name); + "%s:%s", ir3_shader_stage(v), v->name); /* Always include shaders in kernel crash dumps. */ fd_bo_mark_for_dump(v->bo);