diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 89c060567e1..c987d216eda 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -32,8 +32,14 @@ struct fd_dev_info { uint32_t num_vsc_pipes; + /* The size of local memory in bytes */ uint32_t cs_shared_mem_size; + /* On at least a6xx, waves are always launched in pairs. In calculations + * about occupancy, we pretend that each wave pair is actually one wave, + * which simplifies many of the calculations, but means we have to + * multiply threadsize_base by this number. + */ int wave_granularity; /* These are fallback values that should match what drm/msm programs, for @@ -47,8 +53,12 @@ struct fd_dev_info { /* Information for private memory calculations */ uint32_t fibers_per_sp; + /* The base number of threads per wave. Some stages may be able to double + * this. + */ uint32_t threadsize_base; + /* The maximum number of simultaneous waves per core. */ uint32_t max_waves; /* Local Memory (i.e. shared memory in GL/Vulkan) and compute shader @@ -105,6 +115,7 @@ struct fd_dev_info { bool concurrent_resolve; bool has_z24uint_s8uint; + /* on a650, vertex shader <-> tess control io uses LDL/STL */ bool tess_use_shared; /* Does the hw support GL_QCOM_shading_rate? */ @@ -149,8 +160,16 @@ struct fd_dev_info { bool has_lpac; + /* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle + * instructions are supported which are necessary to support + * subgroup quad and arithmetic operations. + */ bool has_getfiberid; + + /* Whether half register shared->non-shared moves are broken. */ bool mov_half_shared_quirk; + + /* Whether movs is supported for subgroupBroadcast. */ bool has_movs; bool has_dp2acc; @@ -214,14 +233,31 @@ struct fd_dev_info { bool broken_ds_ubwc_quirk; - /* See ir3_compiler::has_scalar_alu. 
*/ + /* True if there is a scalar ALU capable of executing a subset of + * cat2-cat4 instructions with a shared register destination. This also + * implies expanded MOV/COV capability when writing to shared registers, + * as MOV/COV is now executed on the scalar ALU except when reading from a + * normal register, as well as the ability for ldc to write to a shared + * register. + */ bool has_scalar_alu; - /* See ir3_compiler::has_scalar_predicates. */ + + /* True if cat2 instructions can write predicate registers from the scalar + * ALU. + */ bool has_scalar_predicates; - /* See ir3_compiler::has_early_preamble. */ + + /* On all generations that support scalar ALU, there is also a copy of the + * scalar ALU and some other HW units in HLSQ that can execute preambles + * before work is dispatched to the SPs, called "early preamble". We detect + * whether the shader can use early preamble in ir3. + */ bool has_early_preamble; + /* Whether isam.v is supported to sample multiple components from SSBOs */ bool has_isam_v; + + /* Whether isam/stib/ldib have immediate offsets. */ bool has_ssbo_imm_offsets; /* Whether writing to UBWC attachment and reading the same image as input diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 2b80a3e82e6..43c16c4bea5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -213,7 +213,7 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) * in a wave. Thus, doubling the threadsize is only possible if we don't * exceed the branchstack size limit. 
*/ - if (MIN2(v->branchstack, compiler->threadsize_base * 2) > + if (MIN2(v->branchstack, compiler->info->threadsize_base * 2) > compiler->branchstack_size) { return false; } @@ -259,13 +259,13 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, bool double_threadsize) { const struct ir3_compiler *compiler = v->compiler; - unsigned max_waves = compiler->max_waves; + unsigned max_waves = compiler->info->max_waves; /* Compute the limit based on branchstack */ if (v->branchstack > 0) { unsigned branchstack_max_waves = compiler->branchstack_size / v->branchstack * - compiler->wave_granularity; + compiler->info->wave_granularity; max_waves = MIN2(max_waves, branchstack_max_waves); } @@ -275,17 +275,17 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, unsigned threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2]; unsigned waves_per_wg = - DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base * + DIV_ROUND_UP(threads_per_wg, compiler->info->threadsize_base * (double_threadsize ? 2 : 1) * - compiler->wave_granularity); + compiler->info->wave_granularity); /* Shared is allocated in chunks of 1k */ unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024); if (shared_per_wg > 0 && !v->local_size_variable) { - unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg; + unsigned wgs_per_core = compiler->info->cs_shared_mem_size / shared_per_wg; max_waves = MIN2(max_waves, waves_per_wg * wgs_per_core * - compiler->wave_granularity); + compiler->info->wave_granularity); } /* If we have a compute shader that has a big workgroup, a barrier, and @@ -316,8 +316,8 @@ ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler, { return reg_count ? (compiler->reg_size_vec4 / (reg_count * (double_threadsize ? 
2 : 1)) * - compiler->wave_granularity) - : compiler->max_waves; + compiler->info->wave_granularity) + : compiler->info->max_waves; } void @@ -569,7 +569,7 @@ ir3_collect_info(struct ir3_shader_variant *v) unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves( compiler, regs_count, info->double_threadsize); info->max_waves = MIN2(reg_independent_max_waves, reg_dependent_max_waves); - assert(info->max_waves <= v->compiler->max_waves); + assert(info->max_waves <= v->compiler->info->max_waves); ralloc_free(mem_ctx); } @@ -1362,7 +1362,7 @@ is_scalar_alu(struct ir3_instruction *instr, * supported, so that we treat them like vector->scalar mov instructions * (such as requiring (ss)). */ - compiler->has_scalar_alu && + compiler->info->props.has_scalar_alu && /* moves from normal to shared seem to use a separate ALU as before and * require a (ss) on dependent instructions. */ @@ -1712,7 +1712,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) /* Conversions seem not to work in shared->shared copies before scalar * ALU is supported. */ - if (!compiler->has_scalar_alu && + if (!compiler->info->props.has_scalar_alu && (flags & IR3_REG_SHARED) && (instr->dsts[0]->flags & IR3_REG_SHARED) && instr->cat1.src_type != instr->cat1.dst_type) @@ -2124,7 +2124,7 @@ ir3_cat3_absneg(struct ir3_compiler *compiler, opc_t opc, unsigned src_n) case OPC_SEL_B16: case OPC_SEL_B32: - return compiler->has_sel_b_fneg ? IR3_REG_FNEG : 0; + return compiler->info->props.has_sel_b_fneg ? 
IR3_REG_FNEG : 0; case OPC_SAD_S16: case OPC_SAD_S32: diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 82703473b1e..62e4e944672 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -24,7 +24,7 @@ lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr, nir_src *offset_src, struct ir3_instruction **offset, unsigned *imm_offset) { - if (ctx->compiler->has_ssbo_imm_offsets) { + if (ctx->compiler->info->props.has_ssbo_imm_offsets) { ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset); } else { assert(nir_intrinsic_base(intr) == 0); @@ -71,7 +71,7 @@ emit_load_uav(struct ir3_context *ctx, nir_intrinsic_instr *intr, ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; if (imm_offset_val) { - assert(ctx->compiler->has_ssbo_imm_offsets); + assert(ctx->compiler->info->props.has_ssbo_imm_offsets); ldib->flags |= IR3_INSTR_IMM_OFFSET; } @@ -163,7 +163,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; if (imm_offset_val) { - assert(ctx->compiler->has_ssbo_imm_offsets); + assert(ctx->compiler->info->props.has_ssbo_imm_offsets); stib->flags |= IR3_INSTR_IMM_OFFSET; } diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c index 826fdb4b9e6..12d65608215 100644 --- a/src/freedreno/ir3/ir3_alias.c +++ b/src/freedreno/ir3/ir3_alias.c @@ -746,7 +746,7 @@ create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end) bool ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v) { - if (!ir->compiler->has_alias_rt) + if (!ir->compiler->info->props.has_alias_rt) return false; if (ir3_shader_debug & IR3_DBG_NOALIASRT) return false; diff --git a/src/freedreno/ir3/ir3_cf.c b/src/freedreno/ir3/ir3_cf.c index 71696397b06..ac04e2f2d22 100644 --- a/src/freedreno/ir3/ir3_cf.c +++ b/src/freedreno/ir3/ir3_cf.c @@ -199,7 +199,7 @@ try_conversion_folding(struct 
ir3_instruction *conv, /* Don't fold in a conversion to a half register on gens where that is * broken. */ - if (compiler->mov_half_shared_quirk && + if (compiler->info->props.mov_half_shared_quirk && (conv->dsts[0]->flags & IR3_REG_HALF)) { return false; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 6e0e6e60862..d6d3ff07565 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -163,16 +163,13 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->gen = fd_dev_gen(dev_id); compiler->is_64bit = fd_dev_64b(dev_id); compiler->options = *options; + compiler->info = dev_info; /* TODO see if older GPU's were different here */ compiler->branchstack_size = 64; - compiler->wave_granularity = dev_info->wave_granularity; - compiler->max_waves = dev_info->max_waves; compiler->max_variable_workgroup_size = 1024; - compiler->local_mem_size = dev_info->cs_shared_mem_size; - compiler->num_predicates = 1; compiler->bitops_can_write_predicates = false; compiler->has_branch_and_or = false; @@ -227,16 +224,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->has_preamble = true; - compiler->tess_use_shared = dev_info->props.tess_use_shared; - - compiler->has_getfiberid = dev_info->props.has_getfiberid; - compiler->mov_half_shared_quirk = dev_info->props.mov_half_shared_quirk; - compiler->has_movs = dev_info->props.has_movs; - - compiler->has_dp2acc = dev_info->props.has_dp2acc; - compiler->has_dp4acc = dev_info->props.has_dp4acc; - compiler->has_compliant_dp4acc = dev_info->props.has_compliant_dp4acc; - if (compiler->gen == 6 && options->shared_push_consts) { compiler->shared_consts_base_offset = 504; compiler->shared_consts_size = 8; @@ -247,32 +234,13 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->geom_shared_consts_size_quirk = 0; } - compiler->has_fs_tex_prefetch = 
dev_info->props.has_fs_tex_prefetch; - compiler->stsc_duplication_quirk = dev_info->props.stsc_duplication_quirk; - compiler->load_shader_consts_via_preamble = dev_info->props.load_shader_consts_via_preamble; - compiler->load_inline_uniforms_via_preamble_ldgk = dev_info->props.load_inline_uniforms_via_preamble_ldgk; compiler->num_predicates = 4; compiler->bitops_can_write_predicates = true; compiler->has_branch_and_or = true; compiler->has_predication = true; - compiler->predtf_nop_quirk = dev_info->props.predtf_nop_quirk; - compiler->prede_nop_quirk = dev_info->props.prede_nop_quirk; - compiler->has_salu_int_narrowing_quirk = dev_info->props.has_salu_int_narrowing_quirk; - compiler->has_scalar_alu = dev_info->props.has_scalar_alu; - compiler->has_scalar_predicates = dev_info->props.has_scalar_predicates; - compiler->has_isam_v = dev_info->props.has_isam_v; - compiler->has_ssbo_imm_offsets = dev_info->props.has_ssbo_imm_offsets; - compiler->fs_must_have_non_zero_constlen_quirk = dev_info->props.fs_must_have_non_zero_constlen_quirk; - compiler->has_early_preamble = dev_info->props.has_early_preamble; compiler->has_rpt_bary_f = true; compiler->has_shfl = true; - compiler->reading_shading_rate_requires_smask_quirk = - dev_info->props.reading_shading_rate_requires_smask_quirk; - compiler->shading_rate_matches_vk = dev_info->props.shading_rate_matches_vk; - compiler->has_alias_rt = dev_info->props.has_alias_rt; compiler->mergedregs = true; - compiler->has_sel_b_fneg = dev_info->props.has_sel_b_fneg; - compiler->has_eolm_eogm = dev_info->props.has_eolm_eogm; compiler->has_alias_tex = (compiler->gen >= 7); @@ -291,11 +259,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, * earlier gen's. 
*/ compiler->max_const_safe = 256; - - compiler->has_scalar_alu = false; - compiler->has_isam_v = false; - compiler->has_ssbo_imm_offsets = false; - compiler->has_early_preamble = false; } if (dev_info->compute_lb_size) { @@ -303,7 +266,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, } else { compiler->compute_lb_size = compiler->max_const_compute * 16 /* bytes/vec4 */ * - compiler->wave_granularity + compiler->local_mem_size; + compiler->info->wave_granularity + compiler->info->cs_shared_mem_size; } /* This is just a guess for a4xx. */ @@ -325,8 +288,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->reg_size_vec4 = 96; } - compiler->threadsize_base = dev_info->threadsize_base; - if (compiler->gen >= 4) { /* need special handling for "flat" */ compiler->flat_bypass = true; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index c86845186c5..601f0e5822f 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -107,12 +107,11 @@ struct ir3_compiler { */ bool samgq_workaround; - /* on a650, vertex shader <-> tess control io uses LDL/STL */ - bool tess_use_shared; - /* Whether full and half regs are merged. */ bool mergedregs; + const struct fd_dev_info *info; + /* The maximum number of constants, in vec4's, across the entire graphics * pipeline. */ @@ -145,21 +144,6 @@ struct ir3_compiler { */ uint32_t const_upload_unit; - /* The base number of threads per wave. Some stages may be able to double - * this. - */ - uint32_t threadsize_base; - - /* On at least a6xx, waves are always launched in pairs. In calculations - * about occupancy, we pretend that each wave pair is actually one wave, - * which simplifies many of the calculations, but means we have to - * multiply threadsize_base by this number. - */ - uint32_t wave_granularity; - - /* The maximum number of simultaneous waves per core. 
*/ - uint32_t max_waves; - /* This is theoretical maximum number of vec4 registers that one wave of * the base threadsize could use. To get the actual size of the register * file in bytes one would need to compute: @@ -177,9 +161,6 @@ struct ir3_compiler { */ uint32_t reg_size_vec4; - /* The size of local memory in bytes */ - uint32_t local_mem_size; - /* The number of total branch stack entries, divided by wave_granularity. */ uint32_t branchstack_size; @@ -195,27 +176,9 @@ struct ir3_compiler { /* Whether SSBOs have descriptors for sampling with ISAM */ bool has_isam_ssbo; - /* Whether isam.v is supported to sample multiple components from SSBOs */ - bool has_isam_v; - - /* Whether isam/stib/ldib have immediate offsets. */ - bool has_ssbo_imm_offsets; - - /* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle - * instructions are supported which are necessary to support - * subgroup quad and arithmetic operations. - */ - bool has_getfiberid; - - /* Whether half register shared->non-shared moves are broken. */ - bool mov_half_shared_quirk; - /* Is lock/unlock sequence needed for CS? */ bool cs_lock_unlock_quirk; - /* Whether movs is supported for subgroupBroadcast. */ - bool has_movs; - /* True if the shfl instruction is supported. Needed for subgroup rotate and * (more efficient) shuffle. */ @@ -235,18 +198,10 @@ struct ir3_compiler { /* True if predt/predf/prede are supported. 
*/ bool has_predication; - bool predtf_nop_quirk; - bool prede_nop_quirk; - - bool has_salu_int_narrowing_quirk; /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */ uint32_t max_variable_workgroup_size; - bool has_dp2acc; - bool has_dp4acc; - bool has_compliant_dp4acc; - /* Type to use for 1b nir bools: */ type_t bool_type; @@ -273,55 +228,14 @@ struct ir3_compiler { */ uint64_t geom_shared_consts_size_quirk; - bool has_fs_tex_prefetch; - - bool stsc_duplication_quirk; - - bool load_shader_consts_via_preamble; - bool load_inline_uniforms_via_preamble_ldgk; - - /* True if there is a scalar ALU capable of executing a subset of - * cat2-cat4 instructions with a shared register destination. This also - * implies expanded MOV/COV capability when writing to shared registers, - * as MOV/COV is now executed on the scalar ALU except when reading from a - * normal register, as well as the ability for ldc to write to a shared - * register. - */ - bool has_scalar_alu; - - /* True if cat2 instructions can write predicate registers from the scalar - * ALU. - */ - bool has_scalar_predicates; - - bool fs_must_have_non_zero_constlen_quirk; - - /* On all generations that support scalar ALU, there is also a copy of the - * scalar ALU and some other HW units in HLSQ that can execute preambles - * before work is dispatched to the SPs, called "early preamble". We detect - * whether the shader can use early preamble in ir3. - */ - bool has_early_preamble; - /* True if (rptN) is supported for bary.f. */ bool has_rpt_bary_f; /* True if alias.tex is supported. */ bool has_alias_tex; - /* True if alias.rt is supported. */ - bool has_alias_rt; - - bool reading_shading_rate_requires_smask_quirk; - - bool shading_rate_matches_vk; - bool cat3_rel_offset_0_quirk; - bool has_sel_b_fneg; - - bool has_eolm_eogm; - struct { /* The number of cycles needed for the result of one ALU operation to be * available to another ALU operation. 
Only valid when the halfness of the diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index d3728cdd6f9..04ffc5d0116 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -349,7 +349,7 @@ create_cov(struct ir3_context *ctx, unsigned nrpt, */ if (src_type == TYPE_U8 && full_type(dst_type) == TYPE_U32) { struct ir3_instruction_rpt mask = create_immed_typed_shared_rpt( - &ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->has_scalar_alu); + &ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->info->props.has_scalar_alu); struct ir3_instruction_rpt cov = ir3_AND_B_rpt(&ctx->build, nrpt, src, 0, mask, 0); set_dst_flags(cov.rpts, nrpt, type_flags(dst_type)); @@ -367,7 +367,8 @@ create_cov(struct ir3_context *ctx, unsigned nrpt, struct ir3_instruction_rpt cov; if (op == nir_op_u2f16 || op == nir_op_u2f32) { struct ir3_instruction_rpt mask = create_immed_typed_shared_rpt( - &ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->has_scalar_alu); + &ctx->build, nrpt, 0xff, TYPE_U8, + ctx->compiler->info->props.has_scalar_alu); cov = ir3_AND_B_rpt(&ctx->build, nrpt, src, 0, mask, 0); set_dst_flags(cov.rpts, nrpt, IR3_REG_HALF); cov = ir3_COV_rpt(&ctx->build, nrpt, cov, TYPE_U16, dst_type); @@ -432,7 +433,7 @@ emit_alu_dot_4x8_as_dp4acc(struct ir3_context *ctx, nir_alu_instr *alu, struct ir3_instruction **dst, struct ir3_instruction **src) { - if (ctx->compiler->has_compliant_dp4acc) { + if (ctx->compiler->info->props.has_compliant_dp4acc) { dst[0] = ir3_DP4ACC(&ctx->build, src[0], 0, src[1], 0, src[2], 0); /* This is actually the LHS signedness attribute. 
@@ -576,7 +577,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) assert(dst_sz == 1 || ir3_supports_vectorized_nir_op(alu->op)); bool use_shared = !alu->def.divergent && - ctx->compiler->has_scalar_alu && + ctx->compiler->info->props.has_scalar_alu && /* it probably isn't worth emulating these with scalar-only ops */ alu->op != nir_op_udot_4x8_uadd && alu->op != nir_op_udot_4x8_uadd_sat && @@ -1126,9 +1127,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) struct ir3_instruction *src_rpt0[] = {src[0].rpts[0], src[1].rpts[0], src[2].rpts[0]}; - if (ctx->compiler->has_dp4acc) { + if (ctx->compiler->info->props.has_dp4acc) { emit_alu_dot_4x8_as_dp4acc(ctx, alu, dst.rpts, src_rpt0); - } else if (ctx->compiler->has_dp2acc) { + } else if (ctx->compiler->info->props.has_dp2acc) { emit_alu_dot_4x8_as_dp2acc(ctx, alu, dst.rpts, src_rpt0); } else { ir3_context_error(ctx, "ALU op should have been lowered: %s\n", @@ -1180,7 +1181,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, assert(nir_intrinsic_base(intr) == 0); unsigned ncomp = intr->num_components; - bool use_shared = !intr->def.divergent && ctx->compiler->has_scalar_alu; + bool use_shared = !intr->def.divergent && ctx->compiler->info->props.has_scalar_alu; struct ir3_instruction *offset = ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0]; struct ir3_instruction *idx = @@ -1215,7 +1216,7 @@ emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx, struct ir3_instruction *addr1 = ir3_create_addr1(&ctx->build, base); - bool use_shared = ctx->compiler->has_scalar_alu; + bool use_shared = ctx->compiler->info->props.has_scalar_alu; struct ir3_instruction *offset = ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0]; struct ir3_instruction *idx = @@ -1454,7 +1455,7 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, create_immed(b, intr->num_components), 0); /* for a650, use LDL for tess ctrl inputs: */ - if (ctx->so->type == MESA_SHADER_TESS_CTRL && 
ctx->compiler->tess_use_shared) + if (ctx->so->type == MESA_SHADER_TESS_CTRL && ctx->compiler->info->props.tess_use_shared) load->opc = OPC_LDL; load->cat6.type = utype_def(&intr->def); @@ -1484,7 +1485,7 @@ emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, /* for a650, use STL for vertex outputs used by tess ctrl shader: */ if (ctx->so->type == MESA_SHADER_VERTEX && ctx->so->key.tessellation && - ctx->compiler->tess_use_shared) + ctx->compiler->info->props.tess_use_shared) store->opc = OPC_STL; store->cat6.dst_offset = nir_intrinsic_base(intr); @@ -1969,7 +1970,7 @@ emit_readonly_load_uav(struct ir3_context *ctx, struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, index, false); struct ir3_instruction *src1; - if (ctx->compiler->has_isam_v && !uav_load) { + if (ctx->compiler->info->props.has_isam_v && !uav_load) { src1 = create_immed(b, imm_offset); } else { assert(imm_offset == 0); @@ -1988,7 +1989,7 @@ emit_readonly_load_uav(struct ir3_context *ctx, ir3_split_dest(b, dst, sam, 0, num_components); - if (ctx->compiler->has_isam_v && !uav_load) { + if (ctx->compiler->info->props.has_isam_v && !uav_load) { sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D); if (imm_offset) { @@ -2011,7 +2012,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, * Note: isam also can't handle 8-bit loads. 
*/ if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) || - (intr->def.num_components > 1 && !ctx->compiler->has_isam_v) || + (intr->def.num_components > 1 && !ctx->compiler->info->props.has_isam_v) || (ctx->compiler->options.storage_8bit && intr->def.bit_size == 8) || !ctx->compiler->has_isam_ssbo) { ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst); @@ -2023,7 +2024,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, struct ir3_instruction *coords = NULL; unsigned imm_offset = 0; - if (ctx->compiler->has_isam_v) { + if (ctx->compiler->info->props.has_isam_v) { ir3_lower_imm_offset(ctx, intr, offset_src, 8, &coords, &imm_offset); } else { coords = @@ -2619,7 +2620,7 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx, struct ir3_instruction *src, struct ir3_instruction *dst) { - if (!ctx->compiler->mov_half_shared_quirk) { + if (!ctx->compiler->info->props.mov_half_shared_quirk) { return dst; } @@ -2642,7 +2643,7 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx, } else { dst = ir3_MOV(&ctx->build, dst, TYPE_U32); } - if (!ctx->compiler->has_scalar_alu) + if (!ctx->compiler->info->props.has_scalar_alu) dst->dsts[0]->flags &= ~IR3_REG_SHARED; } @@ -2752,8 +2753,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } create_rpt = true; } else { - src = ctx->compiler->has_scalar_alu ? - ir3_get_src_maybe_shared(ctx, &intr->src[0]) : + src = ctx->compiler->info->props.has_scalar_alu ? + ir3_get_src_maybe_shared(ctx, &intr->src[0]) : ir3_get_src(ctx, &intr->src[0]); for (int i = 0; i < dest_components; i++) { dst[i] = create_uniform_indirect( @@ -2764,7 +2765,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) * registers, manually make it shared. Optimizations can undo this if * the user can't use shared regs. 
*/ - if (ctx->compiler->has_scalar_alu && !intr->def.divergent) + if (ctx->compiler->info->props.has_scalar_alu && !intr->def.divergent) dst[i]->dsts[0]->flags |= IR3_REG_SHARED; } @@ -3092,7 +3093,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0] = create_driver_param(ctx, IR3_DP_CS(work_dim)); break; case nir_intrinsic_load_subgroup_invocation: - assert(ctx->compiler->has_getfiberid); + assert(ctx->compiler->info->props.has_getfiberid); dst[0] = ir3_GETFIBERID(b); dst[0]->cat6.type = TYPE_U32; __ssa_dst(dst[0]); @@ -3166,7 +3167,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } else { /* unconditional discard: */ cond = create_immed_typed_shared(b, 1, ctx->compiler->bool_type, - ctx->compiler->has_scalar_alu); + ctx->compiler->info->props.has_scalar_alu); } cond = ir3_get_predicate(ctx, cond); @@ -3381,7 +3382,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *src = ir3_create_collect(b, ir3_get_src_shared(ctx, &intr->src[0], - ctx->compiler->has_scalar_alu), + ctx->compiler->info->props.has_scalar_alu), components); ir3_store_const(ctx->so, b, src, dst); break; @@ -4172,7 +4173,7 @@ emit_phi(struct ir3_context *ctx, nir_phi_instr *nphi) phi->phi.nphi = nphi; phi->phi.comp = i; - if (ctx->compiler->has_scalar_alu && !nphi->def.divergent) + if (ctx->compiler->info->props.has_scalar_alu && !nphi->def.divergent) phi->dsts[0]->flags |= IR3_REG_SHARED; dst[i] = phi; @@ -5442,7 +5443,7 @@ emit_instructions(struct ir3_context *ctx) emit_function(ctx, fxn); if (ctx->so->type == MESA_SHADER_TESS_CTRL && - ctx->compiler->tess_use_shared) { + ctx->compiler->info->props.tess_use_shared) { /* Anything before shpe seems to be ignored in the main shader when early * preamble is enabled on a7xx, so we have to put the barrier after. 
*/ @@ -5671,7 +5672,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (so->type == MESA_SHADER_FRAGMENT && so->reads_shading_rate && !so->reads_smask && - compiler->reading_shading_rate_requires_smask_quirk) { + compiler->info->props.reading_shading_rate_requires_smask_quirk) { create_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, 0x1); } @@ -5858,7 +5859,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, * for the binning variant, ir3_const_add_imm will ensure we don't add more * immediates than allowed. */ - if (so->binning_pass && !compiler->load_shader_consts_via_preamble && + if (so->binning_pass && !compiler->info->props.load_shader_consts_via_preamble && so->nonbinning->imm_state.size) { ASSERTED bool success = ir3_const_ensure_imm_size(so, so->nonbinning->imm_state.size); @@ -6091,7 +6092,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, } if (ctx->so->type == MESA_SHADER_FRAGMENT && - compiler->fs_must_have_non_zero_constlen_quirk) { + compiler->info->props.fs_must_have_non_zero_constlen_quirk) { so->constlen = MAX2(so->constlen, 4); } diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 7ca03f5a7e8..9fa6ad651cc 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -109,7 +109,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, /* Enable the texture pre-fetch feature only a4xx onwards. 
But * only enable it on generations that have been tested: */ - if ((so->type == MESA_SHADER_FRAGMENT) && compiler->has_fs_tex_prefetch) { + if ((so->type == MESA_SHADER_FRAGMENT) && compiler->info->props.has_fs_tex_prefetch) { NIR_PASS(_, ctx->s, ir3_nir_lower_tex_prefetch, &so->prefetch_bary_type); } @@ -435,7 +435,7 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) if (cond->dsts[0]->flags & IR3_REG_SHARED) { cond->dsts[0]->flags &= ~IR3_REG_SHARED; - if (ctx->compiler->has_scalar_predicates) { + if (ctx->compiler->info->props.has_scalar_predicates) { cond->dsts[0]->flags |= IR3_REG_UNIFORM; } } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index d70f8476ad2..b725d924403 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -311,7 +311,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, * 0xffff depending on whether src_type is signed or unsigned. * Float conversions behave as expected. */ - if (ctx->shader->compiler->has_salu_int_narrowing_quirk && + if (ctx->shader->compiler->info->props.has_salu_int_narrowing_quirk && (instr->opc == OPC_MOV) && (instr->cat1.dst_type != instr->cat1.src_type) && (type_size(instr->cat1.dst_type) < diff --git a/src/freedreno/ir3/ir3_dce.c b/src/freedreno/ir3/ir3_dce.c index 4d74264a789..025a5ae5e5a 100644 --- a/src/freedreno/ir3/ir3_dce.c +++ b/src/freedreno/ir3/ir3_dce.c @@ -154,7 +154,7 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) continue; if (instr->input.sysval == SYSTEM_VALUE_SAMPLE_MASK_IN && so->reads_shading_rate && - ir->compiler->reading_shading_rate_requires_smask_quirk) + ir->compiler->info->props.reading_shading_rate_requires_smask_quirk) continue; } diff --git a/src/freedreno/ir3/ir3_disk_cache.c b/src/freedreno/ir3/ir3_disk_cache.c index 43bb5fd6992..eccf86a4be7 100644 --- a/src/freedreno/ir3/ir3_disk_cache.c +++ b/src/freedreno/ir3/ir3_disk_cache.c @@ -128,7 +128,7 @@ retrieve_variant(struct 
blob_reader *blob, struct ir3_shader_variant *v) blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state)); } - if (!v->compiler->load_shader_consts_via_preamble) { + if (!v->compiler->info->props.load_shader_consts_via_preamble) { v->imm_state.size = blob_read_uint32(blob); v->imm_state.count = v->imm_state.size; uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]); @@ -157,7 +157,7 @@ store_variant(struct blob *blob, const struct ir3_shader_variant *v) /* When load_shader_consts_via_preamble, immediates are loaded in the * preamble and hence part of bin. */ - if (!v->compiler->load_shader_consts_via_preamble) { + if (!v->compiler->info->props.load_shader_consts_via_preamble) { blob_write_uint32(blob, v->imm_state.size); uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]); blob_write_bytes(blob, v->imm_state.values, immeds_sz); diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 8e29697d478..bc70f596e48 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -963,7 +963,7 @@ apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx, stsc->cat6.iim_val = n->push_consts.src_size; stsc->cat6.type = TYPE_U32; - if (ctx->compiler->stsc_duplication_quirk) { + if (ctx->compiler->info->props.stsc_duplication_quirk) { struct ir3_builder build = ir3_builder_at(ir3_after_instr(stsc)); struct ir3_instruction *nop = ir3_NOP(&build); nop->flags |= IR3_INSTR_SS; @@ -1353,11 +1353,11 @@ add_predication_workaround(struct ir3_compiler *compiler, struct ir3_instruction *predtf, struct ir3_instruction *prede) { - if (predtf && compiler->predtf_nop_quirk) { + if (predtf && compiler->info->props.predtf_nop_quirk) { add_nop_before_block(predtf->block->predecessors[0]->successors[1], 4); } - if (compiler->prede_nop_quirk) { + if (compiler->info->props.prede_nop_quirk) { add_nop_before_block(prede->block->successors[0], 6); } } @@ -2509,7 +2509,7 @@ ir3_legalize(struct ir3 *ir, 
struct ir3_shader_variant *so, int *max_bary) so->early_preamble = has_preamble && !gpr_in_preamble && !pred_in_preamble && !relative_in_preamble && - ir->compiler->has_early_preamble && + ir->compiler->info->props.has_early_preamble && !(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE); /* On a7xx, sync behavior for a1.x is different in the early preamble. RaW @@ -2567,7 +2567,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) kill_sched(ir, so); if ((so->type == MESA_SHADER_FRAGMENT || so->type == MESA_SHADER_COMPUTE) && - so->compiler->has_eolm_eogm) { + so->compiler->info->props.has_eolm_eogm) { feature_usage_sched(ctx, ir, so, needs_eolm, is_cheap_for_eolm_eogm, IR3_INSTR_EOLM); feature_usage_sched(ctx, ir, so, needs_eogm, is_cheap_for_eolm_eogm, diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c index 5441fef27b4..126e4950378 100644 --- a/src/freedreno/ir3/ir3_lower_parallelcopy.c +++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c @@ -557,7 +557,7 @@ ir3_lower_copies(struct ir3_shader_variant *v) * components of the normal src and its even neighbor and then * unswap afterwards to make it work for everything.
*/ - if (v->compiler->mov_half_shared_quirk && + if (v->compiler->info->props.mov_half_shared_quirk && (instr->dsts[0]->flags & IR3_REG_SHARED) && (instr->dsts[0]->flags & IR3_REG_HALF) && !(instr->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_IMMED | diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index 0bae999bfd4..c3542897556 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -704,7 +704,7 @@ lower_scan_reduce(struct nir_builder *b, nir_instr *instr, void *data) bool ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v) { - if (!v->compiler->has_getfiberid) + if (!v->compiler->info->props.has_getfiberid) return false; return nir_shader_lower_instructions(nir, filter_scan_reduce, @@ -721,7 +721,7 @@ ir3_nir_lower_subgroups_filter(const nir_intrinsic_instr *intrin, const void *da if (nir_intrinsic_cluster_size(intrin) == 1) { return true; } - if (nir_intrinsic_cluster_size(intrin) > 0 && !compiler->has_getfiberid) { + if (nir_intrinsic_cluster_size(intrin) > 0 && !compiler->info->props.has_getfiberid) { return true; } FALLTHROUGH; @@ -741,7 +741,7 @@ ir3_nir_lower_subgroups_filter(const nir_intrinsic_instr *intrin, const void *da return intrin->def.num_components > 1; } case nir_intrinsic_read_invocation: - return !compiler->has_movs; + return !compiler->info->props.has_movs; default: return true; } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 01af44c6059..62baec68ab4 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -149,7 +149,7 @@ ir3_nir_should_scalarize_mem(const nir_intrinsic_instr *intrin, const void *data */ if ((intrin->intrinsic == nir_intrinsic_load_ssbo) && (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) && - compiler->has_isam_ssbo && !compiler->has_isam_v) { + compiler->has_isam_ssbo && !compiler->info->props.has_isam_v) { return true; } @@ -187,7 +187,7 @@ 
ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, */ if ((low->intrinsic == nir_intrinsic_load_ssbo) && (nir_intrinsic_access(low) & ACCESS_CAN_REORDER) && - compiler->has_isam_ssbo && !compiler->has_isam_v) { + compiler->has_isam_ssbo && !compiler->info->props.has_isam_v) { return false; } @@ -910,12 +910,12 @@ ir3_nir_post_finalize(struct ir3_shader *shader) NIR_PASS(_, s, ir3_nir_move_varying_inputs); NIR_PASS(_, s, nir_lower_fb_read); NIR_PASS(_, s, ir3_nir_lower_layer_id); - if (!compiler->shading_rate_matches_vk) + if (!compiler->info->props.shading_rate_matches_vk) NIR_PASS(_, s, ir3_nir_lower_frag_shading_rate); } if (s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_GEOMETRY) { - if (!compiler->shading_rate_matches_vk) + if (!compiler->info->props.shading_rate_matches_vk) NIR_PASS(_, s, ir3_nir_lower_primitive_shading_rate); } @@ -981,7 +981,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader) if (!((s->info.stage == MESA_SHADER_COMPUTE) || (s->info.stage == MESA_SHADER_KERNEL) || - compiler->has_getfiberid)) { + compiler->info->props.has_getfiberid)) { options.subgroup_size = 1; options.lower_vote_trivial = true; } @@ -1323,7 +1323,7 @@ ir3_nir_set_threadsize(struct ir3_shader_variant *v, const nir_shader *s) * might make different barrier choices). 
*/ if (!info->workgroup_size_variable) { - if (threads_per_wg <= compiler->threadsize_base) + if (threads_per_wg <= compiler->info->threadsize_base) v->shader_options.real_wavesize = IR3_SINGLE_ONLY; } @@ -1338,7 +1338,7 @@ ir3_nir_set_threadsize(struct ir3_shader_variant *v, const nir_shader *s) */ if (compiler->gen < 6 && (info->workgroup_size_variable || - threads_per_wg > compiler->threadsize_base * compiler->max_waves)) { + threads_per_wg > compiler->info->threadsize_base * compiler->info->max_waves)) { v->shader_options.real_wavesize = IR3_DOUBLE_ONLY; }; } @@ -1463,7 +1463,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, progress |= OPT(s, ir3_nir_opt_subgroups, so); - if (so->compiler->load_shader_consts_via_preamble) + if (so->compiler->info->props.load_shader_consts_via_preamble) progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so); if (!so->binning_pass) { @@ -1503,7 +1503,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, !(ir3_shader_debug & IR3_DBG_NOPREAMBLE)) progress |= OPT(s, ir3_nir_opt_preamble, so); - if (so->compiler->load_shader_consts_via_preamble) + if (so->compiler->info->props.load_shader_consts_via_preamble) progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so); /* Do matrix reassociate after preamble, because we want uniform matrix @@ -1891,7 +1891,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4, 1); } - if (!compiler->load_shader_consts_via_preamble) { + if (!compiler->info->props.load_shader_consts_via_preamble) { switch (v->type) { case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 8fecd03213b..eab3863b934 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -617,7 +617,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) ptrs_vec4, 1); 
} - uint32_t align_vec4 = compiler->load_shader_consts_via_preamble + uint32_t align_vec4 = compiler->info->props.load_shader_consts_via_preamble ? 1 : compiler->const_upload_unit; diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c index 7de2d9ebbfa..dd89a6e7efe 100644 --- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c @@ -325,7 +325,7 @@ ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data) { const struct ir3_compiler *compiler = data; - if (!compiler->has_ssbo_imm_offsets) + if (!compiler->info->props.has_ssbo_imm_offsets) return 0; switch (intrin->intrinsic) { diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index be3175abb84..b44237080d2 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -374,7 +374,7 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, * HS uses a different primitive id, which starts at bit 16 in the header */ if (shader->info.stage == MESA_SHADER_TESS_CTRL && - v->compiler->tess_use_shared) + v->compiler->info->props.tess_use_shared) state.local_primitive_id_start = 16; nir_function_impl *impl = nir_shader_get_entrypoint(shader); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index d80e0beb74d..7c2b76bbf26 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -238,7 +238,7 @@ rewrite_cost(nir_def *def, const void *data) nir_intrinsic_instr *parent_intrin = nir_instr_as_intrinsic(parent_instr); - if (v->compiler->has_alias_rt && v->type == MESA_SHADER_FRAGMENT && + if (v->compiler->info->props.has_alias_rt && v->type == MESA_SHADER_FRAGMENT && parent_intrin->intrinsic == nir_intrinsic_store_output && def->bit_size == 32) { /* For FS outputs, alias.rt can use const registers without a mov. 
diff --git a/src/freedreno/ir3/ir3_opt_predicates.c b/src/freedreno/ir3/ir3_opt_predicates.c index 0eea09b9572..a5d1d38922e 100644 --- a/src/freedreno/ir3/ir3_opt_predicates.c +++ b/src/freedreno/ir3/ir3_opt_predicates.c @@ -59,7 +59,8 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx, clone->dsts[0]->flags |= IR3_REG_PREDICATE; clone->dsts[0]->flags &= ~(IR3_REG_HALF | IR3_REG_SHARED); - if (ctx->ir->compiler->has_scalar_predicates && opc_cat(instr->opc) == 2 && + if (ctx->ir->compiler->info->props.has_scalar_predicates && + opc_cat(instr->opc) == 2 && (instr->dsts[0]->flags & IR3_REG_SHARED)) { clone->dsts[0]->flags |= IR3_REG_UNIFORM; } @@ -77,7 +78,7 @@ can_write_predicate(struct opt_predicates_ctx *ctx, case OPC_CMPS_U: case OPC_CMPS_F: return !cat2_needs_scalar_alu(instr) || - ctx->ir->compiler->has_scalar_predicates; + ctx->ir->compiler->info->props.has_scalar_predicates; case OPC_AND_B: case OPC_OR_B: case OPC_NOT_B: @@ -85,7 +86,7 @@ can_write_predicate(struct opt_predicates_ctx *ctx, case OPC_GETBIT_B: return ctx->ir->compiler->bitops_can_write_predicates && (!cat2_needs_scalar_alu(instr) || - ctx->ir->compiler->has_scalar_predicates); + ctx->ir->compiler->info->props.has_scalar_predicates); default: return false; } diff --git a/src/freedreno/ir3/ir3_preamble.c b/src/freedreno/ir3/ir3_preamble.c index bbfa7fb01b5..70400a8dadc 100644 --- a/src/freedreno/ir3/ir3_preamble.c +++ b/src/freedreno/ir3/ir3_preamble.c @@ -11,7 +11,7 @@ bool ir3_imm_const_to_preamble(struct ir3 *ir, struct ir3_shader_variant *so) { - if (!ir->compiler->load_shader_consts_via_preamble) { + if (!ir->compiler->info->props.load_shader_consts_via_preamble) { return false; } diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 25286ca82a4..02e23a8a972 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -2708,7 +2708,7 @@ calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v, if (v->local_size_variable) { if (v->type == 
MESA_SHADER_KERNEL) { - threads_per_wg = compiler->threadsize_base * (double_threadsize ? 2 : 1); + threads_per_wg = compiler->info->threadsize_base * (double_threadsize ? 2 : 1); } else { /* We have to expect the worst case. */ threads_per_wg = compiler->max_variable_workgroup_size; @@ -2726,8 +2726,8 @@ calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v, */ unsigned waves_per_wg = DIV_ROUND_UP( - threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) * - compiler->wave_granularity); + threads_per_wg, compiler->info->threadsize_base * (double_threadsize ? 2 : 1) * + compiler->info->wave_granularity); uint32_t vec4_regs_per_thread = compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1)); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index c163f259368..95c58aa87db 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -47,7 +47,7 @@ ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size) * should be the same for the binning and non-binning variants. Make sure * we don't increase the size beyond that of the non-binning variant. */ - if (v->binning_pass && !v->compiler->load_shader_consts_via_preamble && + if (v->binning_pass && !v->compiler->info->props.load_shader_consts_via_preamble && size > v->nonbinning->imm_state.size) { return false; } @@ -1333,10 +1333,10 @@ ir3_shader_get_subgroup_size(const struct ir3_compiler *compiler, { switch (options->api_wavesize) { case IR3_SINGLE_ONLY: - *subgroup_size = *max_subgroup_size = compiler->threadsize_base; + *subgroup_size = *max_subgroup_size = compiler->info->threadsize_base; break; case IR3_DOUBLE_ONLY: - *subgroup_size = *max_subgroup_size = compiler->threadsize_base * 2; + *subgroup_size = *max_subgroup_size = compiler->info->threadsize_base * 2; break; case IR3_SINGLE_OR_DOUBLE: /* For vertex stages, we know the wavesize will never be doubled. 
@@ -1345,10 +1345,10 @@ ir3_shader_get_subgroup_size(const struct ir3_compiler *compiler, * a driver param. */ if (stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_FRAGMENT) { - *subgroup_size = *max_subgroup_size = compiler->threadsize_base; + *subgroup_size = *max_subgroup_size = compiler->info->threadsize_base; } else { *subgroup_size = 0; - *max_subgroup_size = compiler->threadsize_base * 2; + *max_subgroup_size = compiler->info->threadsize_base * 2; } break; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 4457a7a3295..e084b5a4919 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1073,7 +1073,7 @@ static inline unsigned ir3_max_const_compute(const struct ir3_shader_variant *v, const struct ir3_compiler *compiler) { - unsigned lm_size = v->local_size_variable ? compiler->local_mem_size : + unsigned lm_size = v->local_size_variable ? compiler->info->cs_shared_mem_size : v->cs.req_local_mem; /* The LB is divided between consts and local memory. LB is split into @@ -1088,7 +1088,7 @@ ir3_max_const_compute(const struct ir3_shader_variant *v, * configuration where there is enough space for LM. 
*/ unsigned lb_const_size = - ((compiler->compute_lb_size - lm_size) / compiler->wave_granularity) / + ((compiler->compute_lb_size - lm_size) / compiler->info->wave_granularity) / 16 /* bytes per vec4 */; if (lb_const_size < compiler->max_const_compute) { const uint32_t lb_const_sizes[] = { 128, 192, 256, 512 }; diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c index 9b6ef2f7ae3..4246a48e896 100644 --- a/src/freedreno/ir3/ir3_shared_ra.c +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -848,7 +848,7 @@ can_demote_src(struct ir3_instruction *instr) case OPC_META_COLLECT: return false; case OPC_MOV: - if (instr->block->shader->compiler->has_salu_int_narrowing_quirk) { + if (instr->block->shader->compiler->info->props.has_salu_int_narrowing_quirk) { /* Avoid demoting something that would cause narrowing integer * conversion from GPR to uGPR: */ diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 25e532cbf42..f047c8bdb69 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -67,7 +67,7 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg) } if (reg->flags & IR3_REG_UNIFORM) { - validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates); + validate_assert(ctx, ctx->ir->compiler->info->props.has_scalar_predicates); validate_assert(ctx, reg->flags & IR3_REG_PREDICATE); } @@ -334,7 +334,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) if ((opc_cat(instr->opc) == 2 || opc_cat(instr->opc) == 3 || opc_cat(instr->opc) == 4)) { validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED) || - ctx->ir->compiler->has_scalar_alu); + ctx->ir->compiler->info->props.has_scalar_alu); } /* Check that src/dst types match the register types, and for @@ -343,7 +343,7 @@ */ switch (opc_cat(instr->opc)) { case 1: /* move instructions */ - if
(ctx->ir->compiler->has_salu_int_narrowing_quirk && + if (ctx->ir->compiler->info->props.has_salu_int_narrowing_quirk && (instr->opc == OPC_MOV) && (instr->cat1.dst_type != instr->cat1.src_type) && (type_size(instr->cat1.dst_type) < diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index ad065a16b73..4ef66a0dfd4 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -1794,7 +1794,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, * UCHE because the FS param patchpoint is read through UCHE. */ tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); - if (cmd->device->compiler->load_shader_consts_via_preamble) { + if (cmd->device->compiler->info->props.load_shader_consts_via_preamble) { tu_emit_event_write(cmd, cs, FD_CACHE_INVALIDATE); tu_cs_emit_wfi(cs); } @@ -8094,7 +8094,7 @@ tu7_emit_fs_params(struct tu_cmd_buffer *cmd) static void tu_emit_fs_params(struct tu_cmd_buffer *cmd) { - if (cmd->device->compiler->load_shader_consts_via_preamble) + if (cmd->device->compiler->info->props.load_shader_consts_via_preamble) tu7_emit_fs_params(cmd); else tu6_emit_fs_params(cmd); diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 0c6ec725b93..7dfc47c64c0 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -5070,7 +5070,7 @@ tu_GetPipelineExecutablePropertiesKHR( VK_COPY_STR(props->description, _mesa_shader_stage_to_string(stage)); props->subgroupSize = - dev->compiler->threadsize_base * (exe->stats.double_threadsize ? 2 : 1); + dev->compiler->info->threadsize_base * (exe->stats.double_threadsize ? 
2 : 1); } } diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index af40ab313a4..47ca0bc6e4a 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -349,7 +349,7 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b, */ base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * FDL6_TEX_CONST_DWORDS)); nir_def *dynamic_offset_start; - if (compiler->load_shader_consts_via_preamble) { + if (compiler->info->props.load_shader_consts_via_preamble) { dynamic_offset_start = ir3_load_driver_ubo(b, 1, &shader->const_state.dynamic_offsets_ubo, set); } else { @@ -646,7 +646,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_load_frag_offset_ir3: case nir_intrinsic_load_gmem_frag_scale_ir3: case nir_intrinsic_load_gmem_frag_offset_ir3: { - if (!dev->compiler->load_shader_consts_via_preamble) + if (!dev->compiler->info->props.load_shader_consts_via_preamble) return false; unsigned param; @@ -679,7 +679,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, return true; } case nir_intrinsic_load_frag_invocation_count: { - if (!dev->compiler->load_shader_consts_via_preamble) + if (!dev->compiler->info->props.load_shader_consts_via_preamble) return false; nir_def *result = @@ -3365,10 +3365,10 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key, api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE; } else { if (subgroup_info) { - if (subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base) { + if (subgroup_info->requiredSubgroupSize == dev->compiler->info->threadsize_base) { api_wavesize = IR3_SINGLE_ONLY; } else { - assert(subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2); + assert(subgroup_info->requiredSubgroupSize == dev->compiler->info->threadsize_base * 2); api_wavesize = IR3_DOUBLE_ONLY; } } else { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h 
b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index 3d1e0b4b673..ed223487b1c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -93,10 +93,10 @@ static inline bool fd6_load_shader_consts_via_preamble(const struct ir3_shader_variant *v) { if (CHIP == A8XX) { - assert(v->compiler->load_shader_consts_via_preamble); + assert(v->compiler->info->props.load_shader_consts_via_preamble); return true; } - return (CHIP == A7XX) && v->compiler->load_shader_consts_via_preamble; + return (CHIP == A7XX) && v->compiler->info->props.load_shader_consts_via_preamble; } template @@ -104,10 +104,10 @@ static inline bool fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v) { if (CHIP == A8XX) { - assert(v->compiler->load_inline_uniforms_via_preamble_ldgk); + assert(v->compiler->info->props.load_inline_uniforms_via_preamble_ldgk); return true; } - return (CHIP == A7XX) && v->compiler->load_inline_uniforms_via_preamble_ldgk; + return (CHIP == A7XX) && v->compiler->info->props.load_inline_uniforms_via_preamble_ldgk; } template diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index c310c2c907d..2d42e21a27a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -48,7 +48,7 @@ emit_const_asserts(const struct ir3_shader_variant *v, uint32_t regid, uint32_t sizedwords) { assert((v->type == MESA_SHADER_VERTEX) || - !v->compiler->load_shader_consts_via_preamble); + !v->compiler->info->props.load_shader_consts_via_preamble); assert((regid % 4) == 0); assert((sizedwords % 4) == 0); assert(regid + sizedwords <= v->constlen * 4);