ir3: Use fd_dev_info from ir3_compiler

Rather than copying an ever-growing list of params from fd_dev_info to
ir3_compiler, just store the info pointer in the compiler and use that
directly.

Mechanical change, but it deletes code and removes an extra step from
adding compiler-related dev info props.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39442>
Author: Rob Clark, 2026-01-21 09:52:28 -08:00 (committed by Marge Bot)
parent c8375c0f71
commit 4e28ac2870
32 changed files with 150 additions and 237 deletions
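
For illustration, the consumer-side pattern before and after this change looks
roughly like the following. This is a minimal sketch only: the helper names and
include paths are hypothetical, not code from this series, but the fields match
the diffs below (scalar per-device values sit directly on fd_dev_info, boolean
feature/quirk flags under info->props).

    /* Sketch only: include paths are illustrative and may differ in-tree. */
    #include "freedreno_dev_info.h"
    #include "ir3_compiler.h"

    static inline unsigned
    example_threads_per_wave_pair(const struct ir3_compiler *compiler)
    {
       /* was: compiler->threadsize_base * compiler->wave_granularity */
       return compiler->info->threadsize_base * compiler->info->wave_granularity;
    }

    static inline bool
    example_has_scalar_alu(const struct ir3_compiler *compiler)
    {
       /* was: compiler->has_scalar_alu */
       return compiler->info->props.has_scalar_alu;
    }

Per the commit message, the upside is that a new compiler-relevant property only
needs to be added to fd_dev_info; ir3 then sees it through compiler->info without
a mirrored ir3_compiler field and a copy in ir3_compiler_create().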

View file

@ -32,8 +32,14 @@ struct fd_dev_info {
uint32_t num_vsc_pipes;
/* The size of local memory in bytes */
uint32_t cs_shared_mem_size;
/* On at least a6xx, waves are always launched in pairs. In calculations
* about occupancy, we pretend that each wave pair is actually one wave,
* which simplifies many of the calculations, but means we have to
* multiply threadsize_base by this number.
*/
int wave_granularity;
/* These are fallback values that should match what drm/msm programs, for
@ -47,8 +53,12 @@ struct fd_dev_info {
/* Information for private memory calculations */
uint32_t fibers_per_sp;
/* The base number of threads per wave. Some stages may be able to double
* this.
*/
uint32_t threadsize_base;
/* The maximum number of simultaneous waves per core. */
uint32_t max_waves;
/* Local Memory (i.e. shared memory in GL/Vulkan) and compute shader
@ -105,6 +115,7 @@ struct fd_dev_info {
bool concurrent_resolve;
bool has_z24uint_s8uint;
/* on a650, vertex shader <-> tess control io uses LDL/STL */
bool tess_use_shared;
/* Does the hw support GL_QCOM_shading_rate? */
@ -149,8 +160,16 @@ struct fd_dev_info {
bool has_lpac;
/* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle
* instructions are supported which are necessary to support
* subgroup quad and arithmetic operations.
*/
bool has_getfiberid;
/* Whether half register shared->non-shared moves are broken. */
bool mov_half_shared_quirk;
/* Whether movs is supported for subgroupBroadcast. */
bool has_movs;
bool has_dp2acc;
@ -214,14 +233,31 @@ struct fd_dev_info {
bool broken_ds_ubwc_quirk;
/* See ir3_compiler::has_scalar_alu. */
/* True if there is a scalar ALU capable of executing a subset of
* cat2-cat4 instructions with a shared register destination. This also
* implies expanded MOV/COV capability when writing to shared registers,
* as MOV/COV is now executed on the scalar ALU except when reading from a
* normal register, as well as the ability for ldc to write to a shared
* register.
*/
bool has_scalar_alu;
/* See ir3_compiler::has_scalar_predicates. */
/* True if cat2 instructions can write predicate registers from the scalar
* ALU.
*/
bool has_scalar_predicates;
/* See ir3_compiler::has_early_preamble. */
/* On all generations that support scalar ALU, there is also a copy of the
* scalar ALU and some other HW units in HLSQ that can execute preambles
* before work is dispatched to the SPs, called "early preamble". We detect
* whether the shader can use early preamble in ir3.
*/
bool has_early_preamble;
/* Whether isam.v is supported to sample multiple components from SSBOs */
bool has_isam_v;
/* Whether isam/stib/ldib have immediate offsets. */
bool has_ssbo_imm_offsets;
/* Whether writing to UBWC attachment and reading the same image as input

View file

@ -213,7 +213,7 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
* in a wave. Thus, doubling the threadsize is only possible if we don't
* exceed the branchstack size limit.
*/
if (MIN2(v->branchstack, compiler->threadsize_base * 2) >
if (MIN2(v->branchstack, compiler->info->threadsize_base * 2) >
compiler->branchstack_size) {
return false;
}
@ -259,13 +259,13 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
bool double_threadsize)
{
const struct ir3_compiler *compiler = v->compiler;
unsigned max_waves = compiler->max_waves;
unsigned max_waves = compiler->info->max_waves;
/* Compute the limit based on branchstack */
if (v->branchstack > 0) {
unsigned branchstack_max_waves = compiler->branchstack_size /
v->branchstack *
compiler->wave_granularity;
compiler->info->wave_granularity;
max_waves = MIN2(max_waves, branchstack_max_waves);
}
@ -275,17 +275,17 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
unsigned threads_per_wg =
v->local_size[0] * v->local_size[1] * v->local_size[2];
unsigned waves_per_wg =
DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base *
DIV_ROUND_UP(threads_per_wg, compiler->info->threadsize_base *
(double_threadsize ? 2 : 1) *
compiler->wave_granularity);
compiler->info->wave_granularity);
/* Shared is allocated in chunks of 1k */
unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024);
if (shared_per_wg > 0 && !v->local_size_variable) {
unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg;
unsigned wgs_per_core = compiler->info->cs_shared_mem_size / shared_per_wg;
max_waves = MIN2(max_waves, waves_per_wg * wgs_per_core *
compiler->wave_granularity);
compiler->info->wave_granularity);
}
/* If we have a compute shader that has a big workgroup, a barrier, and
@ -316,8 +316,8 @@ ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
{
return reg_count ? (compiler->reg_size_vec4 /
(reg_count * (double_threadsize ? 2 : 1)) *
compiler->wave_granularity)
: compiler->max_waves;
compiler->info->wave_granularity)
: compiler->info->max_waves;
}
void
@ -569,7 +569,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
compiler, regs_count, info->double_threadsize);
info->max_waves = MIN2(reg_independent_max_waves, reg_dependent_max_waves);
assert(info->max_waves <= v->compiler->max_waves);
assert(info->max_waves <= v->compiler->info->max_waves);
ralloc_free(mem_ctx);
}
@ -1362,7 +1362,7 @@ is_scalar_alu(struct ir3_instruction *instr,
* supported, so that we treat them like vector->scalar mov instructions
* (such as requiring (ss)).
*/
compiler->has_scalar_alu &&
compiler->info->props.has_scalar_alu &&
/* moves from normal to shared seem to use a separate ALU as before and
* require a (ss) on dependent instructions.
*/
@ -1712,7 +1712,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
/* Conversions seem not to work in shared->shared copies before scalar
* ALU is supported.
*/
if (!compiler->has_scalar_alu &&
if (!compiler->info->props.has_scalar_alu &&
(flags & IR3_REG_SHARED) &&
(instr->dsts[0]->flags & IR3_REG_SHARED) &&
instr->cat1.src_type != instr->cat1.dst_type)
@ -2124,7 +2124,7 @@ ir3_cat3_absneg(struct ir3_compiler *compiler, opc_t opc, unsigned src_n)
case OPC_SEL_B16:
case OPC_SEL_B32:
return compiler->has_sel_b_fneg ? IR3_REG_FNEG : 0;
return compiler->info->props.has_sel_b_fneg ? IR3_REG_FNEG : 0;
case OPC_SAD_S16:
case OPC_SAD_S32:

View file

@ -24,7 +24,7 @@ lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
nir_src *offset_src,
struct ir3_instruction **offset, unsigned *imm_offset)
{
if (ctx->compiler->has_ssbo_imm_offsets) {
if (ctx->compiler->info->props.has_ssbo_imm_offsets) {
ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset);
} else {
assert(nir_intrinsic_base(intr) == 0);
@ -71,7 +71,7 @@ emit_load_uav(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
if (imm_offset_val) {
assert(ctx->compiler->has_ssbo_imm_offsets);
assert(ctx->compiler->info->props.has_ssbo_imm_offsets);
ldib->flags |= IR3_INSTR_IMM_OFFSET;
}
@ -163,7 +163,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
if (imm_offset_val) {
assert(ctx->compiler->has_ssbo_imm_offsets);
assert(ctx->compiler->info->props.has_ssbo_imm_offsets);
stib->flags |= IR3_INSTR_IMM_OFFSET;
}

View file

@ -746,7 +746,7 @@ create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end)
bool
ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v)
{
if (!ir->compiler->has_alias_rt)
if (!ir->compiler->info->props.has_alias_rt)
return false;
if (ir3_shader_debug & IR3_DBG_NOALIASRT)
return false;

View file

@ -199,7 +199,7 @@ try_conversion_folding(struct ir3_instruction *conv,
/* Don't fold in a conversion to a half register on gens where that is
* broken.
*/
if (compiler->mov_half_shared_quirk &&
if (compiler->info->props.mov_half_shared_quirk &&
(conv->dsts[0]->flags & IR3_REG_HALF)) {
return false;
}

View file

@ -163,16 +163,13 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->gen = fd_dev_gen(dev_id);
compiler->is_64bit = fd_dev_64b(dev_id);
compiler->options = *options;
compiler->info = dev_info;
/* TODO see if older GPU's were different here */
compiler->branchstack_size = 64;
compiler->wave_granularity = dev_info->wave_granularity;
compiler->max_waves = dev_info->max_waves;
compiler->max_variable_workgroup_size = 1024;
compiler->local_mem_size = dev_info->cs_shared_mem_size;
compiler->num_predicates = 1;
compiler->bitops_can_write_predicates = false;
compiler->has_branch_and_or = false;
@ -227,16 +224,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->has_preamble = true;
compiler->tess_use_shared = dev_info->props.tess_use_shared;
compiler->has_getfiberid = dev_info->props.has_getfiberid;
compiler->mov_half_shared_quirk = dev_info->props.mov_half_shared_quirk;
compiler->has_movs = dev_info->props.has_movs;
compiler->has_dp2acc = dev_info->props.has_dp2acc;
compiler->has_dp4acc = dev_info->props.has_dp4acc;
compiler->has_compliant_dp4acc = dev_info->props.has_compliant_dp4acc;
if (compiler->gen == 6 && options->shared_push_consts) {
compiler->shared_consts_base_offset = 504;
compiler->shared_consts_size = 8;
@ -247,32 +234,13 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->geom_shared_consts_size_quirk = 0;
}
compiler->has_fs_tex_prefetch = dev_info->props.has_fs_tex_prefetch;
compiler->stsc_duplication_quirk = dev_info->props.stsc_duplication_quirk;
compiler->load_shader_consts_via_preamble = dev_info->props.load_shader_consts_via_preamble;
compiler->load_inline_uniforms_via_preamble_ldgk = dev_info->props.load_inline_uniforms_via_preamble_ldgk;
compiler->num_predicates = 4;
compiler->bitops_can_write_predicates = true;
compiler->has_branch_and_or = true;
compiler->has_predication = true;
compiler->predtf_nop_quirk = dev_info->props.predtf_nop_quirk;
compiler->prede_nop_quirk = dev_info->props.prede_nop_quirk;
compiler->has_salu_int_narrowing_quirk = dev_info->props.has_salu_int_narrowing_quirk;
compiler->has_scalar_alu = dev_info->props.has_scalar_alu;
compiler->has_scalar_predicates = dev_info->props.has_scalar_predicates;
compiler->has_isam_v = dev_info->props.has_isam_v;
compiler->has_ssbo_imm_offsets = dev_info->props.has_ssbo_imm_offsets;
compiler->fs_must_have_non_zero_constlen_quirk = dev_info->props.fs_must_have_non_zero_constlen_quirk;
compiler->has_early_preamble = dev_info->props.has_early_preamble;
compiler->has_rpt_bary_f = true;
compiler->has_shfl = true;
compiler->reading_shading_rate_requires_smask_quirk =
dev_info->props.reading_shading_rate_requires_smask_quirk;
compiler->shading_rate_matches_vk = dev_info->props.shading_rate_matches_vk;
compiler->has_alias_rt = dev_info->props.has_alias_rt;
compiler->mergedregs = true;
compiler->has_sel_b_fneg = dev_info->props.has_sel_b_fneg;
compiler->has_eolm_eogm = dev_info->props.has_eolm_eogm;
compiler->has_alias_tex = (compiler->gen >= 7);
@ -291,11 +259,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
* earlier gen's.
*/
compiler->max_const_safe = 256;
compiler->has_scalar_alu = false;
compiler->has_isam_v = false;
compiler->has_ssbo_imm_offsets = false;
compiler->has_early_preamble = false;
}
if (dev_info->compute_lb_size) {
@ -303,7 +266,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
} else {
compiler->compute_lb_size =
compiler->max_const_compute * 16 /* bytes/vec4 */ *
compiler->wave_granularity + compiler->local_mem_size;
compiler->info->wave_granularity + compiler->info->cs_shared_mem_size;
}
/* This is just a guess for a4xx. */
@ -325,8 +288,6 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->reg_size_vec4 = 96;
}
compiler->threadsize_base = dev_info->threadsize_base;
if (compiler->gen >= 4) {
/* need special handling for "flat" */
compiler->flat_bypass = true;

View file

@ -107,12 +107,11 @@ struct ir3_compiler {
*/
bool samgq_workaround;
/* on a650, vertex shader <-> tess control io uses LDL/STL */
bool tess_use_shared;
/* Whether full and half regs are merged. */
bool mergedregs;
const struct fd_dev_info *info;
/* The maximum number of constants, in vec4's, across the entire graphics
* pipeline.
*/
@ -145,21 +144,6 @@ struct ir3_compiler {
*/
uint32_t const_upload_unit;
/* The base number of threads per wave. Some stages may be able to double
* this.
*/
uint32_t threadsize_base;
/* On at least a6xx, waves are always launched in pairs. In calculations
* about occupancy, we pretend that each wave pair is actually one wave,
* which simplifies many of the calculations, but means we have to
* multiply threadsize_base by this number.
*/
uint32_t wave_granularity;
/* The maximum number of simultaneous waves per core. */
uint32_t max_waves;
/* This is theoretical maximum number of vec4 registers that one wave of
* the base threadsize could use. To get the actual size of the register
* file in bytes one would need to compute:
@ -177,9 +161,6 @@ struct ir3_compiler {
*/
uint32_t reg_size_vec4;
/* The size of local memory in bytes */
uint32_t local_mem_size;
/* The number of total branch stack entries, divided by wave_granularity. */
uint32_t branchstack_size;
@ -195,27 +176,9 @@ struct ir3_compiler {
/* Whether SSBOs have descriptors for sampling with ISAM */
bool has_isam_ssbo;
/* Whether isam.v is supported to sample multiple components from SSBOs */
bool has_isam_v;
/* Whether isam/stib/ldib have immediate offsets. */
bool has_ssbo_imm_offsets;
/* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle
* instructions are supported which are necessary to support
* subgroup quad and arithmetic operations.
*/
bool has_getfiberid;
/* Whether half register shared->non-shared moves are broken. */
bool mov_half_shared_quirk;
/* Is lock/unlock sequence needed for CS? */
bool cs_lock_unlock_quirk;
/* Whether movs is supported for subgroupBroadcast. */
bool has_movs;
/* True if the shfl instruction is supported. Needed for subgroup rotate and
* (more efficient) shuffle.
*/
@ -235,18 +198,10 @@ struct ir3_compiler {
/* True if predt/predf/prede are supported. */
bool has_predication;
bool predtf_nop_quirk;
bool prede_nop_quirk;
bool has_salu_int_narrowing_quirk;
/* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */
uint32_t max_variable_workgroup_size;
bool has_dp2acc;
bool has_dp4acc;
bool has_compliant_dp4acc;
/* Type to use for 1b nir bools: */
type_t bool_type;
@ -273,55 +228,14 @@ struct ir3_compiler {
*/
uint64_t geom_shared_consts_size_quirk;
bool has_fs_tex_prefetch;
bool stsc_duplication_quirk;
bool load_shader_consts_via_preamble;
bool load_inline_uniforms_via_preamble_ldgk;
/* True if there is a scalar ALU capable of executing a subset of
* cat2-cat4 instructions with a shared register destination. This also
* implies expanded MOV/COV capability when writing to shared registers,
* as MOV/COV is now executed on the scalar ALU except when reading from a
* normal register, as well as the ability for ldc to write to a shared
* register.
*/
bool has_scalar_alu;
/* True if cat2 instructions can write predicate registers from the scalar
* ALU.
*/
bool has_scalar_predicates;
bool fs_must_have_non_zero_constlen_quirk;
/* On all generations that support scalar ALU, there is also a copy of the
* scalar ALU and some other HW units in HLSQ that can execute preambles
* before work is dispatched to the SPs, called "early preamble". We detect
* whether the shader can use early preamble in ir3.
*/
bool has_early_preamble;
/* True if (rptN) is supported for bary.f. */
bool has_rpt_bary_f;
/* True if alias.tex is supported. */
bool has_alias_tex;
/* True if alias.rt is supported. */
bool has_alias_rt;
bool reading_shading_rate_requires_smask_quirk;
bool shading_rate_matches_vk;
bool cat3_rel_offset_0_quirk;
bool has_sel_b_fneg;
bool has_eolm_eogm;
struct {
/* The number of cycles needed for the result of one ALU operation to be
* available to another ALU operation. Only valid when the halfness of the

View file

@ -349,7 +349,7 @@ create_cov(struct ir3_context *ctx, unsigned nrpt,
*/
if (src_type == TYPE_U8 && full_type(dst_type) == TYPE_U32) {
struct ir3_instruction_rpt mask = create_immed_typed_shared_rpt(
&ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->has_scalar_alu);
&ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->info->props.has_scalar_alu);
struct ir3_instruction_rpt cov =
ir3_AND_B_rpt(&ctx->build, nrpt, src, 0, mask, 0);
set_dst_flags(cov.rpts, nrpt, type_flags(dst_type));
@ -367,7 +367,8 @@ create_cov(struct ir3_context *ctx, unsigned nrpt,
struct ir3_instruction_rpt cov;
if (op == nir_op_u2f16 || op == nir_op_u2f32) {
struct ir3_instruction_rpt mask = create_immed_typed_shared_rpt(
&ctx->build, nrpt, 0xff, TYPE_U8, ctx->compiler->has_scalar_alu);
&ctx->build, nrpt, 0xff, TYPE_U8,
ctx->compiler->info->props.has_scalar_alu);
cov = ir3_AND_B_rpt(&ctx->build, nrpt, src, 0, mask, 0);
set_dst_flags(cov.rpts, nrpt, IR3_REG_HALF);
cov = ir3_COV_rpt(&ctx->build, nrpt, cov, TYPE_U16, dst_type);
@ -432,7 +433,7 @@ emit_alu_dot_4x8_as_dp4acc(struct ir3_context *ctx, nir_alu_instr *alu,
struct ir3_instruction **dst,
struct ir3_instruction **src)
{
if (ctx->compiler->has_compliant_dp4acc) {
if (ctx->compiler->info->props.has_compliant_dp4acc) {
dst[0] = ir3_DP4ACC(&ctx->build, src[0], 0, src[1], 0, src[2], 0);
/* This is actually the LHS signedness attribute.
@ -576,7 +577,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
assert(dst_sz == 1 || ir3_supports_vectorized_nir_op(alu->op));
bool use_shared = !alu->def.divergent &&
ctx->compiler->has_scalar_alu &&
ctx->compiler->info->props.has_scalar_alu &&
/* it probably isn't worth emulating these with scalar-only ops */
alu->op != nir_op_udot_4x8_uadd &&
alu->op != nir_op_udot_4x8_uadd_sat &&
@ -1126,9 +1127,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
struct ir3_instruction *src_rpt0[] = {src[0].rpts[0], src[1].rpts[0],
src[2].rpts[0]};
if (ctx->compiler->has_dp4acc) {
if (ctx->compiler->info->props.has_dp4acc) {
emit_alu_dot_4x8_as_dp4acc(ctx, alu, dst.rpts, src_rpt0);
} else if (ctx->compiler->has_dp2acc) {
} else if (ctx->compiler->info->props.has_dp2acc) {
emit_alu_dot_4x8_as_dp2acc(ctx, alu, dst.rpts, src_rpt0);
} else {
ir3_context_error(ctx, "ALU op should have been lowered: %s\n",
@ -1180,7 +1181,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
assert(nir_intrinsic_base(intr) == 0);
unsigned ncomp = intr->num_components;
bool use_shared = !intr->def.divergent && ctx->compiler->has_scalar_alu;
bool use_shared = !intr->def.divergent && ctx->compiler->info->props.has_scalar_alu;
struct ir3_instruction *offset =
ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0];
struct ir3_instruction *idx =
@ -1215,7 +1216,7 @@ emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
struct ir3_instruction *addr1 = ir3_create_addr1(&ctx->build, base);
bool use_shared = ctx->compiler->has_scalar_alu;
bool use_shared = ctx->compiler->info->props.has_scalar_alu;
struct ir3_instruction *offset =
ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0];
struct ir3_instruction *idx =
@ -1454,7 +1455,7 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx,
create_immed(b, intr->num_components), 0);
/* for a650, use LDL for tess ctrl inputs: */
if (ctx->so->type == MESA_SHADER_TESS_CTRL && ctx->compiler->tess_use_shared)
if (ctx->so->type == MESA_SHADER_TESS_CTRL && ctx->compiler->info->props.tess_use_shared)
load->opc = OPC_LDL;
load->cat6.type = utype_def(&intr->def);
@ -1484,7 +1485,7 @@ emit_intrinsic_store_shared_ir3(struct ir3_context *ctx,
/* for a650, use STL for vertex outputs used by tess ctrl shader: */
if (ctx->so->type == MESA_SHADER_VERTEX && ctx->so->key.tessellation &&
ctx->compiler->tess_use_shared)
ctx->compiler->info->props.tess_use_shared)
store->opc = OPC_STL;
store->cat6.dst_offset = nir_intrinsic_base(intr);
@ -1969,7 +1970,7 @@ emit_readonly_load_uav(struct ir3_context *ctx,
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, index, false);
struct ir3_instruction *src1;
if (ctx->compiler->has_isam_v && !uav_load) {
if (ctx->compiler->info->props.has_isam_v && !uav_load) {
src1 = create_immed(b, imm_offset);
} else {
assert(imm_offset == 0);
@ -1988,7 +1989,7 @@ emit_readonly_load_uav(struct ir3_context *ctx,
ir3_split_dest(b, dst, sam, 0, num_components);
if (ctx->compiler->has_isam_v && !uav_load) {
if (ctx->compiler->info->props.has_isam_v && !uav_load) {
sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
if (imm_offset) {
@ -2011,7 +2012,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
* Note: isam also can't handle 8-bit loads.
*/
if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) ||
(intr->def.num_components > 1 && !ctx->compiler->has_isam_v) ||
(intr->def.num_components > 1 && !ctx->compiler->info->props.has_isam_v) ||
(ctx->compiler->options.storage_8bit && intr->def.bit_size == 8) ||
!ctx->compiler->has_isam_ssbo) {
ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
@ -2023,7 +2024,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
struct ir3_instruction *coords = NULL;
unsigned imm_offset = 0;
if (ctx->compiler->has_isam_v) {
if (ctx->compiler->info->props.has_isam_v) {
ir3_lower_imm_offset(ctx, intr, offset_src, 8, &coords, &imm_offset);
} else {
coords =
@ -2619,7 +2620,7 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
struct ir3_instruction *src,
struct ir3_instruction *dst)
{
if (!ctx->compiler->mov_half_shared_quirk) {
if (!ctx->compiler->info->props.mov_half_shared_quirk) {
return dst;
}
@ -2642,7 +2643,7 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
} else {
dst = ir3_MOV(&ctx->build, dst, TYPE_U32);
}
if (!ctx->compiler->has_scalar_alu)
if (!ctx->compiler->info->props.has_scalar_alu)
dst->dsts[0]->flags &= ~IR3_REG_SHARED;
}
@ -2752,8 +2753,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
create_rpt = true;
} else {
src = ctx->compiler->has_scalar_alu ?
ir3_get_src_maybe_shared(ctx, &intr->src[0]) :
src = ctx->compiler->info->props.has_scalar_alu ?
ir3_get_src_maybe_shared(ctx, &intr->src[0]) :
ir3_get_src(ctx, &intr->src[0]);
for (int i = 0; i < dest_components; i++) {
dst[i] = create_uniform_indirect(
@ -2764,7 +2765,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
* registers, manually make it shared. Optimizations can undo this if
* the user can't use shared regs.
*/
if (ctx->compiler->has_scalar_alu && !intr->def.divergent)
if (ctx->compiler->info->props.has_scalar_alu && !intr->def.divergent)
dst[i]->dsts[0]->flags |= IR3_REG_SHARED;
}
@ -3092,7 +3093,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
dst[0] = create_driver_param(ctx, IR3_DP_CS(work_dim));
break;
case nir_intrinsic_load_subgroup_invocation:
assert(ctx->compiler->has_getfiberid);
assert(ctx->compiler->info->props.has_getfiberid);
dst[0] = ir3_GETFIBERID(b);
dst[0]->cat6.type = TYPE_U32;
__ssa_dst(dst[0]);
@ -3166,7 +3167,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
} else {
/* unconditional discard: */
cond = create_immed_typed_shared(b, 1, ctx->compiler->bool_type,
ctx->compiler->has_scalar_alu);
ctx->compiler->info->props.has_scalar_alu);
}
cond = ir3_get_predicate(ctx, cond);
@ -3381,7 +3382,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *src =
ir3_create_collect(b, ir3_get_src_shared(ctx, &intr->src[0],
ctx->compiler->has_scalar_alu),
ctx->compiler->info->props.has_scalar_alu),
components);
ir3_store_const(ctx->so, b, src, dst);
break;
@ -4172,7 +4173,7 @@ emit_phi(struct ir3_context *ctx, nir_phi_instr *nphi)
phi->phi.nphi = nphi;
phi->phi.comp = i;
if (ctx->compiler->has_scalar_alu && !nphi->def.divergent)
if (ctx->compiler->info->props.has_scalar_alu && !nphi->def.divergent)
phi->dsts[0]->flags |= IR3_REG_SHARED;
dst[i] = phi;
@ -5442,7 +5443,7 @@ emit_instructions(struct ir3_context *ctx)
emit_function(ctx, fxn);
if (ctx->so->type == MESA_SHADER_TESS_CTRL &&
ctx->compiler->tess_use_shared) {
ctx->compiler->info->props.tess_use_shared) {
/* Anything before shpe seems to be ignored in the main shader when early
* preamble is enabled on a7xx, so we have to put the barrier after.
*/
@ -5671,7 +5672,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
if (so->type == MESA_SHADER_FRAGMENT && so->reads_shading_rate &&
!so->reads_smask &&
compiler->reading_shading_rate_requires_smask_quirk) {
compiler->info->props.reading_shading_rate_requires_smask_quirk) {
create_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, 0x1);
}
@ -5858,7 +5859,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
* for the binning variant, ir3_const_add_imm will ensure we don't add more
* immediates than allowed.
*/
if (so->binning_pass && !compiler->load_shader_consts_via_preamble &&
if (so->binning_pass && !compiler->info->props.load_shader_consts_via_preamble &&
so->nonbinning->imm_state.size) {
ASSERTED bool success =
ir3_const_ensure_imm_size(so, so->nonbinning->imm_state.size);
@ -6091,7 +6092,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
}
if (ctx->so->type == MESA_SHADER_FRAGMENT &&
compiler->fs_must_have_non_zero_constlen_quirk) {
compiler->info->props.fs_must_have_non_zero_constlen_quirk) {
so->constlen = MAX2(so->constlen, 4);
}

View file

@ -109,7 +109,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
/* Enable the texture pre-fetch feature only a4xx onwards. But
* only enable it on generations that have been tested:
*/
if ((so->type == MESA_SHADER_FRAGMENT) && compiler->has_fs_tex_prefetch) {
if ((so->type == MESA_SHADER_FRAGMENT) && compiler->info->props.has_fs_tex_prefetch) {
NIR_PASS(_, ctx->s, ir3_nir_lower_tex_prefetch, &so->prefetch_bary_type);
}
@ -435,7 +435,7 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
if (cond->dsts[0]->flags & IR3_REG_SHARED) {
cond->dsts[0]->flags &= ~IR3_REG_SHARED;
if (ctx->compiler->has_scalar_predicates) {
if (ctx->compiler->info->props.has_scalar_predicates) {
cond->dsts[0]->flags |= IR3_REG_UNIFORM;
}
}

View file

@ -311,7 +311,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
* 0xffff depending on whether src_type is signed or unsigned.
* Float conversions behave as expected.
*/
if (ctx->shader->compiler->has_salu_int_narrowing_quirk &&
if (ctx->shader->compiler->info->props.has_salu_int_narrowing_quirk &&
(instr->opc == OPC_MOV) &&
(instr->cat1.dst_type != instr->cat1.src_type) &&
(type_size(instr->cat1.dst_type) <

View file

@ -154,7 +154,7 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
continue;
if (instr->input.sysval == SYSTEM_VALUE_SAMPLE_MASK_IN &&
so->reads_shading_rate &&
ir->compiler->reading_shading_rate_requires_smask_quirk)
ir->compiler->info->props.reading_shading_rate_requires_smask_quirk)
continue;
}

View file

@ -128,7 +128,7 @@ retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v)
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
}
if (!v->compiler->load_shader_consts_via_preamble) {
if (!v->compiler->info->props.load_shader_consts_via_preamble) {
v->imm_state.size = blob_read_uint32(blob);
v->imm_state.count = v->imm_state.size;
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
@ -157,7 +157,7 @@ store_variant(struct blob *blob, const struct ir3_shader_variant *v)
/* When load_shader_consts_via_preamble, immediates are loaded in the
* preamble and hence part of bin.
*/
if (!v->compiler->load_shader_consts_via_preamble) {
if (!v->compiler->info->props.load_shader_consts_via_preamble) {
blob_write_uint32(blob, v->imm_state.size);
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
blob_write_bytes(blob, v->imm_state.values, immeds_sz);

View file

@ -963,7 +963,7 @@ apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
stsc->cat6.iim_val = n->push_consts.src_size;
stsc->cat6.type = TYPE_U32;
if (ctx->compiler->stsc_duplication_quirk) {
if (ctx->compiler->info->props.stsc_duplication_quirk) {
struct ir3_builder build = ir3_builder_at(ir3_after_instr(stsc));
struct ir3_instruction *nop = ir3_NOP(&build);
nop->flags |= IR3_INSTR_SS;
@ -1353,11 +1353,11 @@ add_predication_workaround(struct ir3_compiler *compiler,
struct ir3_instruction *predtf,
struct ir3_instruction *prede)
{
if (predtf && compiler->predtf_nop_quirk) {
if (predtf && compiler->info->props.predtf_nop_quirk) {
add_nop_before_block(predtf->block->predecessors[0]->successors[1], 4);
}
if (compiler->prede_nop_quirk) {
if (compiler->info->props.prede_nop_quirk) {
add_nop_before_block(prede->block->successors[0], 6);
}
}
@ -2509,7 +2509,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
so->early_preamble = has_preamble && !gpr_in_preamble &&
!pred_in_preamble && !relative_in_preamble &&
ir->compiler->has_early_preamble &&
ir->compiler->info->props.has_early_preamble &&
!(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE);
/* On a7xx, sync behavior for a1.x is different in the early preamble. RaW
@ -2567,7 +2567,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
kill_sched(ir, so);
if ((so->type == MESA_SHADER_FRAGMENT || so->type == MESA_SHADER_COMPUTE) &&
so->compiler->has_eolm_eogm) {
so->compiler->info->props.has_eolm_eogm) {
feature_usage_sched(ctx, ir, so, needs_eolm, is_cheap_for_eolm_eogm,
IR3_INSTR_EOLM);
feature_usage_sched(ctx, ir, so, needs_eogm, is_cheap_for_eolm_eogm,

View file

@ -557,7 +557,7 @@ ir3_lower_copies(struct ir3_shader_variant *v)
* components of the normal src and its even neighbor and then
* unswap afterwards to make it work for everything.
*/
if (v->compiler->mov_half_shared_quirk &&
if (v->compiler->info->props.mov_half_shared_quirk &&
(instr->dsts[0]->flags & IR3_REG_SHARED) &&
(instr->dsts[0]->flags & IR3_REG_HALF) &&
!(instr->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_IMMED |

View file

@ -704,7 +704,7 @@ lower_scan_reduce(struct nir_builder *b, nir_instr *instr, void *data)
bool
ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v)
{
if (!v->compiler->has_getfiberid)
if (!v->compiler->info->props.has_getfiberid)
return false;
return nir_shader_lower_instructions(nir, filter_scan_reduce,
@ -721,7 +721,7 @@ ir3_nir_lower_subgroups_filter(const nir_intrinsic_instr *intrin, const void *da
if (nir_intrinsic_cluster_size(intrin) == 1) {
return true;
}
if (nir_intrinsic_cluster_size(intrin) > 0 && !compiler->has_getfiberid) {
if (nir_intrinsic_cluster_size(intrin) > 0 && !compiler->info->props.has_getfiberid) {
return true;
}
FALLTHROUGH;
@ -741,7 +741,7 @@ ir3_nir_lower_subgroups_filter(const nir_intrinsic_instr *intrin, const void *da
return intrin->def.num_components > 1;
}
case nir_intrinsic_read_invocation:
return !compiler->has_movs;
return !compiler->info->props.has_movs;
default:
return true;
}

View file

@ -149,7 +149,7 @@ ir3_nir_should_scalarize_mem(const nir_intrinsic_instr *intrin, const void *data
*/
if ((intrin->intrinsic == nir_intrinsic_load_ssbo) &&
(nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
compiler->has_isam_ssbo && !compiler->has_isam_v) {
compiler->has_isam_ssbo && !compiler->info->props.has_isam_v) {
return true;
}
@ -187,7 +187,7 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
*/
if ((low->intrinsic == nir_intrinsic_load_ssbo) &&
(nir_intrinsic_access(low) & ACCESS_CAN_REORDER) &&
compiler->has_isam_ssbo && !compiler->has_isam_v) {
compiler->has_isam_ssbo && !compiler->info->props.has_isam_v) {
return false;
}
@ -910,12 +910,12 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
NIR_PASS(_, s, ir3_nir_move_varying_inputs);
NIR_PASS(_, s, nir_lower_fb_read);
NIR_PASS(_, s, ir3_nir_lower_layer_id);
if (!compiler->shading_rate_matches_vk)
if (!compiler->info->props.shading_rate_matches_vk)
NIR_PASS(_, s, ir3_nir_lower_frag_shading_rate);
}
if (s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_GEOMETRY) {
if (!compiler->shading_rate_matches_vk)
if (!compiler->info->props.shading_rate_matches_vk)
NIR_PASS(_, s, ir3_nir_lower_primitive_shading_rate);
}
@ -981,7 +981,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
if (!((s->info.stage == MESA_SHADER_COMPUTE) ||
(s->info.stage == MESA_SHADER_KERNEL) ||
compiler->has_getfiberid)) {
compiler->info->props.has_getfiberid)) {
options.subgroup_size = 1;
options.lower_vote_trivial = true;
}
@ -1323,7 +1323,7 @@ ir3_nir_set_threadsize(struct ir3_shader_variant *v, const nir_shader *s)
* might make different barrier choices).
*/
if (!info->workgroup_size_variable) {
if (threads_per_wg <= compiler->threadsize_base)
if (threads_per_wg <= compiler->info->threadsize_base)
v->shader_options.real_wavesize = IR3_SINGLE_ONLY;
}
@ -1338,7 +1338,7 @@ ir3_nir_set_threadsize(struct ir3_shader_variant *v, const nir_shader *s)
*/
if (compiler->gen < 6 &&
(info->workgroup_size_variable ||
threads_per_wg > compiler->threadsize_base * compiler->max_waves)) {
threads_per_wg > compiler->info->threadsize_base * compiler->info->max_waves)) {
v->shader_options.real_wavesize = IR3_DOUBLE_ONLY;
};
}
@ -1463,7 +1463,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
progress |= OPT(s, ir3_nir_opt_subgroups, so);
if (so->compiler->load_shader_consts_via_preamble)
if (so->compiler->info->props.load_shader_consts_via_preamble)
progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so);
if (!so->binning_pass) {
@ -1503,7 +1503,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
!(ir3_shader_debug & IR3_DBG_NOPREAMBLE))
progress |= OPT(s, ir3_nir_opt_preamble, so);
if (so->compiler->load_shader_consts_via_preamble)
if (so->compiler->info->props.load_shader_consts_via_preamble)
progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so);
/* Do matrix reassociate after preamble, because we want uniform matrix
@ -1891,7 +1891,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4, 1);
}
if (!compiler->load_shader_consts_via_preamble) {
if (!compiler->info->props.load_shader_consts_via_preamble) {
switch (v->type) {
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:

View file

@ -617,7 +617,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
ptrs_vec4, 1);
}
uint32_t align_vec4 = compiler->load_shader_consts_via_preamble
uint32_t align_vec4 = compiler->info->props.load_shader_consts_via_preamble
? 1
: compiler->const_upload_unit;

View file

@ -325,7 +325,7 @@ ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data)
{
const struct ir3_compiler *compiler = data;
if (!compiler->has_ssbo_imm_offsets)
if (!compiler->info->props.has_ssbo_imm_offsets)
return 0;
switch (intrin->intrinsic) {

View file

@ -374,7 +374,7 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader,
* HS uses a different primitive id, which starts at bit 16 in the header
*/
if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
v->compiler->tess_use_shared)
v->compiler->info->props.tess_use_shared)
state.local_primitive_id_start = 16;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);

View file

@ -238,7 +238,7 @@ rewrite_cost(nir_def *def, const void *data)
nir_intrinsic_instr *parent_intrin =
nir_instr_as_intrinsic(parent_instr);
if (v->compiler->has_alias_rt && v->type == MESA_SHADER_FRAGMENT &&
if (v->compiler->info->props.has_alias_rt && v->type == MESA_SHADER_FRAGMENT &&
parent_intrin->intrinsic == nir_intrinsic_store_output &&
def->bit_size == 32) {
/* For FS outputs, alias.rt can use const registers without a mov.

View file

@ -59,7 +59,8 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx,
clone->dsts[0]->flags |= IR3_REG_PREDICATE;
clone->dsts[0]->flags &= ~(IR3_REG_HALF | IR3_REG_SHARED);
if (ctx->ir->compiler->has_scalar_predicates && opc_cat(instr->opc) == 2 &&
if (ctx->ir->compiler->info->props.has_scalar_predicates &&
opc_cat(instr->opc) == 2 &&
(instr->dsts[0]->flags & IR3_REG_SHARED)) {
clone->dsts[0]->flags |= IR3_REG_UNIFORM;
}
@ -77,7 +78,7 @@ can_write_predicate(struct opt_predicates_ctx *ctx,
case OPC_CMPS_U:
case OPC_CMPS_F:
return !cat2_needs_scalar_alu(instr) ||
ctx->ir->compiler->has_scalar_predicates;
ctx->ir->compiler->info->props.has_scalar_predicates;
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
@ -85,7 +86,7 @@ can_write_predicate(struct opt_predicates_ctx *ctx,
case OPC_GETBIT_B:
return ctx->ir->compiler->bitops_can_write_predicates &&
(!cat2_needs_scalar_alu(instr) ||
ctx->ir->compiler->has_scalar_predicates);
ctx->ir->compiler->info->props.has_scalar_predicates);
default:
return false;
}

View file

@ -11,7 +11,7 @@
bool
ir3_imm_const_to_preamble(struct ir3 *ir, struct ir3_shader_variant *so)
{
if (!ir->compiler->load_shader_consts_via_preamble) {
if (!ir->compiler->info->props.load_shader_consts_via_preamble) {
return false;
}

View file

@ -2708,7 +2708,7 @@ calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
if (v->local_size_variable) {
if (v->type == MESA_SHADER_KERNEL) {
threads_per_wg = compiler->threadsize_base * (double_threadsize ? 2 : 1);
threads_per_wg = compiler->info->threadsize_base * (double_threadsize ? 2 : 1);
} else {
/* We have to expect the worst case. */
threads_per_wg = compiler->max_variable_workgroup_size;
@ -2726,8 +2726,8 @@ calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
*/
unsigned waves_per_wg = DIV_ROUND_UP(
threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) *
compiler->wave_granularity);
threads_per_wg, compiler->info->threadsize_base * (double_threadsize ? 2 : 1) *
compiler->info->wave_granularity);
uint32_t vec4_regs_per_thread =
compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1));

View file

@ -47,7 +47,7 @@ ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
* should be the same for the binning and non-binning variants. Make sure
* we don't increase the size beyond that of the non-binning variant.
*/
if (v->binning_pass && !v->compiler->load_shader_consts_via_preamble &&
if (v->binning_pass && !v->compiler->info->props.load_shader_consts_via_preamble &&
size > v->nonbinning->imm_state.size) {
return false;
}
@ -1333,10 +1333,10 @@ ir3_shader_get_subgroup_size(const struct ir3_compiler *compiler,
{
switch (options->api_wavesize) {
case IR3_SINGLE_ONLY:
*subgroup_size = *max_subgroup_size = compiler->threadsize_base;
*subgroup_size = *max_subgroup_size = compiler->info->threadsize_base;
break;
case IR3_DOUBLE_ONLY:
*subgroup_size = *max_subgroup_size = compiler->threadsize_base * 2;
*subgroup_size = *max_subgroup_size = compiler->info->threadsize_base * 2;
break;
case IR3_SINGLE_OR_DOUBLE:
/* For vertex stages, we know the wavesize will never be doubled.
@ -1345,10 +1345,10 @@ ir3_shader_get_subgroup_size(const struct ir3_compiler *compiler,
* a driver param.
*/
if (stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_FRAGMENT) {
*subgroup_size = *max_subgroup_size = compiler->threadsize_base;
*subgroup_size = *max_subgroup_size = compiler->info->threadsize_base;
} else {
*subgroup_size = 0;
*max_subgroup_size = compiler->threadsize_base * 2;
*max_subgroup_size = compiler->info->threadsize_base * 2;
}
break;
}

View file

@ -1073,7 +1073,7 @@ static inline unsigned
ir3_max_const_compute(const struct ir3_shader_variant *v,
const struct ir3_compiler *compiler)
{
unsigned lm_size = v->local_size_variable ? compiler->local_mem_size :
unsigned lm_size = v->local_size_variable ? compiler->info->cs_shared_mem_size :
v->cs.req_local_mem;
/* The LB is divided between consts and local memory. LB is split into
@ -1088,7 +1088,7 @@ ir3_max_const_compute(const struct ir3_shader_variant *v,
* configuration where there is enough space for LM.
*/
unsigned lb_const_size =
((compiler->compute_lb_size - lm_size) / compiler->wave_granularity) /
((compiler->compute_lb_size - lm_size) / compiler->info->wave_granularity) /
16 /* bytes per vec4 */;
if (lb_const_size < compiler->max_const_compute) {
const uint32_t lb_const_sizes[] = { 128, 192, 256, 512 };

View file

@ -848,7 +848,7 @@ can_demote_src(struct ir3_instruction *instr)
case OPC_META_COLLECT:
return false;
case OPC_MOV:
if (instr->block->shader->compiler->has_salu_int_narrowing_quirk) {
if (instr->block->shader->compiler->info->props.has_salu_int_narrowing_quirk) {
/* Avoid demoting something that would cause narrowing integer
* conversion from GPR to uGPR:
*/

View file

@ -67,7 +67,7 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg)
}
if (reg->flags & IR3_REG_UNIFORM) {
validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates);
validate_assert(ctx, ctx->ir->compiler->info->props.has_scalar_predicates);
validate_assert(ctx, reg->flags & IR3_REG_PREDICATE);
}
@ -334,7 +334,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
if ((opc_cat(instr->opc) == 2 || opc_cat(instr->opc) == 3 ||
opc_cat(instr->opc) == 4)) {
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED) ||
ctx->ir->compiler->has_scalar_alu);
ctx->ir->compiler->info->props.has_scalar_alu);
}
/* Check that src/dst types match the register types, and for
@ -343,7 +343,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
*/
switch (opc_cat(instr->opc)) {
case 1: /* move instructions */
if (ctx->ir->compiler->has_salu_int_narrowing_quirk &&
if (ctx->ir->compiler->info->props.has_salu_int_narrowing_quirk &&
(instr->opc == OPC_MOV) &&
(instr->cat1.dst_type != instr->cat1.src_type) &&
(type_size(instr->cat1.dst_type) <

View file

@ -1794,7 +1794,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
* UCHE because the FS param patchpoint is read through UCHE.
*/
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
if (cmd->device->compiler->load_shader_consts_via_preamble) {
if (cmd->device->compiler->info->props.load_shader_consts_via_preamble) {
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
tu_cs_emit_wfi(cs);
}
@ -8094,7 +8094,7 @@ tu7_emit_fs_params(struct tu_cmd_buffer *cmd)
static void
tu_emit_fs_params(struct tu_cmd_buffer *cmd)
{
if (cmd->device->compiler->load_shader_consts_via_preamble)
if (cmd->device->compiler->info->props.load_shader_consts_via_preamble)
tu7_emit_fs_params(cmd);
else
tu6_emit_fs_params(cmd);

View file

@ -5070,7 +5070,7 @@ tu_GetPipelineExecutablePropertiesKHR(
VK_COPY_STR(props->description, _mesa_shader_stage_to_string(stage));
props->subgroupSize =
dev->compiler->threadsize_base * (exe->stats.double_threadsize ? 2 : 1);
dev->compiler->info->threadsize_base * (exe->stats.double_threadsize ? 2 : 1);
}
}

View file

@ -349,7 +349,7 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
*/
base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * FDL6_TEX_CONST_DWORDS));
nir_def *dynamic_offset_start;
if (compiler->load_shader_consts_via_preamble) {
if (compiler->info->props.load_shader_consts_via_preamble) {
dynamic_offset_start =
ir3_load_driver_ubo(b, 1, &shader->const_state.dynamic_offsets_ubo, set);
} else {
@ -646,7 +646,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
case nir_intrinsic_load_frag_offset_ir3:
case nir_intrinsic_load_gmem_frag_scale_ir3:
case nir_intrinsic_load_gmem_frag_offset_ir3: {
if (!dev->compiler->load_shader_consts_via_preamble)
if (!dev->compiler->info->props.load_shader_consts_via_preamble)
return false;
unsigned param;
@ -679,7 +679,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
return true;
}
case nir_intrinsic_load_frag_invocation_count: {
if (!dev->compiler->load_shader_consts_via_preamble)
if (!dev->compiler->info->props.load_shader_consts_via_preamble)
return false;
nir_def *result =
@ -3365,10 +3365,10 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key,
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
if (subgroup_info) {
if (subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
if (subgroup_info->requiredSubgroupSize == dev->compiler->info->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
assert(subgroup_info->requiredSubgroupSize == dev->compiler->info->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {

View file

@ -93,10 +93,10 @@ static inline bool
fd6_load_shader_consts_via_preamble(const struct ir3_shader_variant *v)
{
if (CHIP == A8XX) {
assert(v->compiler->load_shader_consts_via_preamble);
assert(v->compiler->info->props.load_shader_consts_via_preamble);
return true;
}
return (CHIP == A7XX) && v->compiler->load_shader_consts_via_preamble;
return (CHIP == A7XX) && v->compiler->info->props.load_shader_consts_via_preamble;
}
template <chip CHIP>
@ -104,10 +104,10 @@ static inline bool
fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v)
{
if (CHIP == A8XX) {
assert(v->compiler->load_inline_uniforms_via_preamble_ldgk);
assert(v->compiler->info->props.load_inline_uniforms_via_preamble_ldgk);
return true;
}
return (CHIP == A7XX) && v->compiler->load_inline_uniforms_via_preamble_ldgk;
return (CHIP == A7XX) && v->compiler->info->props.load_inline_uniforms_via_preamble_ldgk;
}
template <chip CHIP>

View file

@ -48,7 +48,7 @@ emit_const_asserts(const struct ir3_shader_variant *v, uint32_t regid,
uint32_t sizedwords)
{
assert((v->type == MESA_SHADER_VERTEX) ||
!v->compiler->load_shader_consts_via_preamble);
!v->compiler->info->props.load_shader_consts_via_preamble);
assert((regid % 4) == 0);
assert((sizedwords % 4) == 0);
assert(regid + sizedwords <= v->constlen * 4);