Merge branch 'panvk/pilot' into 'main'

Draft: Very draft! Panvk pilot shaders

See merge request mesa/mesa!39009

Commit 600fdfbc09, 18 changed files with 697 additions and 166 deletions
@@ -6623,6 +6623,8 @@ bool nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_opt
 
 void nir_sweep(nir_shader *shader);
 
+void nir_steal_function(nir_shader *nir, nir_function *f);
+
 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
 
@@ -167,7 +167,6 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
    case nir_intrinsic_load_fb_layers_v3d:
    case nir_intrinsic_load_fep_w_v3d:
    case nir_intrinsic_load_tcs_num_patches_amd:
-   case nir_intrinsic_load_sample_positions_pan:
    case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
    case nir_intrinsic_load_prim_gen_query_enabled_amd:
    case nir_intrinsic_load_prim_xfb_query_enabled_amd:

@@ -233,6 +232,8 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_bindless_image_agx:
    case nir_intrinsic_bindless_sampler_agx:
+   case nir_intrinsic_load_texel_buf_conv_pan:
+   case nir_intrinsic_load_texel_buf_index_address_pan:
       return can_move_srcs(&instr->instr, ctx);
 
    /* Image/SSBO loads can be moved if they are CAN_REORDER and their

@@ -240,6 +241,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
     */
    case nir_intrinsic_image_load:
    case nir_intrinsic_image_samples_identical:
+   case nir_intrinsic_image_texel_address:
    case nir_intrinsic_bindless_image_load:
    case nir_intrinsic_load_global_bounded:
    case nir_intrinsic_load_ssbo:

@@ -813,7 +815,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
 
    if (num_candidates == 0) {
       free(ctx.states);
-      return false;
+      return nir_no_progress(impl);
    }
 
    def_state **candidates = malloc(sizeof(*candidates) * num_candidates);

@@ -880,7 +882,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
    if (num_candidates == 0) {
       free(ctx.states);
       free(candidates);
-      return false;
+      return nir_no_progress(impl);
    }
 
    /* Step 4: Figure out which candidates we're going to replace and assign an

@@ -940,6 +942,12 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
    if (analyze_speculation_for_cf_list(&ctx, &impl->body))
       analyze_reconstructed(&ctx, impl);
 
+   if (!BITSET_TEST_RANGE(ctx.reconstructed_defs, 0, impl->ssa_alloc)) {
+      free(ctx.states);
+      free(ctx.reconstructed_defs);
+      return nir_no_progress(impl);
+   }
+
    /* Step 5: Actually do the replacement. */
    struct hash_table *remap_table =
       _mesa_pointer_hash_table_create(NULL);

@@ -980,11 +988,10 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
       }
    }
 
-   nir_progress(true, impl, nir_metadata_control_flow);
-
    ralloc_free(remap_table);
    free(ctx.states);
    free(ctx.reconstructed_defs);
    _mesa_set_destroy(ctx.reconstructed_ifs, NULL);
-   return true;
+
+   return nir_progress(true, impl, nir_metadata_control_flow);
 }
@@ -185,6 +185,29 @@ sweep_function(nir_shader *nir, nir_function *f)
    sweep_impl(nir, f->impl);
 }
 
+/** Steals a function from one shader into another.
+ *
+ * This knows nothing about dependencies so if this function calls other
+ * functions, you'll need a more complex pass that uses nir_function_clone()
+ * to steal them all.
+ */
+void
+nir_steal_function(nir_shader *nir, nir_function *f)
+{
+#ifndef NDEBUG
+   if (f->impl) {
+      nir_foreach_block(block, f->impl) {
+         nir_foreach_instr(instr, block) {
+            assert(instr->type != nir_instr_type_call);
+         }
+      }
+   }
+#endif
+   exec_node_remove(&f->node);
+   exec_list_push_tail(&nir->functions, &f->node);
+   sweep_function(nir, f);
+}
+
 void
 nir_sweep(nir_shader *nir)
 {
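As a quick illustration of the new helper, here is a minimal sketch of the intended calling pattern, mirroring the Bifrost hunk further down; make_pilot_shader and its shape are hypothetical, and only the nir_* calls that appear in this series are assumed.

/* Hypothetical sketch: split a preamble function out into its own
 * MESA_SHADER_COMPUTE shader.  nir_steal_function() asserts the function
 * contains no calls, so any dependencies must already be resolved. */
static nir_shader *
make_pilot_shader(nir_shader *producer, nir_function *preamble)
{
   nir_shader *pilot =
      nir_shader_create(producer, MESA_SHADER_COMPUTE, producer->options);

   pilot->info = producer->info;
   pilot->info.stage = MESA_SHADER_COMPUTE;

   /* The function now lives on pilot->functions and is swept into pilot's
    * memory context. */
   nir_steal_function(pilot, preamble);
   preamble->is_preamble = false;
   preamble->is_entrypoint = true;

   return pilot;
}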
@@ -6504,7 +6504,7 @@ bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id)
 }
 
 static int
-compare_u32(const void* a, const void* b, void* _)
+compare_u32(const void* a, const void* b)
 {
    const uint32_t va = (uintptr_t)a;
    const uint32_t vb = (uintptr_t)b;

@@ -6673,9 +6673,9 @@ bi_compile_variant_nir(nir_shader *nir,
       sorted[idx++] = (uintptr_t)entry.data;
    }
 
-   util_qsort_r(sorted, const_amount, sizeof(uint32_t), compare_u32, NULL);
+   qsort(sorted, const_amount, sizeof(uint32_t), compare_u32);
    uint32_t max_amount = MIN2(const_amount, ctx->inputs->fau_consts.max_amount);
    uint32_t min_count_for_fau = max_amount > 0 ? sorted[max_amount - 1] : 0;
    ralloc_free(sorted);
 
    bi_foreach_instr_global_safe(ctx, I) {

@@ -6837,7 +6837,7 @@ bi_compile_variant(nir_shader *nir,
 
    /* Software invariant: Only a secondary shader can appear at a nonzero
     * offset, to keep the ABI simple. */
-   assert((offset == 0) ^ (idvs == BI_IDVS_VARYING));
+   assert((offset == 0) ^ (idvs == BI_IDVS_VARYING || idvs == BI_IDVS_PILOT));
 
    struct pan_stats *stats =
       idvs == BI_IDVS_VARYING ? &info->stats_idvs_varying : &info->stats;

@@ -6874,6 +6874,11 @@ bi_compile_variant(nir_shader *nir,
       info->vs.secondary_offset = offset;
       info->vs.secondary_preload = preload;
       info->vs.secondary_work_reg_count = ctx->info.work_reg_count;
+   } else if (idvs == BI_IDVS_PILOT) {
+      info->pilot.enable = (binary->size > offset);
+      info->pilot.offset = offset;
+      info->pilot.preload = preload;
+      info->pilot.work_reg_count = ctx->info.work_reg_count;
    } else {
       info->preload = preload;
       info->work_reg_count = ctx->info.work_reg_count;

@@ -6996,6 +7001,26 @@ bifrost_compile_shader_nir(nir_shader *nir,
 
    info->tls_size = nir->scratch_size;
 
+   nir_shader *pilot = NULL;
+   nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+   if (entrypoint->preamble) {
+      pilot = nir_shader_create(nir, MESA_SHADER_COMPUTE, nir->options);
+
+      pilot->info = nir->info;
+      pilot->info.stage = MESA_SHADER_COMPUTE;
+      pilot->info.workgroup_size[0] = 1;
+      pilot->info.workgroup_size[1] = 1;
+      pilot->info.workgroup_size[2] = 1;
+      memset(&pilot->info.cs, 0, sizeof(pilot->info.cs));
+
+      nir_function *pilot_main = entrypoint->preamble;
+      nir_steal_function(pilot, pilot_main);
+      entrypoint->preamble = NULL;
+
+      pilot_main->is_preamble = false;
+      pilot_main->is_entrypoint = true;
+   }
+
    pan_nir_collect_varyings(nir, info, PAN_MEDIUMP_VARY_32BIT);
 
    if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {

@@ -7010,6 +7035,9 @@ bifrost_compile_shader_nir(nir_shader *nir,
       bi_compile_variant(nir, inputs, binary, info, BI_IDVS_NONE);
    }
 
+   if (pilot)
+      bi_compile_variant(pilot, inputs, binary, info, BI_IDVS_PILOT);
+
    if (mesa_shader_stage_is_compute(nir->info.stage)) {
       /* Workgroups may be merged if the structure of the workgroup is
        * not software visible. This is true if neither shared memory
@@ -1055,6 +1055,9 @@ enum bi_idvs_mode {
 
    /* IDVS2 in use. Compiling a deferred shader (v12+) */
    BI_IDVS_ALL = 3,
+
+   /* This is a pilot shader, and not a position or vertex shader */
+   BI_IDVS_PILOT = 4,
 };
 
 #define BI_MAX_REGS 64
@@ -16,6 +16,7 @@ libpanfrost_compiler_files = files(
   'pan_nir_lower_vertex_id.c',
   'pan_nir_lower_writeout.c',
   'pan_nir_lower_xfb.c',
+  'pan_nir_opt_pilot.c',
 )
 
 subdir('bifrost')
@@ -315,6 +315,23 @@ struct pan_shader_info {
       } cs;
    };
 
+   struct {
+      /* True if a pilot shader is used */
+      bool enable;
+      /* If the pilot shader is enabled, number of work registers used by
+       * the pilot shader
+       */
+      uint16_t work_reg_count;
+      /* If the pilot shader is enabled, the pilot shader's offset in the
+       * program binary.
+       */
+      uint32_t offset;
+      /* If the pilot shader is enabled, bit mask of preloaded registers
+       * used by the varying shader
+       */
+      uint64_t preload;
+   } pilot;
+
    /* Does the shader contains a barrier? or (for fragment shaders) does it
    * require helper invocations, which demand the same ordering guarantees
    * of the hardware? These notions are unified in the hardware, so we
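For orientation, a small sketch of how a driver is expected to consume the new pilot fields once bi_compile_variant() has filled them in; binary_dev_addr stands in for wherever the driver uploaded the shader blob and is the only assumption here.

/* Sketch: resolve the pilot variant's entry point inside the uploaded blob. */
static uint64_t
pilot_entry_point(const struct pan_shader_info *info, uint64_t binary_dev_addr)
{
   if (!info->pilot.enable)
      return 0;

   /* The pilot variant is appended at a nonzero offset in the same binary. */
   return binary_dev_addr + info->pilot.offset;
}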
@@ -60,6 +60,26 @@ bool pan_nir_lower_framebuffer(nir_shader *shader,
                                unsigned blend_shader_nr_samples,
                                bool broken_ld_special);
 
+struct pan_nir_opt_pilot_params {
+   unsigned gpu_id;
+
+   /* Offset in the FAU where the address to the FAU memory lives. This is
+    * used for writing FAUs from the pilot shader.
+    */
+   uint32_t fau_addr_offset_B;
+
+   /* Offset in the FAU where the pilot data should go */
+   uint32_t fau_pilot_data_offset_B;
+
+   /* Amount of space in the FAU available for pilot data */
+   uint32_t fau_pilot_data_space_B;
+
+   /* Amount of actual pilot data consumed. This is an output parameter */
+   uint32_t pilot_data_size_B;
+};
+
+bool pan_nir_opt_pilot(nir_shader *nir, struct pan_nir_opt_pilot_params *p);
+
 uint32_t pan_nir_collect_noperspective_varyings_fs(nir_shader *s);
 
 /* Specify the mediump lowering behavior for pan_nir_collect_varyings */
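A condensed sketch of how a driver drives the new pass (the real panvk call site appears later in this series). The 8-byte FAU word size and the 64-word FAU file mirror panvk's constants; run_pilot_opt itself and used_fau_words are illustrative.

#define FAU_WORD_SIZE  8   /* bytes per FAU word (panvk's value) */
#define FAU_WORD_COUNT 64  /* FAU words available (panvk's value) */

static void
run_pilot_opt(nir_shader *nir, unsigned gpu_id, uint32_t used_fau_words,
              uint32_t fau_addr_offset_B, uint32_t *pilot_fau_words)
{
   struct pan_nir_opt_pilot_params p = {
      .gpu_id = gpu_id,
      /* Where the pilot shader finds the address of the FAU buffer. */
      .fau_addr_offset_B = fau_addr_offset_B,
      /* Pilot data starts right after the FAU words already in use... */
      .fau_pilot_data_offset_B = FAU_WORD_SIZE * used_fau_words,
      /* ...and may consume whatever is left. */
      .fau_pilot_data_space_B =
         FAU_WORD_SIZE * (FAU_WORD_COUNT - used_fau_words),
   };

   pan_nir_opt_pilot(nir, &p);

   /* Output parameter: FAU words the pilot shader will fill at runtime. */
   *pilot_fau_words =
      (p.pilot_data_size_B + FAU_WORD_SIZE - 1) / FAU_WORD_SIZE;
}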
@@ -93,144 +93,151 @@ kill_depth_stencil_writes(nir_builder *b, nir_intrinsic_instr *intr,
    return true;
 }
 
+static bool
+lower_zs_store_impl(nir_function_impl *impl)
+{
+   nir_intrinsic_instr *stores[3] = {NULL};
+   nir_intrinsic_instr *last_mask_store = NULL;
+   nir_block *mask_block = NULL;
+   unsigned writeout = 0;
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if (intr->intrinsic != nir_intrinsic_store_output)
+            continue;
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+         if (sem.location == FRAG_RESULT_DEPTH) {
+            stores[0] = intr;
+            writeout |= PAN_WRITEOUT_Z;
+         } else if (sem.location == FRAG_RESULT_STENCIL) {
+            stores[1] = intr;
+            writeout |= PAN_WRITEOUT_S;
+         } else if (sem.dual_source_blend_index) {
+            assert(!stores[2]); /* there should be only 1 source for dual blending */
+            stores[2] = intr;
+            writeout |= PAN_WRITEOUT_2;
+         } else if (sem.location == FRAG_RESULT_SAMPLE_MASK) {
+            last_mask_store = intr;
+            mask_block = intr->instr.block;
+         }
+      }
+   }
+
+   if (!writeout && !last_mask_store)
+      return nir_no_progress(impl);
+
+   nir_block *common_block = mask_block;
+
+   /* Ensure all stores are in the same block */
+   for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
+      if (!stores[i])
+         continue;
+
+      nir_block *block = stores[i]->instr.block;
+
+      if (common_block)
+         assert(common_block == block);
+      else
+         common_block = block;
+   }
+
+   /* move data stores in the common block to after the last mask store */
+   if (last_mask_store) {
+      nir_cursor insert_point = nir_after_instr(&last_mask_store->instr);
+      nir_foreach_instr_safe(instr, mask_block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         /* stop when we've reached the last store to mask */
+         if (intr == last_mask_store)
+            break;
+         if (intr->intrinsic != nir_intrinsic_store_output)
+            continue;
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+         if (sem.location >= FRAG_RESULT_DATA0 &&
+             sem.location <= FRAG_RESULT_DATA7) {
+            nir_instr_move(insert_point, instr);
+            insert_point = nir_after_instr(instr);
+         }
+      }
+   }
+
+   bool replaced = false;
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if (intr->intrinsic != nir_intrinsic_store_output)
+            continue;
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+
+         if (sem.location < FRAG_RESULT_DATA0)
+            continue;
+
+         if (sem.dual_source_blend_index)
+            continue;
+
+         assert(nir_src_is_const(intr->src[1]) && "no indirect outputs");
+
+         nir_builder b =
+            nir_builder_at(nir_after_block_before_jump(instr->block));
+
+         /* Trying to write depth twice results in the
+          * wrong blend shader being executed on
+          * Midgard */
+         unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout);
+
+         pan_nir_emit_combined_store(&b, intr, this_store, stores);
+
+         nir_instr_remove(instr);
+
+         replaced = true;
+      }
+   }
+
+   /* Insert a store to the depth RT (0xff) if needed */
+   if (!replaced) {
+      nir_builder b =
+         nir_builder_at(nir_after_block_before_jump(common_block));
+
+      pan_nir_emit_combined_store(&b, NULL, writeout, stores);
+   }
+
+   for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
+      if (stores[i])
+         nir_instr_remove(&stores[i]->instr);
+   }
+
+   return nir_progress(true, impl, nir_metadata_control_flow);
+}
+
 bool
 pan_nir_lower_zs_store(nir_shader *nir)
 {
    bool progress = false;
 
-   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
+      nir_shader_preserve_all_metadata(nir);
       return false;
+   }
 
    /* Remove all stencil/depth writes if early fragment test is forced. */
    if (nir->info.fs.early_fragment_tests)
       progress |= nir_shader_intrinsics_pass(nir, kill_depth_stencil_writes,
                                              nir_metadata_control_flow, NULL);
 
-   nir_foreach_function_impl(impl, nir) {
-      nir_intrinsic_instr *stores[3] = {NULL};
-      nir_intrinsic_instr *last_mask_store = NULL;
-      nir_block *mask_block = NULL;
-      unsigned writeout = 0;
-
-      nir_foreach_block(block, impl) {
-         nir_foreach_instr_safe(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-
-            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-            if (intr->intrinsic != nir_intrinsic_store_output)
-               continue;
-
-            nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-            if (sem.location == FRAG_RESULT_DEPTH) {
-               stores[0] = intr;
-               writeout |= PAN_WRITEOUT_Z;
-            } else if (sem.location == FRAG_RESULT_STENCIL) {
-               stores[1] = intr;
-               writeout |= PAN_WRITEOUT_S;
-            } else if (sem.dual_source_blend_index) {
-               assert(!stores[2]); /* there should be only 1 source for dual blending */
-               stores[2] = intr;
-               writeout |= PAN_WRITEOUT_2;
-            } else if (sem.location == FRAG_RESULT_SAMPLE_MASK) {
-               last_mask_store = intr;
-               mask_block = intr->instr.block;
-            }
-         }
-      }
-
-      if (!writeout && !last_mask_store)
-         continue;
-
-      nir_block *common_block = mask_block;
-
-      /* Ensure all stores are in the same block */
-      for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
-         if (!stores[i])
-            continue;
-
-         nir_block *block = stores[i]->instr.block;
-
-         if (common_block)
-            assert(common_block == block);
-         else
-            common_block = block;
-      }
-
-      /* move data stores in the common block to after the last mask store */
-      if (last_mask_store) {
-         nir_cursor insert_point = nir_after_instr(&last_mask_store->instr);
-         nir_foreach_instr_safe(instr, mask_block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-
-            /* stop when we've reached the last store to mask */
-            if (intr == last_mask_store)
-               break;
-            if (intr->intrinsic != nir_intrinsic_store_output)
-               continue;
-            nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-            if (sem.location >= FRAG_RESULT_DATA0 &&
-                sem.location <= FRAG_RESULT_DATA7) {
-               nir_instr_move(insert_point, instr);
-               insert_point = nir_after_instr(instr);
-            }
-         }
-      }
-
-      bool replaced = false;
-
-      nir_foreach_block(block, impl) {
-         nir_foreach_instr_safe(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-
-            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-            if (intr->intrinsic != nir_intrinsic_store_output)
-               continue;
-
-            nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-
-            if (sem.location < FRAG_RESULT_DATA0)
-               continue;
-
-            if (sem.dual_source_blend_index)
-               continue;
-
-            assert(nir_src_is_const(intr->src[1]) && "no indirect outputs");
-
-            nir_builder b =
-               nir_builder_at(nir_after_block_before_jump(instr->block));
-
-            /* Trying to write depth twice results in the
-             * wrong blend shader being executed on
-             * Midgard */
-            unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout);
-
-            pan_nir_emit_combined_store(&b, intr, this_store, stores);
-
-            nir_instr_remove(instr);
-
-            replaced = true;
-         }
-      }
-
-      /* Insert a store to the depth RT (0xff) if needed */
-      if (!replaced) {
-         nir_builder b =
-            nir_builder_at(nir_after_block_before_jump(common_block));
-
-         pan_nir_emit_combined_store(&b, NULL, writeout, stores);
-      }
-
-      for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
-         if (stores[i])
-            nir_instr_remove(&stores[i]->instr);
-      }
-
-      progress = nir_progress(true, impl, nir_metadata_control_flow);
-   }
+   nir_foreach_function_impl(impl, nir)
+      progress |= lower_zs_store_impl(impl);
 
    return progress;
 }
src/panfrost/compiler/pan_nir_opt_pilot.c (new file, 194 lines)

@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2025 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "panfrost/model/pan_model.h"
+#include "pan_nir.h"
+
+static void
+pilot_def_size(nir_def *def, unsigned *size, unsigned *alignment,
+               nir_preamble_class *class)
+{
+   unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size;
+   unsigned bits = bit_size * def->num_components;
+   *size = align(bits / 8, 4);
+   *alignment = 4;
+   *class = nir_preamble_class_general;
+}
+
+static float
+pilot_instr_cost(nir_instr *instr, const void *data)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (alu->def.bit_size == 64 || alu->src[0].src.ssa->bit_size == 64)
+         return 10.0;
+
+      /* Shrug */
+      return 2.0;
+   }
+
+   case nir_instr_type_tex:
+      return 20.0;
+
+   case nir_instr_type_intrinsic:
+      switch (nir_instr_as_intrinsic(instr)->intrinsic) {
+      case nir_intrinsic_load_ubo:
+      case nir_intrinsic_load_global:
+      case nir_intrinsic_load_global_constant:
+      case nir_intrinsic_load_ssbo:
+         return 20.0;
+      default:
+         /* Assume it's a sysval or something */
+         return 0.0;
+      }
+
+   case nir_instr_type_load_const:
+   case nir_instr_type_undef:
+      return 0.0;
+
+   default:
+      return 1.0;
+   }
+}
+
+static float
+pilot_rewrite_cost(nir_def *def, const void *data)
+{
+   unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size;
+   unsigned bits = bit_size * def->num_components;
+   unsigned dwords = DIV_ROUND_UP(bits, 32);
+
+   bool needs_mov = false;
+   nir_foreach_use(use, def) {
+      nir_instr *parent_instr = nir_src_parent_instr(use);
+      switch (parent_instr->type) {
+      case nir_instr_type_tex:
+      case nir_instr_type_intrinsic:
+         needs_mov = true;
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* It's not a mov but booleans require a conversion */
+   if (def->bit_size == 1)
+      needs_mov = true;
+
+   /* We want rewrites to always cost something or else we'll pull constants
+    * and undefs into the pilot shader.
+    */
+   return needs_mov ? dwords * 2.0 : 1.0;
+}
+
+static bool
+pilot_avoid_instr(const nir_instr *instr, const void *data)
+{
+   return false;
+}
+
+static bool
+lower_pilot_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   const struct pan_nir_opt_pilot_params *p = data;
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_subgroup_size: {
+      b->cursor = nir_before_instr(&intr->instr);
+      unsigned sg_size = pan_subgroup_size(pan_arch(p->gpu_id));
+      nir_def_replace(&intr->def, nir_imm_int(b, sg_size));
+      return true;
+   }
+
+   case nir_intrinsic_load_preamble: {
+      b->cursor = nir_before_instr(&intr->instr);
+
+      uint32_t offset = p->fau_pilot_data_offset_B + nir_intrinsic_base(intr);
+      assert(offset % 4 == 0);
+
+      unsigned bit_size = intr->def.bit_size == 1 ? 32 : intr->def.bit_size;
+      unsigned bits = bit_size * intr->def.num_components;
+      nir_def *val = nir_load_push_constant(b,
+         DIV_ROUND_UP(bits, 32), 32, nir_imm_int(b, offset));
+
+      val = nir_bitcast_vector(b, val, bit_size);
+      val = nir_trim_vector(b, val, intr->def.num_components);
+
+      if (intr->def.bit_size == 1)
+         val = nir_b2b1(b, val);
+
+      nir_def_replace(&intr->def, val);
+      return true;
+   }
+
+   case nir_intrinsic_store_preamble: {
+      b->cursor = nir_before_instr(&intr->instr);
+
+      nir_def *val = intr->src[0].ssa;
+      if (val->bit_size == 1)
+         val = nir_b2b32(b, val);
+
+      nir_def *push_const_buf = nir_load_push_constant(
+         b, 1, 64, nir_imm_int(b, p->fau_addr_offset_B));
+
+      uint32_t offset = p->fau_pilot_data_offset_B + nir_intrinsic_base(intr);
+
+      nir_store_global(b, val, nir_iadd_imm(b, push_const_buf, offset),
+                       .align_mul = 4);
+
+      nir_instr_remove(&intr->instr);
+      return true;
+   }
+
+   default:
+      return false;
+   }
+}
+
+bool
+pan_nir_opt_pilot(nir_shader *nir, struct pan_nir_opt_pilot_params *p)
+{
+   bool progress = false;
+
+   const nir_opt_preamble_options preamble_opts = {
+      .subgroup_size_uniform = true,
+      .def_size = pilot_def_size,
+      .preamble_storage_size = {
+         [nir_preamble_class_general] = p->fau_pilot_data_space_B,
+      },
+      .instr_cost_cb = pilot_instr_cost,
+      .rewrite_cost_cb = pilot_rewrite_cost,
+      .avoid_instr_cb = pilot_avoid_instr,
+   };
+   NIR_PASS(progress, nir, nir_opt_preamble, &preamble_opts,
+            &p->pilot_data_size_B);
+   if (!progress)
+      return false;
+
+   NIR_PASS(progress, nir, nir_shader_intrinsics_pass, lower_pilot_intr,
+            nir_metadata_control_flow, p);
+
+   return progress;
+}
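To make pilot_def_size()'s sizing rule concrete: 1-bit booleans are stored as 32-bit values and each slot is rounded up to 4 bytes with 4-byte alignment. A self-contained restatement of the same arithmetic, with two worked cases in the comment:

/* Same computation as pilot_def_size(), without the NIR types.
 * pilot_slot_size(1, 3)  -> 12 bytes (three bools stored as 32-bit words)
 * pilot_slot_size(16, 3) -> 8 bytes  (48 bits -> 6 bytes -> rounded to 8) */
static unsigned
pilot_slot_size(unsigned bit_size, unsigned num_components)
{
   unsigned stored_bits = (bit_size == 1 ? 32 : bit_size) * num_components;
   return (stored_bits / 8 + 3) & ~3u; /* align(bits / 8, 4) */
}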
@@ -172,9 +172,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    if (result != VK_SUCCESS)
       return;
 
-   result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
-   if (result != VK_SUCCESS)
-      return;
+   if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
+      result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
+      if (result != VK_SUCCESS)
+         return;
+   }
 
    if (compute_state_dirty(cmdbuf, CS) ||
        compute_state_dirty(cmdbuf, DESC_STATE)) {
@@ -452,6 +452,89 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
    return VK_SUCCESS;
 }
 
+static void
+launch_pilot(struct panvk_cmd_buffer *cmdbuf,
+             const struct panvk_shader_variant *s,
+             struct panvk_shader_desc_state *desc_state,
+             uint64_t push_uniforms)
+{
+   if (!s->info.pilot.enable)
+      return;
+
+   const struct pan_compute_dim job_size = { 1, 1, 1 };
+
+   struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
+   cs_update_compute_ctx(b) {
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_2),
+                   desc_state->res_table);
+
+      uint64_t fau_ptr = push_uniforms | ((uint64_t)s->fau.total_count << 56);
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_2), fau_ptr);
+
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_2),
+                   panvk_priv_mem_dev_addr(s->pilot_spd));
+
+      /* TSD_2 is set in update_tls() */
+
+      struct mali_compute_size_workgroup_packed wg_size;
+      pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
+         cfg.workgroup_size_x = 1;
+         cfg.workgroup_size_y = 1;
+         cfg.workgroup_size_z = 1;
+         cfg.allow_merging_workgroups = true;
+      }
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]);
+
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), job_size.x);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), job_size.y);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), job_size.z);
+   }
+
+   const struct cs_tracing_ctx *tracing_ctx =
+      &cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].tracing;
+   cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                        1, MALI_TASK_AXIS_X, cs_shader_res_sel(2, 2, 2, 2));
+
+#if PAN_ARCH >= 11
+   struct cs_index sync_addr = cs_scratch_reg64(b, 0);
+   struct cs_index add_val = cs_scratch_reg64(b, 2);
+
+   cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
+                offsetof(struct panvk_cs_subqueue_context, syncobjs));
+
+   cs_add64(b, sync_addr, sync_addr,
+            PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
+   cs_move64_to(b, add_val, 1);
+   panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true,
+                          MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
+                          cs_defer_indirect());
+#else
+   struct cs_index sync_addr = cs_scratch_reg64(b, 0);
+   struct cs_index iter_sb = cs_scratch_reg32(b, 2);
+   struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
+   struct cs_index add_val = cs_scratch_reg64(b, 4);
+
+   cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
+              BITFIELD_MASK(3),
+              offsetof(struct panvk_cs_subqueue_context, syncobjs));
+
+   cs_add64(b, sync_addr, sync_addr,
+            PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
+   cs_move64_to(b, add_val, 1);
+
+   cs_match_iter_sb(b, x, iter_sb, cmp_scratch) {
+      panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true,
+                             MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
+                             cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC)));
+   }
+#endif
+
+   ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
+}
+
 static bool
 has_depth_att(struct panvk_cmd_buffer *cmdbuf)
 {
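One detail in launch_pilot() worth spelling out: the FAU_2 register does not carry a bare pointer; the FAU word count is packed into the top byte of the push-uniform address, exactly as in the fau_ptr expression above. A small sketch of that packing (the helper names are illustrative):

#include <stdint.h>

/* Sketch: FAU descriptor = 56-bit buffer address | word count in bits 56-63. */
static inline uint64_t
pack_fau_ptr(uint64_t push_uniforms_addr, uint8_t fau_word_count)
{
   return push_uniforms_addr | ((uint64_t)fau_word_count << 56);
}

static inline uint64_t fau_ptr_addr(uint64_t fau_ptr)
{
   return fau_ptr & ((UINT64_C(1) << 56) - 1);
}

static inline uint8_t fau_ptr_count(uint64_t fau_ptr)
{
   return (uint8_t)(fau_ptr >> 56);
}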
@@ -596,6 +679,8 @@ update_tls(struct panvk_cmd_buffer *cmdbuf)
       panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader);
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
+   struct cs_builder *cs_b =
+      panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
 
    if (!cmdbuf->state.gfx.tsd) {
       if (!state->desc.gpu) {

@@ -616,6 +701,10 @@ update_tls(struct panvk_cmd_buffer *cmdbuf)
          cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu);
 #endif
       }
+
+      cs_update_compute_ctx(cs_b) {
+         cs_move64_to(cs_b, cs_sr_reg64(b, COMPUTE, TSD_2), state->desc.gpu);
+      }
    }
 
    state->info.tls.size =
@@ -1764,7 +1853,10 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf,
       }
    }
 
-   if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, FS_PUSH_UNIFORMS)) {
+   bool fs_pilot = fs && fs->info.pilot.enable;
+   if (fs_user_dirty(cmdbuf) ||
+       (fs_pilot && gfx_state_dirty(cmdbuf, DESC_STATE)) ||
+       gfx_state_dirty(cmdbuf, FS_PUSH_UNIFORMS)) {
       uint64_t fau_ptr = 0;
 
       if (fs) {

@@ -1772,6 +1864,10 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf,
          if (result != VK_SUCCESS)
            return result;
 
+         if (fs->info.pilot.enable)
+            launch_pilot(cmdbuf, fs, &cmdbuf->state.gfx.fs.desc,
+                         cmdbuf->state.gfx.fs.push_uniforms);
+
          fau_ptr = cmdbuf->state.gfx.fs.push_uniforms |
                    ((uint64_t)fs->fau.total_count << 56);
       }
@@ -2310,10 +2406,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
 
    panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, draw);
 
-   result = prepare_push_uniforms(cmdbuf, draw);
-   if (result != VK_SUCCESS)
-      return result;
-
    result = prepare_vs(cmdbuf, draw);
    if (result != VK_SUCCESS)
       return result;

@@ -2322,6 +2414,10 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    if (result != VK_SUCCESS)
       return result;
 
+   result = prepare_push_uniforms(cmdbuf, draw);
+   if (result != VK_SUCCESS)
+      return result;
+
    /* Assumes 16 byte slots. We could do better. */
    uint32_t varying_size = get_varying_slots(cmdbuf) * 16;
 
@@ -3069,6 +3165,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
    bool has_oq_chain = cmdbuf->state.gfx.render.oq.chain != 0;
 
+   /* Wait on any pilot shaders */
+   const struct panvk_cs_deps deps = {
+      .src[PANVK_SUBQUEUE_COMPUTE] = {
+         .wait_sb_mask = dev->csf.sb.all_iters_mask,
+      },
+      .dst[PANVK_SUBQUEUE_FRAGMENT] = {
+         .wait_subqueue_mask = BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE),
+      },
+   };
+   panvk_per_arch(emit_barrier)(cmdbuf, deps);
+
    /* Now initialize the fragment bits. */
    cs_update_frag_ctx(b) {
       cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
@@ -103,9 +103,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
 
    panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info);
 
-   result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
-   if (result != VK_SUCCESS)
-      return;
+   if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
+      result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
+      if (result != VK_SUCCESS)
+         return;
+   }
 
    struct pan_ptr copy_desc_job = {0};
 
@@ -1471,12 +1471,14 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw)
       gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
    }
 
-   result = panvk_per_arch(cmd_prepare_push_uniforms)(
-      cmdbuf, vs, 1);
-   if (result != VK_SUCCESS)
-      return;
+   if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
+      result = panvk_per_arch(cmd_prepare_push_uniforms)(
+         cmdbuf, vs, 1);
+      if (result != VK_SUCCESS)
+         return;
+   }
 
-   if (fs) {
+   if (fs && compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
       result = panvk_per_arch(cmd_prepare_push_uniforms)(
          cmdbuf, fs, 1);
       if (result != VK_SUCCESS)

@@ -1633,12 +1635,14 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
       gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
    }
 
-   result = panvk_per_arch(cmd_prepare_push_uniforms)(
-      cmdbuf, vs, 1);
-   if (result != VK_SUCCESS)
-      return;
+   if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
+      result = panvk_per_arch(cmd_prepare_push_uniforms)(
+         cmdbuf, vs, 1);
+      if (result != VK_SUCCESS)
+         return;
+   }
 
-   if (fs) {
+   if (fs && compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
       result = panvk_per_arch(cmd_prepare_push_uniforms)(
          cmdbuf, fs, 1);
       if (result != VK_SUCCESS)
@@ -348,6 +348,8 @@ struct panvk_shader_fau_info {
    BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
    BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
    uint32_t sysval_count;
+   uint32_t push_count;
+   uint32_t pilot_count;
    uint32_t total_count;
 };
 

@@ -416,6 +418,7 @@ struct panvk_shader_variant {
 #endif
       } spds;
    };
+   struct panvk_priv_mem pilot_spd;
 #endif
 
    const char *nir_str;
@@ -17,18 +17,12 @@ panvk_per_arch(cmd_prepare_push_uniforms)(
 
    switch (shader->info.stage) {
    case MESA_SHADER_COMPUTE:
-      if (!compute_state_dirty(cmdbuf, PUSH_UNIFORMS))
-         return VK_SUCCESS;
       push_ptr = &cmdbuf->state.compute.push_uniforms;
       break;
    case MESA_SHADER_VERTEX:
-      if (!gfx_state_dirty(cmdbuf, VS_PUSH_UNIFORMS))
-         return VK_SUCCESS;
      push_ptr = &cmdbuf->state.gfx.vs.push_uniforms;
      break;
    case MESA_SHADER_FRAGMENT:
-      if (!gfx_state_dirty(cmdbuf, FS_PUSH_UNIFORMS))
-         return VK_SUCCESS;
      push_ptr = &cmdbuf->state.gfx.fs.push_uniforms;
      break;
    default:

@@ -78,6 +72,9 @@ panvk_per_arch(cmd_prepare_push_uniforms)(
    BITSET_FOREACH_SET(w, shader->fau.used_push_consts, MAX_PUSH_CONST_FAUS)
       faus[fau++] = push_consts[w];
 
+   /* These are populated by the pilot shader */
+   fau += shader->fau.pilot_count;
+
    for (uint32_t i = 0; i < shader->info.fau_consts_count; i += 2) {
       faus[fau++] = (uint64_t)shader->info.fau_consts[i + 1] << 32 |
                     shader->info.fau_consts[i];
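Putting the FAU bookkeeping together: with panvk's 8-byte FAU words, the fill order above is sysvals, then the push constants actually used, then a hole reserved for pilot-written data, then compile-time FAU constants. A worked example under those assumptions (the counts are made up):

/* Example: 10 sysval words + 6 push-constant words are CPU-written, so
 * pilot data starts at byte offset (10 + 6) * 8 = 128.  If the pilot pass
 * reports 52 bytes of pilot data, that reserves ceil(52 / 8) = 7 words,
 * giving total_count = 10 + 6 + 7 = 23 of the 64 available FAU words. */
static uint32_t
example_total_fau_words(uint32_t sysval_count, uint32_t push_count,
                        uint32_t pilot_data_size_B)
{
   uint32_t pilot_count = (pilot_data_size_B + 7) / 8; /* 8-byte FAU words */
   return sysval_count + push_count + pilot_count;
}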
@@ -569,7 +569,7 @@ load_resource_deref_desc(nir_builder *b, nir_deref_instr *deref,
       b, num_components, bit_size,
       nir_imm_int(b, pan_res_handle(VALHALL_RESOURCE_TABLE_IDX, set + 1)),
       set_offset, .range = ~0u, .align_mul = PANVK_DESCRIPTOR_SIZE,
-      .align_offset = desc_offset);
+      .align_offset = desc_offset, .access = ACCESS_CAN_SPECULATE);
 #endif
 }
 
@@ -1162,6 +1162,9 @@ static bool
 collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex,
                         struct lower_desc_ctx *ctx)
 {
+   /* We'll also set can_speculate here. */
+   tex->can_speculate = true;
+
    bool recorded = false;
    uint32_t plane = 0;
    int sampler_src_idx =

@@ -1184,6 +1187,9 @@ collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex,
       struct panvk_subdesc_info subdesc =
          get_sampler_subdesc_info(binding_layout->type, plane);
 
+      if (binding_layout->flags & VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT)
+         tex->can_speculate = false;
+
       record_binding(ctx, set, binding, subdesc, max_idx);
       recorded = true;
    }

@@ -1203,6 +1209,9 @@ collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex,
       struct panvk_subdesc_info subdesc =
         get_tex_subdesc_info(binding_layout->type, plane);
 
+      if (binding_layout->flags & VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT)
+         tex->can_speculate = false;
+
       record_binding(ctx, set, binding, subdesc, max_idx);
       recorded = true;
    }

@@ -1210,11 +1219,66 @@ collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex,
    return recorded;
 }
 
+static bool
+mark_load_can_speculate(nir_intrinsic_instr *load,
+                        const struct lower_desc_ctx *ctx)
+{
+   assert(load->intrinsic == nir_intrinsic_load_deref);
+
+   /* If it's already marked CAN_SPECULATE, there's nothing to do */
+   enum gl_access_qualifier access = nir_intrinsic_access(load);
+   if (access & ACCESS_CAN_SPECULATE)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
+   if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
+      return false;
+
+   if (nir_deref_mode_is(deref, nir_var_mem_ssbo) &&
+       ((access & ACCESS_VOLATILE) || !(access & ACCESS_CAN_REORDER)))
+      return false;
+
+   while (true) {
+      nir_deref_instr *parent = nir_deref_instr_parent(deref);
+      if (parent == NULL)
+         break;
+      deref = parent;
+   }
+   assert(deref->deref_type == nir_deref_type_cast);
+
+   nir_intrinsic_instr *desc_load = nir_src_as_intrinsic(deref->parent);
+   if (desc_load == NULL ||
+       desc_load->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   nir_intrinsic_instr *desc_index = nir_src_as_intrinsic(desc_load->src[0]);
+   if (desc_index == NULL ||
+       desc_index->intrinsic != nir_intrinsic_vulkan_resource_index)
+      return false;
+
+   unsigned set = nir_intrinsic_desc_set(desc_index);
+   unsigned binding = nir_intrinsic_binding(desc_index);
+   const struct panvk_descriptor_set_layout *set_layout =
+      get_set_layout(set, ctx);
+   const struct panvk_descriptor_set_binding_layout *bind_layout =
+      &set_layout->bindings[binding];
+
+   if (bind_layout->flags & VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT)
+      return false;
+
+   access |= ACCESS_CAN_SPECULATE;
+   nir_intrinsic_set_access(load, access);
+   return true;
+}
+
 static bool
 collect_intr_desc_access(nir_builder *b, nir_intrinsic_instr *intrin,
                          struct lower_desc_ctx *ctx)
 {
    switch (intrin->intrinsic) {
+   case nir_intrinsic_load_deref:
+      return mark_load_can_speculate(intrin, ctx);
+
    case nir_intrinsic_vulkan_resource_index: {
       unsigned set, binding;
 

@@ -1338,6 +1402,9 @@ panvk_per_arch(nir_lower_descriptors)(
    for (uint32_t i = 0; i < set_layout_count; i++)
       ctx.set_layouts[i] = to_panvk_descriptor_set_layout(set_layouts[i]);
 
+   /* Collect descriptor access. This also marks texture and UBO/SSBO loads
+    * as can_speculate based on descriptor binding flags.
+    */
    NIR_PASS(progress, nir, nir_shader_instructions_pass,
            collect_instr_desc_access, nir_metadata_all, &ctx);
    if (!progress)
@@ -65,6 +65,12 @@
 #include "vk_shader.h"
 #include "vk_util.h"
 
+static inline bool
+panvk_wants_pilot_shader(mesa_shader_stage stage, unsigned arch)
+{
+   return arch >= 9 && stage == MESA_SHADER_FRAGMENT;
+}
+
 #define FAU_WORD_COUNT 64
 
 struct panvk_lower_sysvals_context {

@@ -700,6 +706,9 @@ lower_load_push_consts(nir_shader *nir, struct panvk_shader_variant *shader)
       shader_use_sysval(shader, graphics, blend.constants);
    }
 
+   if (panvk_wants_pilot_shader(nir->info.stage, PAN_ARCH))
+      shader_use_sysval(shader, common, push_uniforms);
+
    progress = false;
    NIR_PASS(progress, nir, nir_shader_intrinsics_pass, collect_push_constant,
             nir_metadata_all, shader);

@@ -717,8 +726,8 @@ lower_load_push_consts(nir_shader *nir, struct panvk_shader_variant *shader)
    /* 32 FAUs (256 bytes) are reserved for API push constants */
    assert(shader->fau.sysval_count <= FAU_WORD_COUNT - 32 &&
           "too many sysval FAUs");
-   shader->fau.total_count =
-      shader->fau.sysval_count + BITSET_COUNT(shader->fau.used_push_consts);
+   shader->fau.push_count = BITSET_COUNT(shader->fau.used_push_consts);
+   shader->fau.total_count = shader->fau.sysval_count + shader->fau.push_count;
    assert(shader->fau.total_count <= FAU_WORD_COUNT &&
           "asking for more FAUs than the hardware has to offer");
 

@@ -925,6 +934,26 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
 
    lower_load_push_consts(nir, shader);
 
+   if (panvk_wants_pilot_shader(nir->info.stage, PAN_ARCH)) {
+      /* Our pilots will be more efficient if we optimize first. */
+      pan_optimize_nir(nir, input.gpu_id);
+
+      struct pan_nir_opt_pilot_params pilot = {
+         .gpu_id = input.gpu_id,
+         .fau_addr_offset_B = shader_remapped_sysval_offset(
+            shader, sysval_offset(common, push_uniforms)),
+         .fau_pilot_data_offset_B =
+            FAU_WORD_SIZE * shader->fau.total_count,
+         .fau_pilot_data_space_B =
+            FAU_WORD_SIZE * (FAU_WORD_COUNT - shader->fau.total_count),
+      };
+      pan_nir_opt_pilot(nir, &pilot);
+
+      shader->fau.pilot_count =
+         DIV_ROUND_UP(pilot.pilot_data_size_B, FAU_WORD_SIZE);
+      shader->fau.total_count += shader->fau.pilot_count;
+   }
+
    /* Allow the remaining FAU space to be filled with constants. */
    input.fau_consts.max_amount =
       2 * (FAU_WORD_COUNT - shader->fau.total_count);

@@ -1208,6 +1237,23 @@ panvk_shader_upload(struct panvk_device *dev,
       }
 #endif
    }
+
+   if (shader->info.pilot.enable) {
+      shader->pilot_spd =
+         panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
+      if (!panvk_priv_mem_check_alloc(shader->pilot_spd))
+         return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+      panvk_priv_mem_write_desc(shader->pilot_spd, 0, SHADER_PROGRAM, cfg) {
+         cfg.stage = MALI_SHADER_STAGE_COMPUTE,
+         cfg.register_allocation =
+            pan_register_allocation(shader->info.pilot.work_reg_count);
+         cfg.binary = panvk_shader_variant_get_dev_addr(shader) +
+                      shader->info.pilot.offset;
+         cfg.preload.r48_r63 = (shader->info.pilot.preload >> 48);
+         cfg.flush_to_zero_mode = shader_ftz_mode(shader);
+      }
+   }
 #endif
 
    return VK_SUCCESS;

@@ -1237,6 +1283,7 @@ panvk_shader_variant_destroy(struct panvk_shader_variant *shader)
       panvk_pool_free_mem(&shader->spds.pos_triangles);
 #endif
    }
+   panvk_pool_free_mem(&shader->pilot_spd);
 #endif
 
    if (shader->own_bin)