pvr, pco: clip/cull distance support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2024-12-09 14:56:35 +00:00 committed by Marge Bot
parent 0b757da625
commit 9b595a50ba
7 changed files with 205 additions and 11 deletions

View file

@ -43,6 +43,9 @@ typedef struct _pco_vs_data {
/** Varyings/output mappings. */
pco_range varyings[VARYING_SLOT_MAX];
unsigned clip_count;
unsigned cull_count;
unsigned f32_smooth; /** Number of F32 linear varyings. */
unsigned f32_flat; /** Number of F32 flat varyings. */
unsigned f32_npc; /** Number of F32 NPC varyings. */

View file

@ -1680,10 +1680,12 @@ bool pco_index(pco_shader *shader, bool skip_ssa);
bool pco_legalize(pco_shader *shader);
bool pco_opt_comp_only_vecs(pco_shader *shader);
bool pco_nir_compute_instance_check(nir_shader *shader);
bool pco_nir_link_clip_cull_vars(nir_shader *producer, nir_shader *consumer);
bool pco_nir_lower_algebraic(nir_shader *shader);
bool pco_nir_lower_algebraic_late(nir_shader *shader);
bool pco_nir_lower_atomics(nir_shader *shader, bool *uses_usclib);
bool pco_nir_lower_barriers(nir_shader *shader, bool *uses_usclib);
bool pco_nir_lower_clip_cull_vars(nir_shader *shader);
bool pco_nir_lower_fs_intrinsics(nir_shader *shader);
bool pco_nir_lower_images(nir_shader *shader, pco_data *data);
bool pco_nir_lower_io(nir_shader *shader);

View file

@ -245,6 +245,20 @@ static void gather_fs_data(nir_shader *nir, pco_data *data)
data->fs.uses.sample_shading |= nir->info.fs.uses_sample_shading;
}
/**
 * \brief Collects vertex shader data from the NIR shader info.
 *
 * Records the clip and cull distance array sizes reported by the shader so
 * that they are available through the vertex shader data.
 *
 * \param[in] nir NIR shader.
 * \param[in,out] data Shader data.
 */
static void gather_vs_data(nir_shader *nir, pco_data *data)
{
   data->vs.clip_count = nir->info.clip_distance_array_size;
   data->vs.cull_count = nir->info.cull_distance_array_size;
}
/**
* \brief Gathers compute shader data.
*
@ -350,7 +364,7 @@ static void gather_data(nir_shader *nir, pco_data *data)
return gather_fs_data(nir, data);
case MESA_SHADER_VERTEX:
return;
return gather_vs_data(nir, data);
case MESA_SHADER_COMPUTE:
return gather_cs_data(nir, data);
@ -552,7 +566,7 @@ void pco_preprocess_nir(pco_ctx *ctx, nir_shader *nir)
*/
void pco_link_nir(pco_ctx *ctx, nir_shader *producer, nir_shader *consumer)
{
/* TODO: clip/cull */
pco_nir_link_clip_cull_vars(producer, consumer);
nir_lower_io_array_vars_to_elements(producer, consumer);
nir_validate_shader(producer, "after nir_lower_io_array_vars_to_elements");
@ -739,6 +753,9 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
nir_io_add_const_offset_to_base,
nir_var_shader_in | nir_var_shader_out);
if (nir->info.stage == MESA_SHADER_VERTEX)
NIR_PASS(_, nir, pco_nir_lower_clip_cull_vars);
NIR_PASS(_, nir, pco_nir_lower_variables, true, true);
NIR_PASS(_, nir, pco_nir_lower_images, data);

View file

@ -1343,3 +1343,155 @@ bool pco_nir_lower_fs_intrinsics(nir_shader *shader)
return nir_shader_lower_instructions(shader, is_fs_intr, lower_fs_intr, NULL);
}
/**
 * \brief Replaces the clip/cull distance output variables with vectors.
 *
 * Every output variable located at VARYING_SLOT_CLIP_DIST0/1 is unlinked from
 * the shader, and fresh vector-typed output variables are created in their
 * place: up to four components in CLIP_DIST0 and any remainder in CLIP_DIST1.
 *
 * \param[in,out] shader NIR shader.
 * \return True if the variables were replaced; false for internal shaders or
 *         shaders that use no clip/cull distances.
 */
bool pco_nir_lower_clip_cull_vars(nir_shader *shader)
{
   const unsigned num_comps = shader->info.clip_distance_array_size +
                              shader->info.cull_distance_array_size;

   if (shader->info.internal || !num_comps)
      return false;

   /* Unlink all existing outputs that occupy the clip/cull slots. */
   const gl_varying_slot slots[] = {
      VARYING_SLOT_CLIP_DIST0,
      VARYING_SLOT_CLIP_DIST1,
   };

   for (unsigned s = 0; s < ARRAY_SIZE(slots); ++s) {
      for (;;) {
         nir_variable *old_var =
            nir_find_variable_with_location(shader,
                                            nir_var_shader_out,
                                            slots[s]);
         if (!old_var)
            break;

         exec_node_remove(&old_var->node);
      }
   }

   /* The first slot holds at most four components... */
   const unsigned first_comps = MIN2(num_comps, 4);
   nir_create_variable_with_location(shader,
                                     nir_var_shader_out,
                                     VARYING_SLOT_CLIP_DIST0,
                                     glsl_vec_type(first_comps));

   /* ...and anything beyond that spills into the second slot. */
   if (num_comps > 4) {
      const unsigned second_comps = num_comps - 4;
      nir_create_variable_with_location(shader,
                                        nir_var_shader_out,
                                        VARYING_SLOT_CLIP_DIST1,
                                        glsl_vec_type(second_comps));
   }

   nir_metadata_invalidate(shader);

   return true;
}
/**
 * \brief Mirrors a clip/cull distance store into the cloned output variable.
 *
 * For each array-element store_deref that targets an output variable located
 * at VARYING_SLOT_CLIP_DIST0/1, an additional store of the same value is
 * emitted right after it into the clone array variable. The clone element
 * index is the original array index, plus the variable's location_frac, plus
 * four for variables located in CLIP_DIST1.
 *
 * \param[in] b NIR builder.
 * \param[in] intr Intrinsic instruction being visited.
 * \param[in] data The clone nir_variable to store into.
 * \return True if a cloned store was emitted.
 */
static bool
clone_clip_cull_stores(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (deref->deref_type != nir_deref_type_array)
      return false;

   /* nir_deref_instr_get_variable() may return NULL (deref chains that go
    * through a cast), and data.location is only meaningful for I/O variables,
    * so make sure this really is a shader output before checking its slot.
    */
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var || var->data.mode != nir_var_shader_out)
      return false;

   if (var->data.location != VARYING_SLOT_CLIP_DIST0 &&
       var->data.location != VARYING_SLOT_CLIP_DIST1)
      return false;

   b->cursor = nir_after_instr(&intr->instr);

   /* Flatten (slot, component, element) into an index into the clone array. */
   unsigned var_index = var->data.location - VARYING_SLOT_CLIP_DIST0;
   nir_def *index =
      nir_iadd_imm(b, deref->arr.index.ssa, var->data.location_frac);
   index = nir_iadd_imm(b, index, var_index * 4);

   nir_variable *clone_var = data;
   nir_store_array_var(b, clone_var, index, intr->src[1].ssa, 1);

   return true;
}
/**
 * \brief Filter matching array-element loads of clip/cull distance inputs.
 *
 * \param[in] instr Instruction to test.
 * \param[in] cb_data Unused.
 * \return True if the instruction is a load_deref through an array deref of a
 *         shader input variable located at VARYING_SLOT_CLIP_DIST0/1.
 */
static bool is_clip_cull_load(const nir_instr *instr,
                              UNUSED const void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (deref->deref_type != nir_deref_type_array)
      return false;

   /* nir_deref_instr_get_variable() may return NULL (deref chains that go
    * through a cast), and data.location is only meaningful for I/O variables,
    * so only shader inputs are considered.
    */
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var || var->data.mode != nir_var_shader_in)
      return false;

   return var->data.location == VARYING_SLOT_CLIP_DIST0 ||
          var->data.location == VARYING_SLOT_CLIP_DIST1;
}
/**
 * \brief Rewrites a clip/cull distance load to read from the clone variable.
 *
 * The element read from the clone array is the original array index, plus the
 * source variable's location_frac, plus four for variables located in
 * VARYING_SLOT_CLIP_DIST1. The instruction is expected to be an array-deref
 * load_deref of a clip/cull variable.
 *
 * \param[in] b NIR builder.
 * \param[in] instr Load instruction being replaced.
 * \param[in] cb_data The clone nir_variable to load from.
 * \return The replacement value loaded from the clone variable.
 */
static nir_def *
swap_clip_cull_load(nir_builder *b, nir_instr *instr, void *cb_data)
{
   nir_variable *clone_var = cb_data;

   nir_deref_instr *src_deref =
      nir_src_as_deref(nir_instr_as_intrinsic(instr)->src[0]);
   nir_variable *src_var = nir_deref_instr_get_variable(src_deref);

   /* Four components per slot. */
   unsigned slot_index = src_var->data.location - VARYING_SLOT_CLIP_DIST0;
   unsigned slot_base = slot_index * 4;

   nir_def *elem =
      nir_iadd_imm(b, src_deref->arr.index.ssa, src_var->data.location_frac);
   elem = nir_iadd_imm(b, elem, slot_base);

   return nir_load_array_var(b, clone_var, elem);
}
/**
 * \brief Routes clip/cull distances from a vertex to a fragment shader.
 *
 * When the fragment shader consumes clip/cull distances, a float array is
 * added as an output of the producer and as an input of the consumer, both at
 * the same varying slot. Producer stores to the clip/cull variables are
 * duplicated into that array, and consumer loads of the clip/cull variables
 * are rewritten to read from it.
 *
 * \param[in,out] producer Producer NIR shader.
 * \param[in,out] consumer Consumer NIR shader.
 * \return True if the clone variables were created; false if the stages are
 *         not vertex -> fragment or the consumer uses no clip/cull distances.
 */
bool pco_nir_link_clip_cull_vars(nir_shader *producer, nir_shader *consumer)
{
   const bool is_vs_to_fs = producer->info.stage == MESA_SHADER_VERTEX &&
                            consumer->info.stage == MESA_SHADER_FRAGMENT;
   if (!is_vs_to_fs)
      return false;

   /* Nothing to do unless the consumer actually reads clip/cull comps. */
   const unsigned num_comps = consumer->info.clip_distance_array_size +
                              consumer->info.cull_distance_array_size;
   if (num_comps == 0)
      return false;

   /* Pick the slot following the highest one written by the producer,
    * starting no lower than VARYING_SLOT_VAR0.
    *
    * NOTE(review): only producer outputs are scanned here; presumably the
    * consumer never reads a higher slot than the producer writes - confirm.
    */
   gl_varying_slot free_slot = VARYING_SLOT_VAR0;
   nir_foreach_shader_out_variable (out_var, producer) {
      free_slot = MAX2(free_slot, out_var->data.location + 1);
   }
   assert(free_slot < VARYING_SLOT_MAX);

   const glsl_type *array_type =
      glsl_array_type(glsl_float_type(), num_comps, 0);

   /* Producer side: duplicate the clip/cull stores into the new output. */
   nir_variable *producer_var =
      nir_variable_create(producer, nir_var_shader_out, array_type, NULL);
   producer_var->data.location = free_slot;

   nir_shader_intrinsics_pass(producer,
                              clone_clip_cull_stores,
                              nir_metadata_block_index | nir_metadata_dominance,
                              producer_var);

   /* Consumer side: redirect the clip/cull loads to the new input. */
   nir_variable *consumer_var =
      nir_variable_create(consumer, nir_var_shader_in, array_type, NULL);
   consumer_var->data.location = free_slot;

   nir_shader_lower_instructions(consumer,
                                 is_clip_cull_load,
                                 swap_clip_cull_load,
                                 consumer_var);

   return true;
}

View file

@ -4353,6 +4353,10 @@ static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
const bool has_layer = varyings[VARYING_SLOT_LAYER].count > 0;
const unsigned clip_count = vs_data->vs.clip_count;
const unsigned cull_count = vs_data->vs.cull_count;
const unsigned clip_cull = clip_count + cull_count;
pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
state.rhw_pres = fs_data->fs.uses.w;
state.tsp_unclamped_z_pres = fs_data->fs.uses.z;
@ -4361,6 +4365,24 @@ static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
state.psprite_size_pres = has_point_size;
state.vpt_tgt_pres = has_viewport;
state.render_tgt_pres = has_layer;
state.plane0 = clip_cull > 0;
state.plane1 = clip_cull > 1;
state.plane2 = clip_cull > 2;
state.plane3 = clip_cull > 3;
state.plane4 = clip_cull > 4;
state.plane5 = clip_cull > 5;
state.plane6 = clip_cull > 6;
state.plane7 = clip_cull > 7;
state.cullplane0 = (clip_cull > 0) && (clip_count < 1);
state.cullplane1 = (clip_cull > 1) && (clip_count < 2);
state.cullplane2 = (clip_cull > 2) && (clip_count < 3);
state.cullplane3 = (clip_cull > 3) && (clip_count < 4);
state.cullplane4 = (clip_cull > 4) && (clip_count < 5);
state.cullplane5 = (clip_cull > 5) && (clip_count < 6);
state.cullplane6 = (clip_cull > 6) && (clip_count < 7);
state.cullplane7 = (clip_cull > 7) && (clip_count < 8);
}
if (ppp_state->output_selects != output_selects) {

View file

@ -242,8 +242,8 @@ static void pvr_physical_device_get_supported_features(
.shaderSampledImageArrayDynamicIndexing = false,
.shaderStorageBufferArrayDynamicIndexing = false,
.shaderStorageImageArrayDynamicIndexing = false,
.shaderClipDistance = false,
.shaderCullDistance = false,
.shaderClipDistance = true,
.shaderCullDistance = true,
.shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,
@ -479,10 +479,9 @@ static bool pvr_physical_device_get_properties(
.timestampComputeAndGraphics = false,
.timestampPeriod = 0.0f,
/* Requires shaderClipDistance */
.maxClipDistances = 0U,
.maxCullDistances = 0U,
.maxCombinedClipAndCullDistances = 0U,
.maxClipDistances = PVR_MAX_USER_PLANES,
.maxCullDistances = PVR_MAX_USER_PLANES,
.maxCombinedClipAndCullDistances = PVR_MAX_USER_PLANES,
.discreteQueuePriorities = 2U,

View file

@ -1710,9 +1710,8 @@ static void pvr_alloc_vs_varyings(pco_data *data, nir_shader *nir)
assert(!vars_mask);
const gl_varying_slot last_slots[] = {
VARYING_SLOT_PSIZ,
VARYING_SLOT_VIEWPORT,
VARYING_SLOT_LAYER,
VARYING_SLOT_PSIZ, VARYING_SLOT_VIEWPORT, VARYING_SLOT_LAYER,
VARYING_SLOT_CLIP_DIST0, VARYING_SLOT_CLIP_DIST1,
};
for (unsigned u = 0; u < ARRAY_SIZE(last_slots); ++u) {