pco: initial image support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
Simon Perretta 2025-02-28 13:19:18 +00:00 committed by Marge Bot
parent b930b13372
commit 3af73ef199
6 changed files with 493 additions and 32 deletions

View file

@@ -2703,6 +2703,15 @@ intrinsic("smp_pco", src_comp=[16, 4, 4], dest_comp=0, indices=[SMP_FLAGS_PCO, R
# Returns the calculated sampling coefficients for the given data and state words.
intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=8, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# smp_raw_pco(data, tex_state, smp_state)
# Returns the raw sampling data for the given data and state words.
# Actually outputs 4/8/12/16 components, but NIR doesn't support num_components == 12, so fake it as 16 for now.
intrinsic("smp_raw_pco", src_comp=[16, 4, 4], dest_comp=16, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# smp_write_pco(data, tex_state, smp_state)
# Performs a sample write for the given data and state words.
intrinsic("smp_write_pco", src_comp=[16, 4, 4], indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# alphatst_pco(data, comparator, comparison op)
# Performs an alpha test on the given parameters, returning float 0/1 depending on the comparison result.
intrinsic("alphatst_pco", src_comp=[1, 1, 1], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])

View file

@@ -1626,7 +1626,8 @@ typedef union PACKED _pco_smp_flags {
bool sno : 1;
bool array : 1;
bool integer : 1;
unsigned pad : 3;
bool wrt : 1;
unsigned pad : 2;
};
uint16_t _;
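Since the flag bits are mirrored by the 16-bit backing field, adding wrt is paired with shrinking pad from 3 to 2. A minimal sanity-check sketch, placed after the typedef (assumes C11 static_assert is available in this header):

static_assert(sizeof(pco_smp_flags) == sizeof(uint16_t),
              "pco_smp_flags bitfields must fit in 16 bits");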
@@ -1648,6 +1649,7 @@ bool pco_nir_lower_algebraic(nir_shader *shader);
bool pco_nir_lower_algebraic_late(nir_shader *shader);
bool pco_nir_lower_atomics(nir_shader *shader, bool *uses_usclib);
bool pco_nir_lower_barriers(nir_shader *shader, bool *uses_usclib);
bool pco_nir_lower_images(nir_shader *shader);
bool pco_nir_lower_io(nir_shader *shader);
bool pco_nir_lower_tex(nir_shader *shader);
bool pco_nir_lower_vk(nir_shader *shader, pco_common_data *common);

View file

@@ -434,7 +434,6 @@ static void pco_nir_opt(pco_ctx *ctx, nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_loop);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
}
@@ -712,6 +711,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
nir_io_add_const_offset_to_base,
nir_var_shader_in | nir_var_shader_out);
NIR_PASS(_, nir, pco_nir_lower_images);
NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){});
NIR_PASS(_, nir, pco_nir_lower_tex);

View file

@@ -7,7 +7,7 @@
/**
* \file pco_nir_tex.c
*
* \brief PCO NIR texture/sampler lowering passes.
* \brief PCO NIR texture/image/sampler lowering passes.
*/
#include "hwdef/rogue_hw_defs.h"
@@ -172,7 +172,6 @@ lower_tex_query_lod(nir_builder *b, nir_def *coords, nir_def *smp_coeffs)
static inline unsigned process_coords(nir_builder *b,
bool is_array,
bool is_query_lod,
bool coords_are_float,
nir_def *coords,
nir_def **float_coords,
@@ -187,7 +186,7 @@ static inline unsigned process_coords(nir_builder *b,
*float_array_index = NULL;
*int_array_index = NULL;
if (!is_array || is_query_lod)
if (!is_array)
return num_comps;
*float_array_index = nir_channel(b, *float_coords, num_comps - 1);
@@ -227,6 +226,221 @@ static inline void unpack_base_addr(nir_builder *b,
*base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8);
}
typedef struct _pco_smp_params {
nir_def *tex_state;
nir_def *smp_state;
nir_alu_type dest_type;
enum glsl_sampler_dim sampler_dim;
bool nncoords;
nir_def *coords;
nir_def *array_index;
nir_def *proj;
nir_def *lod_bias;
nir_def *lod_replace;
nir_def *lod_ddx;
nir_def *lod_ddy;
nir_def *addr_lo;
nir_def *addr_hi;
nir_def *offset;
nir_def *ms_index;
nir_def *write_data;
bool sample_coeffs;
bool sample_raw;
unsigned sample_components;
bool int_mode;
} pco_smp_params;
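/* Exactly one output mode is expected per pco_emit_nir_smp() call:
 * a regular sample (sample_components > 0), sample_coeffs, sample_raw,
 * or a write (write_data); the asserts below enforce this.
 */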
static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b,
pco_smp_params *params)
{
nir_def *comps[NIR_MAX_VEC_COMPONENTS];
unsigned count = 0;
pco_smp_flags smp_flags = {
.dim = to_pco_dim(params->sampler_dim),
.fcnorm = nir_alu_type_get_base_type(params->dest_type) == nir_type_float,
.nncoords = params->nncoords,
.lod_mode = PCO_LOD_MODE_NORMAL,
.integer = params->int_mode,
};
/* Emit coords (excluding array component if present). */
for (unsigned c = 0; c < params->coords->num_components; ++c)
comps[count++] = nir_channel(b, params->coords, c);
/* Emit projector (if present). */
if (params->proj) {
comps[count++] = params->proj;
smp_flags.proj = true;
}
/* Emit hardware array component (if present). */
if (params->array_index) {
comps[count++] = params->array_index;
smp_flags.array = true;
}
/* Emit LOD (if present). */
bool lod_present = false;
assert(!!params->lod_ddx == !!params->lod_ddy);
assert((!!params->lod_bias + !!params->lod_replace + !!params->lod_ddx) < 2);
if (params->lod_bias) {
lod_present = true;
comps[count++] = params->lod_bias;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
} else if (params->lod_replace) {
lod_present = true;
comps[count++] = params->lod_replace;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_REPLACE;
} else if (params->lod_ddx) {
lod_present = true;
for (unsigned c = 0; c < params->lod_ddx->num_components; ++c) {
comps[count++] = nir_channel(b, params->lod_ddx, c);
comps[count++] = nir_channel(b, params->lod_ddy, c);
}
smp_flags.lod_mode = PCO_LOD_MODE_GRADIENTS;
}
/* Emit address override (if present). */
assert(!!params->addr_lo == !!params->addr_hi);
if (params->addr_lo) {
/* Set a per-pixel lod bias of 0 if none has been set yet. */
if (!lod_present) {
comps[count++] = nir_imm_int(b, 0);
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
lod_present = true;
}
comps[count++] = params->addr_lo;
comps[count++] = params->addr_hi;
smp_flags.tao = true;
}
/* Emit lookup options (if present). */
if (params->offset || params->ms_index) {
nir_def *lookup = nir_imm_int(b, 0);
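/* Packed lookup word, as built below:
 *   bits  0..5   x offset
 *   bits  6..11  y offset
 *   bits 12..15  z offset
 *   bits 16..18  sample number
 */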
if (params->offset) {
const unsigned packed_offset_start[] = { 0, 6, 12 };
const unsigned packed_offset_size[] = { 6, 6, 4 };
for (unsigned c = 0; c < params->offset->num_components; ++c) {
lookup = nir_bitfield_insert(b,
lookup,
nir_channel(b, params->offset, c),
nir_imm_int(b, packed_offset_start[c]),
nir_imm_int(b, packed_offset_size[c]));
}
smp_flags.soo = true;
}
if (params->ms_index) {
lookup = nir_bitfield_insert(b,
lookup,
params->ms_index,
nir_imm_int(b, 16),
nir_imm_int(b, 3));
smp_flags.sno = true;
}
comps[count++] = lookup;
}
/* Emit write data (if present). */
if (params->write_data) {
for (unsigned c = 0; c < params->write_data->num_components; ++c)
comps[count++] = nir_channel(b, params->write_data, c);
smp_flags.wrt = true;
}
/* Pad out the rest of the data words. */
assert(count <= NIR_MAX_VEC_COMPONENTS);
nir_def *undef = nir_undef(b, 1, 32);
for (unsigned c = count; c < ARRAY_SIZE(comps); ++c)
comps[c] = undef;
nir_def *smp_data = nir_vec(b, comps, ARRAY_SIZE(comps));
if (params->sample_coeffs) {
assert(!params->sample_raw);
assert(!params->sample_components);
assert(!params->write_data);
nir_def *def = nir_smp_coeffs_pco(b,
smp_data,
params->tex_state,
params->smp_state,
.smp_flags_pco = smp_flags._,
.range = count);
return nir_instr_as_intrinsic(def->parent_instr);
}
if (params->sample_raw) {
assert(!params->sample_coeffs);
assert(!params->sample_components);
assert(!params->write_data);
nir_def *def = nir_smp_raw_pco(b,
smp_data,
params->tex_state,
params->smp_state,
.smp_flags_pco = smp_flags._,
.range = count);
return nir_instr_as_intrinsic(def->parent_instr);
}
if (params->write_data) {
assert(!params->sample_coeffs);
assert(!params->sample_raw);
assert(!params->sample_components);
return nir_smp_write_pco(b,
smp_data,
params->tex_state,
params->smp_state,
.smp_flags_pco = smp_flags._,
.range = count);
}
assert(!params->sample_coeffs);
assert(!params->sample_raw);
assert(params->sample_components > 0);
assert(!params->write_data);
nir_def *def = nir_smp_pco(b,
params->sample_components,
smp_data,
params->tex_state,
params->smp_state,
.smp_flags_pco = smp_flags._,
.range = count);
return nir_instr_as_intrinsic(def->parent_instr);
}
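/* Illustrative usage sketch (placeholder values; b, tex_state, smp_state
 * and coords are assumed to be provided by the surrounding lowering code):
 * a plain 2D float sample returning RGBA could be emitted as
 *
 *    pco_smp_params params = {
 *       .tex_state = tex_state,
 *       .smp_state = smp_state,
 *       .dest_type = nir_type_float32,
 *       .sampler_dim = GLSL_SAMPLER_DIM_2D,
 *       .coords = coords,
 *       .sample_components = 4,
 *    };
 *    nir_intrinsic_instr *smp = pco_emit_nir_smp(b, &params);
 *    nir_def *texel = &smp->def;
 */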
/**
* \brief Lowers a texture instruction.
*
@@ -300,8 +514,7 @@ lower_tex(nir_builder *b, nir_instr *instr, UNUSED void *cb_data)
nir_def *int_array_index;
unsigned num_coord_comps =
process_coords(b,
tex->is_array,
tex->op == nir_texop_lod,
tex->is_array && tex->op != nir_texop_lod,
tex_src_is_float(tex, nir_tex_src_coord),
tex_srcs[nir_tex_src_coord],
&float_coords,
@@ -570,3 +783,190 @@ bool pco_nir_lower_tex(nir_shader *shader)
{
return nir_shader_lower_instructions(shader, is_tex, lower_tex, NULL);
}
static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
bool hw_array_support = false;
bool hw_int_support = false;
enum glsl_sampler_dim image_dim = nir_intrinsic_image_dim(intr);
bool is_array = nir_intrinsic_image_array(intr);
enum pipe_format format = nir_intrinsic_format(intr);
nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
unsigned desc_set = nir_src_comp_as_uint(intr->src[0], 0);
unsigned binding = nir_src_comp_as_uint(intr->src[0], 1);
nir_def *elem = nir_channel(b, intr->src[0].ssa, 2);
nir_def *coords = !nir_src_is_undef(intr->src[1]) ? intr->src[1].ssa : NULL;
nir_def *sample_index = !nir_src_is_undef(intr->src[2]) ? intr->src[2].ssa
: NULL;
nir_def *lod = !nir_src_is_undef(intr->src[3]) ? intr->src[3].ssa : NULL;
ASSERTED bool msaa = image_dim == GLSL_SAMPLER_DIM_MS ||
image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS;
assert(!!sample_index == msaa);
nir_def *tex_state = nir_load_tex_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
elem,
.desc_set = desc_set,
.binding = binding);
nir_def *smp_state = nir_load_smp_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
nir_imm_int(b, 0),
.desc_set = PCO_POINT_SAMPLER,
.binding = PCO_POINT_SAMPLER);
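/* Image accesses are unfiltered texel fetches, so a shared point sampler
 * is used rather than a user sampler, and coordinates are passed
 * unnormalised (nncoords is set below).
 */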
unsigned num_coord_comps =
glsl_get_sampler_dim_coordinate_components(image_dim) + !!is_array;
if (coords)
coords = nir_trim_vector(b, coords, num_coord_comps);
nir_def *float_coords;
nir_def *int_coords;
nir_def *float_array_index;
nir_def *int_array_index;
num_coord_comps = process_coords(b,
is_array,
false,
coords,
&float_coords,
&int_coords,
&float_array_index,
&int_array_index);
nir_def *smp_data_comps[NIR_MAX_VEC_COMPONENTS];
unsigned smp_data_comp_count = 0;
pco_smp_flags smp_flags = {
.dim = to_pco_dim(image_dim),
.lod_mode = PCO_LOD_MODE_NORMAL,
};
for (unsigned c = 0; c < num_coord_comps; ++c) {
smp_data_comps[smp_data_comp_count++] =
nir_channel(b, hw_int_support ? int_coords : float_coords, c);
}
if (hw_array_support && int_array_index) {
smp_data_comps[smp_data_comp_count++] =
hw_int_support ? int_array_index : float_array_index;
smp_flags.array = true;
}
bool lod_set = false;
if (lod) {
lod = nir_i2f32(b, lod);
smp_data_comps[smp_data_comp_count++] = lod;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_REPLACE;
lod_set = true;
}
if (!hw_array_support && int_array_index) {
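/* No hardware layer addressing: emulate arrays by clamping the layer
 * index and overriding the texture base address with
 * base + layer_index * layer_stride (the tao flag).
 */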
/* Set a per-pixel lod bias of 0 if none has been set yet. */
if (!lod_set) {
smp_data_comps[smp_data_comp_count++] = nir_imm_int(b, 0);
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
lod_set = true;
}
nir_def *tex_state_word[] = {
[0] = nir_channel(b, tex_state, 0),
[1] = nir_channel(b, tex_state, 1),
[2] = nir_channel(b, tex_state, 2),
[3] = nir_channel(b, tex_state, 3),
};
nir_def *base_addr_lo;
nir_def *base_addr_hi;
unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi);
nir_def *array_index = int_array_index;
assert(array_index);
nir_def *array_size = STATE_UNPACK_ADD(b, tex_state_word, 2, 4, 11, 1);
array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_size);
nir_def *tex_meta = nir_load_tex_meta_pco(b,
PCO_IMAGE_META_COUNT,
elem,
.desc_set = desc_set,
.binding = binding);
nir_def *array_stride =
nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE);
nir_def *array_offset = nir_imul(b, array_index, array_stride);
nir_def *addr =
nir_uadd64_2x32_lo(b, base_addr_lo, base_addr_hi, array_offset);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 0);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 1);
smp_flags.tao = true;
}
if (sample_index) {
nir_def *lookup = nir_bitfield_insert(b,
nir_imm_int(b, 0),
sample_index,
nir_imm_int(b, 16),
nir_imm_int(b, 3));
smp_data_comps[smp_data_comp_count++] = lookup;
smp_flags.sno = true;
}
/* Pad out the rest of the data words. */
assert(smp_data_comp_count <= NIR_MAX_VEC_COMPONENTS);
for (unsigned c = smp_data_comp_count; c < ARRAY_SIZE(smp_data_comps); ++c)
smp_data_comps[c] = nir_imm_int(b, 0);
nir_def *smp_data = nir_vec(b, smp_data_comps, ARRAY_SIZE(smp_data_comps));
smp_flags.nncoords = true;
smp_flags.integer = hw_int_support;
smp_flags.fcnorm = nir_alu_type_get_base_type(dest_type) == nir_type_float;
return nir_smp_pco(b,
intr->def.num_components,
smp_data,
tex_state,
smp_state,
.smp_flags_pco = smp_flags._,
.range = smp_data_comp_count);
}
static bool is_image(const nir_instr *instr, UNUSED const void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
return true;
default:
break;
}
return false;
}
bool pco_nir_lower_images(nir_shader *shader)
{
return nir_shader_lower_instructions(shader, is_image, lower_image, NULL);
}

View file

@@ -62,6 +62,21 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b,
return nir_imm_ivec3(b, desc_set_binding, elem, 0);
}
static nir_def *array_elem_from_deref(nir_builder *b, nir_deref_instr *deref)
{
unsigned array_elem = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
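/* nir_src_as_uint() requires a constant array index; dynamic indexing
 * of the descriptor array is not expected at this point.
 */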
array_elem = nir_src_as_uint(deref->arr.index);
deref = nir_deref_instr_parent(deref);
}
assert(deref->deref_type == nir_deref_type_var);
return nir_imm_int(b, array_elem);
}
static void lower_tex_deref_to_binding(nir_builder *b,
nir_tex_instr *tex,
unsigned deref_index,
@@ -73,20 +88,11 @@ static void lower_tex_deref_to_binding(nir_builder *b,
b->cursor = nir_before_instr(&tex->instr);
unsigned array_elem = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
array_elem = nir_src_as_uint(deref->arr.index);
deref = nir_deref_instr_parent(deref);
}
nir_def *elem = nir_imm_int(b, array_elem);
assert(deref->deref_type == nir_deref_type_var);
unsigned desc_set = deref->var->data.descriptor_set;
unsigned binding = deref->var->data.binding;
nir_variable *var = nir_deref_instr_get_variable(deref);
assert(var);
unsigned desc_set = var->data.descriptor_set;
unsigned binding = var->data.binding;
nir_def *elem = array_elem_from_deref(b, deref);
set_resource_used(common, desc_set, binding);
@@ -116,6 +122,34 @@ lower_tex_derefs(nir_builder *b, nir_tex_instr *tex, pco_common_data *common)
lower_tex_deref_to_binding(b, tex, deref_index, common);
}
static nir_def *lower_image_derefs(nir_builder *b,
nir_intrinsic_instr *intr,
pco_common_data *common)
{
nir_src *deref_src = &intr->src[0];
nir_deref_instr *deref = nir_src_as_deref(*deref_src);
b->cursor = nir_before_instr(&intr->instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
assert(var);
unsigned desc_set = var->data.descriptor_set;
unsigned binding = var->data.binding;
nir_def *elem = array_elem_from_deref(b, deref);
set_resource_used(common, desc_set, binding);
common->uses.point_sampler = true;
if (nir_intrinsic_format(intr) == PIPE_FORMAT_NONE)
nir_intrinsic_set_format(intr, var->data.image.format);
nir_def *index =
nir_vec3(b, nir_imm_int(b, desc_set), nir_imm_int(b, binding), elem);
nir_src_rewrite(deref_src, index);
return NIR_LOWER_INSTR_PROGRESS;
}
/**
* \brief Lowers a Vulkan-related instruction.
*
@@ -135,6 +169,9 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data)
case nir_intrinsic_load_vulkan_descriptor:
return lower_load_vulkan_descriptor(b, intr, common);
case nir_intrinsic_image_deref_load:
return lower_image_derefs(b, intr, common);
default:
break;
}
@@ -169,6 +206,7 @@ static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_image_deref_load:
return true;
default:

View file

@@ -428,21 +428,33 @@ static unsigned fetch_resource_base_reg(const pco_common_data *common,
unsigned elem,
bool *is_img_smp)
{
assert(desc_set < ARRAY_SIZE(common->desc_sets));
const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set];
assert(desc_set_data->used);
assert(desc_set_data->bindings && binding < desc_set_data->binding_count);
const pco_range *range;
if (desc_set == PCO_POINT_SAMPLER && binding == PCO_POINT_SAMPLER) {
assert(common->uses.point_sampler);
range = &common->point_sampler;
const pco_binding_data *binding_data = &desc_set_data->bindings[binding];
assert(binding_data->used);
if (is_img_smp)
*is_img_smp = false;
} else {
assert(desc_set < ARRAY_SIZE(common->desc_sets));
const pco_descriptor_set_data *desc_set_data =
&common->desc_sets[desc_set];
assert(desc_set_data->used);
assert(desc_set_data->bindings && binding < desc_set_data->binding_count);
if (is_img_smp)
*is_img_smp = binding_data->is_img_smp;
const pco_binding_data *binding_data = &desc_set_data->bindings[binding];
assert(binding_data->used);
unsigned reg_offset = elem * binding_data->range.stride;
assert(reg_offset < binding_data->range.count);
range = &binding_data->range;
unsigned reg_index = binding_data->range.start + reg_offset;
if (is_img_smp)
*is_img_smp = binding_data->is_img_smp;
}
unsigned reg_offset = elem * range->stride;
assert(reg_offset < range->count);
unsigned reg_index = range->start + reg_offset;
return reg_index;
}