From 3af73ef1990d9a14b62809546bbc1ed1bad2ed84 Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Fri, 28 Feb 2025 13:19:18 +0000 Subject: [PATCH] pco: initial image support Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/compiler/nir/nir_intrinsics.py | 9 + src/imagination/pco/pco_internal.h | 4 +- src/imagination/pco/pco_nir.c | 2 +- src/imagination/pco/pco_nir_tex.c | 410 +++++++++++++++++++++++++++- src/imagination/pco/pco_nir_vk.c | 66 ++++- src/imagination/pco/pco_trans_nir.c | 34 ++- 6 files changed, 493 insertions(+), 32 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 8d4d66d9909..5310d5924fb 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2703,6 +2703,15 @@ intrinsic("smp_pco", src_comp=[16, 4, 4], dest_comp=0, indices=[SMP_FLAGS_PCO, R # Returns the calculated sampling coefficients for the given data and state words. intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=8, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32]) +# smp_raw_pco(data, tex_state, smp_state) +# Returns the raw sampling data for the given data and state words. +# Actually outputs 4/8/12/16 components, but NIR doesn't support num_components == 12, so fake it as 16 for now. +intrinsic("smp_raw_pco", src_comp=[16, 4, 4], dest_comp=16, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32]) + +# smp_write_pco(data, tex_state, smp_state) +# Performs a sample write for the given data and state words. +intrinsic("smp_write_pco", src_comp=[16, 4, 4], indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32]) + # alphatst_pco(data, comparator, comparison op) # Performs an alpha test on the given parameters, returning float 0/1 depending on the comparison result. 
intrinsic("alphatst_pco", src_comp=[1, 1, 1], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h index 093e110a2cc..7ef710583bd 100644 --- a/src/imagination/pco/pco_internal.h +++ b/src/imagination/pco/pco_internal.h @@ -1626,7 +1626,8 @@ typedef union PACKED _pco_smp_flags { bool sno : 1; bool array : 1; bool integer : 1; - unsigned pad : 3; + bool wrt : 1; + unsigned pad : 2; }; uint16_t _; @@ -1648,6 +1649,7 @@ bool pco_nir_lower_algebraic(nir_shader *shader); bool pco_nir_lower_algebraic_late(nir_shader *shader); bool pco_nir_lower_atomics(nir_shader *shader, bool *uses_usclib); bool pco_nir_lower_barriers(nir_shader *shader, bool *uses_usclib); +bool pco_nir_lower_images(nir_shader *shader); bool pco_nir_lower_io(nir_shader *shader); bool pco_nir_lower_tex(nir_shader *shader); bool pco_nir_lower_vk(nir_shader *shader, pco_common_data *common); diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index 464cadc8d6c..db90d3bb02a 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -434,7 +434,6 @@ static void pco_nir_opt(pco_ctx *ctx, nir_shader *nir) NIR_PASS(progress, nir, nir_opt_loop); NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, nir, nir_opt_undef); - NIR_PASS(progress, nir, nir_lower_undef_to_zero); NIR_PASS(progress, nir, nir_opt_loop_unroll); } while (progress); } @@ -712,6 +711,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data) nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out); + NIR_PASS(_, nir, pco_nir_lower_images); NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){}); NIR_PASS(_, nir, pco_nir_lower_tex); diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c index ebec148d5be..a2be5017455 100644 --- a/src/imagination/pco/pco_nir_tex.c +++ b/src/imagination/pco/pco_nir_tex.c @@ -7,7 +7,7 @@ 
/** * \file pco_nir_tex.c * - * \brief PCO NIR texture/sampler lowering passes. + * \brief PCO NIR texture/image/sampler lowering passes. */ #include "hwdef/rogue_hw_defs.h" @@ -172,7 +172,6 @@ lower_tex_query_lod(nir_builder *b, nir_def *coords, nir_def *smp_coeffs) static inline unsigned process_coords(nir_builder *b, bool is_array, - bool is_query_lod, bool coords_are_float, nir_def *coords, nir_def **float_coords, @@ -187,7 +186,7 @@ static inline unsigned process_coords(nir_builder *b, *float_array_index = NULL; *int_array_index = NULL; - if (!is_array || is_query_lod) + if (!is_array) return num_comps; *float_array_index = nir_channel(b, *float_coords, num_comps - 1); @@ -227,6 +226,221 @@ static inline void unpack_base_addr(nir_builder *b, *base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8); } +typedef struct _pco_smp_params { + nir_def *tex_state; + nir_def *smp_state; + + nir_alu_type dest_type; + + enum glsl_sampler_dim sampler_dim; + + bool nncoords; + nir_def *coords; + nir_def *array_index; + + nir_def *proj; + + nir_def *lod_bias; + nir_def *lod_replace; + nir_def *lod_ddx; + nir_def *lod_ddy; + + nir_def *addr_lo; + nir_def *addr_hi; + + nir_def *offset; + nir_def *ms_index; + + nir_def *write_data; + + bool sample_coeffs; + bool sample_raw; + unsigned sample_components; + + bool int_mode; +} pco_smp_params; + +static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b, + pco_smp_params *params) +{ + nir_def *comps[NIR_MAX_VEC_COMPONENTS]; + unsigned count = 0; + pco_smp_flags smp_flags = { + .dim = to_pco_dim(params->sampler_dim), + .fcnorm = nir_alu_type_get_base_type(params->dest_type) == nir_type_float, + .nncoords = params->nncoords, + .lod_mode = PCO_LOD_MODE_NORMAL, + .integer = params->int_mode, + }; + + /* Emit coords (excluding array component if present). */ + for (unsigned c = 0; c < params->coords->num_components; ++c) + comps[count++] = nir_channel(b, params->coords, c); + + /* Emit projector (if present). 
*/ + if (params->proj) { + comps[count++] = params->proj; + smp_flags.proj = true; + } + + /* Emit hardware array component (if present). */ + if (params->array_index) { + comps[count++] = params->array_index; + smp_flags.array = true; + } + + /* Emit LOD (if present). */ + bool lod_present = false; + assert(!!params->lod_ddx == !!params->lod_ddy); + assert((!!params->lod_bias + !!params->lod_replace + !!params->lod_ddx) < 2); + if (params->lod_bias) { + lod_present = true; + comps[count++] = params->lod_bias; + + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_BIAS; + } else if (params->lod_replace) { + lod_present = true; + comps[count++] = params->lod_replace; + + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_REPLACE; + } else if (params->lod_ddx) { + lod_present = true; + + for (unsigned c = 0; c < params->lod_ddx->num_components; ++c) { + comps[count++] = nir_channel(b, params->lod_ddx, c); + comps[count++] = nir_channel(b, params->lod_ddy, c); + } + + smp_flags.lod_mode = PCO_LOD_MODE_GRADIENTS; + } + + /* Emit address override (if present). */ + assert(!!params->addr_lo == !!params->addr_hi); + if (params->addr_lo) { + /* Set a per-pixel lod bias of 0 if none has been set yet. */ + if (!lod_present) { + comps[count++] = nir_imm_int(b, 0); + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_BIAS; + lod_present = true; + } + + comps[count++] = params->addr_lo; + comps[count++] = params->addr_hi; + + smp_flags.tao = true; + } + + /* Emit lookup options (if present). 
*/ + if (params->offset || params->ms_index) { + nir_def *lookup = nir_imm_int(b, 0); + + if (params->offset) { + const unsigned packed_offset_start[] = { 0, 6, 12 }; + const unsigned packed_offset_size[] = { 6, 6, 4 }; + + for (unsigned c = 0; c < params->offset->num_components; ++c) { + lookup = nir_bitfield_insert(b, + lookup, + nir_channel(b, params->offset, c), + nir_imm_int(b, packed_offset_start[c]), + nir_imm_int(b, packed_offset_size[c])); + } + + smp_flags.soo = true; + } + + if (params->ms_index) { + lookup = nir_bitfield_insert(b, + lookup, + params->ms_index, + nir_imm_int(b, 16), + nir_imm_int(b, 3)); + + smp_flags.sno = true; + } + + comps[count++] = lookup; + } + + /* Emit write data (if present). */ + if (params->write_data) { + for (unsigned c = 0; c < params->write_data->num_components; ++c) + comps[count++] = nir_channel(b, params->write_data, c); + + smp_flags.wrt = true; + } + + /* Pad out the rest of the data words. */ + assert(count <= NIR_MAX_VEC_COMPONENTS); + + nir_def *undef = nir_undef(b, 1, 32); + for (unsigned c = count; c < ARRAY_SIZE(comps); ++c) + comps[c] = undef; + + nir_def *smp_data = nir_vec(b, comps, ARRAY_SIZE(comps)); + + if (params->sample_coeffs) { + assert(!params->sample_raw); + assert(!params->sample_components); + assert(!params->write_data); + + nir_def *def = nir_smp_coeffs_pco(b, + smp_data, + params->tex_state, + params->smp_state, + .smp_flags_pco = smp_flags._, + .range = count); + + return nir_instr_as_intrinsic(def->parent_instr); + } + + if (params->sample_raw) { + assert(!params->sample_coeffs); + assert(!params->sample_components); + assert(!params->write_data); + + nir_def *def = nir_smp_raw_pco(b, + smp_data, + params->tex_state, + params->smp_state, + .smp_flags_pco = smp_flags._, + .range = count); + + return nir_instr_as_intrinsic(def->parent_instr); + } + + if (params->write_data) { + assert(!params->sample_coeffs); + assert(!params->sample_raw); + assert(!params->sample_components); + + return 
nir_smp_write_pco(b, + smp_data, + params->tex_state, + params->smp_state, + .smp_flags_pco = smp_flags._, + .range = count); + } + + assert(!params->sample_coeffs); + assert(!params->sample_raw); + assert(params->sample_components > 0); + assert(!params->write_data); + + nir_def *def = nir_smp_pco(b, + params->sample_components, + smp_data, + params->tex_state, + params->smp_state, + .smp_flags_pco = smp_flags._, + .range = count); + + return nir_instr_as_intrinsic(def->parent_instr); +} + /** * \brief Lowers a texture instruction. * @@ -300,8 +514,7 @@ lower_tex(nir_builder *b, nir_instr *instr, UNUSED void *cb_data) nir_def *int_array_index; unsigned num_coord_comps = process_coords(b, - tex->is_array, - tex->op == nir_texop_lod, + tex->is_array && tex->op != nir_texop_lod, tex_src_is_float(tex, nir_tex_src_coord), tex_srcs[nir_tex_src_coord], &float_coords, @@ -570,3 +783,190 @@ bool pco_nir_lower_tex(nir_shader *shader) { return nir_shader_lower_instructions(shader, is_tex, lower_tex, NULL); } + +static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data) +{ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + bool hw_array_support = false; + bool hw_int_support = false; + + enum glsl_sampler_dim image_dim = nir_intrinsic_image_dim(intr); + bool is_array = nir_intrinsic_image_array(intr); + enum pipe_format format = nir_intrinsic_format(intr); + nir_alu_type dest_type = nir_intrinsic_dest_type(intr); + + unsigned desc_set = nir_src_comp_as_uint(intr->src[0], 0); + unsigned binding = nir_src_comp_as_uint(intr->src[0], 1); + nir_def *elem = nir_channel(b, intr->src[0].ssa, 2); + + nir_def *coords = !nir_src_is_undef(intr->src[1]) ? intr->src[1].ssa : NULL; + nir_def *sample_index = !nir_src_is_undef(intr->src[2]) ? intr->src[2].ssa + : NULL; + nir_def *lod = !nir_src_is_undef(intr->src[3]) ? 
intr->src[3].ssa : NULL; + + ASSERTED bool msaa = image_dim == GLSL_SAMPLER_DIM_MS || + image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS; + assert(!!sample_index == msaa); + + nir_def *tex_state = nir_load_tex_state_pco(b, + ROGUE_NUM_TEXSTATE_DWORDS, + elem, + .desc_set = desc_set, + .binding = binding); + + nir_def *smp_state = nir_load_smp_state_pco(b, + ROGUE_NUM_TEXSTATE_DWORDS, + nir_imm_int(b, 0), + .desc_set = PCO_POINT_SAMPLER, + .binding = PCO_POINT_SAMPLER); + + unsigned num_coord_comps = + glsl_get_sampler_dim_coordinate_components(image_dim) + !!is_array; + + if (coords) + coords = nir_trim_vector(b, coords, num_coord_comps); + + nir_def *float_coords; + nir_def *int_coords; + nir_def *float_array_index; + nir_def *int_array_index; + num_coord_comps = process_coords(b, + is_array, + false, + coords, + &float_coords, + &int_coords, + &float_array_index, + &int_array_index); + + nir_def *smp_data_comps[NIR_MAX_VEC_COMPONENTS]; + unsigned smp_data_comp_count = 0; + pco_smp_flags smp_flags = { + .dim = to_pco_dim(image_dim), + .lod_mode = PCO_LOD_MODE_NORMAL, + }; + + for (unsigned c = 0; c < num_coord_comps; ++c) { + smp_data_comps[smp_data_comp_count++] = + nir_channel(b, hw_int_support ? int_coords : float_coords, c); + } + + if (hw_array_support && int_array_index) { + smp_data_comps[smp_data_comp_count++] = + hw_int_support ? int_array_index : float_array_index; + + smp_flags.array = true; + } + + bool lod_set = false; + if (lod) { + lod = nir_i2f32(b, lod); + + smp_data_comps[smp_data_comp_count++] = lod; + + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_REPLACE; + + lod_set = true; + } + + if (!hw_array_support && int_array_index) { + /* Set a per-pixel lod bias of 0 if none has been set yet. 
*/ + if (!lod_set) { + smp_data_comps[smp_data_comp_count++] = nir_imm_int(b, 0); + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_BIAS; + lod_set = true; + } + + nir_def *tex_state_word[] = { + [0] = nir_channel(b, tex_state, 0), + [1] = nir_channel(b, tex_state, 1), + [2] = nir_channel(b, tex_state, 2), + [3] = nir_channel(b, tex_state, 3), + }; + + nir_def *base_addr_lo; + nir_def *base_addr_hi; + unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi); + + nir_def *array_index = int_array_index; + assert(array_index); + + nir_def *array_size = STATE_UNPACK_ADD(b, tex_state_word, 2, 4, 11, 1); + array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_size); + + nir_def *tex_meta = nir_load_tex_meta_pco(b, + PCO_IMAGE_META_COUNT, + elem, + .desc_set = desc_set, + .binding = binding); + + nir_def *array_stride = + nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE); + + nir_def *array_offset = nir_imul(b, array_index, array_stride); + + nir_def *addr = + nir_uadd64_2x32_lo(b, base_addr_lo, base_addr_hi, array_offset); + + smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 0); + smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 1); + + smp_flags.tao = true; + } + + if (sample_index) { + nir_def *lookup = nir_bitfield_insert(b, + nir_imm_int(b, 0), + sample_index, + nir_imm_int(b, 16), + nir_imm_int(b, 3)); + + smp_data_comps[smp_data_comp_count++] = lookup; + smp_flags.sno = true; + } + + /* Pad out the rest of the data words. 
*/ + assert(smp_data_comp_count <= NIR_MAX_VEC_COMPONENTS); + for (unsigned c = smp_data_comp_count; c < ARRAY_SIZE(smp_data_comps); ++c) + smp_data_comps[c] = nir_imm_int(b, 0); + + nir_def *smp_data = nir_vec(b, smp_data_comps, ARRAY_SIZE(smp_data_comps)); + + smp_flags.nncoords = true; + + smp_flags.integer = hw_int_support; + smp_flags.fcnorm = nir_alu_type_get_base_type(dest_type) == nir_type_float; + + return nir_smp_pco(b, + intr->def.num_components, + smp_data, + tex_state, + smp_state, + .smp_flags_pco = smp_flags._, + .range = smp_data_comp_count); +} + +static bool is_image(const nir_instr *instr, UNUSED const void *cb_data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_image_deref_load: + return true; + + default: + break; + } + + return false; +} + +bool pco_nir_lower_images(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, is_image, lower_image, NULL); +} diff --git a/src/imagination/pco/pco_nir_vk.c b/src/imagination/pco/pco_nir_vk.c index 439f0b2ebd9..c620ce98d74 100644 --- a/src/imagination/pco/pco_nir_vk.c +++ b/src/imagination/pco/pco_nir_vk.c @@ -62,6 +62,21 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b, return nir_imm_ivec3(b, desc_set_binding, elem, 0); } +static nir_def *array_elem_from_deref(nir_builder *b, nir_deref_instr *deref) +{ + unsigned array_elem = 0; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + array_elem = nir_src_as_uint(deref->arr.index); + + deref = nir_deref_instr_parent(deref); + } + + assert(deref->deref_type == nir_deref_type_var); + return nir_imm_int(b, array_elem); +} + static void lower_tex_deref_to_binding(nir_builder *b, nir_tex_instr *tex, unsigned deref_index, @@ -73,20 +88,11 @@ static void lower_tex_deref_to_binding(nir_builder *b, b->cursor = nir_before_instr(&tex->instr); - unsigned 
array_elem = 0; - if (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - - array_elem = nir_src_as_uint(deref->arr.index); - - deref = nir_deref_instr_parent(deref); - } - - nir_def *elem = nir_imm_int(b, array_elem); - assert(deref->deref_type == nir_deref_type_var); - - unsigned desc_set = deref->var->data.descriptor_set; - unsigned binding = deref->var->data.binding; + nir_variable *var = nir_deref_instr_get_variable(deref); + assert(var); + unsigned desc_set = var->data.descriptor_set; + unsigned binding = var->data.binding; + nir_def *elem = array_elem_from_deref(b, deref); set_resource_used(common, desc_set, binding); @@ -116,6 +122,34 @@ lower_tex_derefs(nir_builder *b, nir_tex_instr *tex, pco_common_data *common) lower_tex_deref_to_binding(b, tex, deref_index, common); } +static nir_def *lower_image_derefs(nir_builder *b, + nir_intrinsic_instr *intr, + pco_common_data *common) +{ + nir_src *deref_src = &intr->src[0]; + nir_deref_instr *deref = nir_src_as_deref(*deref_src); + b->cursor = nir_before_instr(&intr->instr); + + nir_variable *var = nir_deref_instr_get_variable(deref); + assert(var); + unsigned desc_set = var->data.descriptor_set; + unsigned binding = var->data.binding; + nir_def *elem = array_elem_from_deref(b, deref); + + set_resource_used(common, desc_set, binding); + common->uses.point_sampler = true; + + if (nir_intrinsic_format(intr) == PIPE_FORMAT_NONE) + nir_intrinsic_set_format(intr, var->data.image.format); + + nir_def *index = + nir_vec3(b, nir_imm_int(b, desc_set), nir_imm_int(b, binding), elem); + + nir_src_rewrite(deref_src, index); + + return NIR_LOWER_INSTR_PROGRESS; +} + /** * \brief Lowers a Vulkan-related instruction. 
* @@ -135,6 +169,9 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data) case nir_intrinsic_load_vulkan_descriptor: return lower_load_vulkan_descriptor(b, intr, common); + case nir_intrinsic_image_deref_load: + return lower_image_derefs(b, intr, common); + default: break; } @@ -169,6 +206,7 @@ static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data) nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); switch (intr->intrinsic) { case nir_intrinsic_load_vulkan_descriptor: + case nir_intrinsic_image_deref_load: return true; default: diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 46772c584dd..a4b5731ebe2 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -428,21 +428,33 @@ static unsigned fetch_resource_base_reg(const pco_common_data *common, unsigned elem, bool *is_img_smp) { - assert(desc_set < ARRAY_SIZE(common->desc_sets)); - const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set]; - assert(desc_set_data->used); - assert(desc_set_data->bindings && binding < desc_set_data->binding_count); + const pco_range *range; + if (desc_set == PCO_POINT_SAMPLER && binding == PCO_POINT_SAMPLER) { + assert(common->uses.point_sampler); + range = &common->point_sampler; - const pco_binding_data *binding_data = &desc_set_data->bindings[binding]; - assert(binding_data->used); + if (is_img_smp) + *is_img_smp = false; + } else { + assert(desc_set < ARRAY_SIZE(common->desc_sets)); + const pco_descriptor_set_data *desc_set_data = + &common->desc_sets[desc_set]; + assert(desc_set_data->used); + assert(desc_set_data->bindings && binding < desc_set_data->binding_count); - if (is_img_smp) - *is_img_smp = binding_data->is_img_smp; + const pco_binding_data *binding_data = &desc_set_data->bindings[binding]; + assert(binding_data->used); - unsigned reg_offset = elem * binding_data->range.stride; - assert(reg_offset < binding_data->range.count); + 
range = &binding_data->range; - unsigned reg_index = binding_data->range.start + reg_offset; + if (is_img_smp) + *is_img_smp = binding_data->is_img_smp; + } + + unsigned reg_offset = elem * range->stride; + assert(reg_offset < range->count); + + unsigned reg_index = range->start + reg_offset; return reg_index; }