mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 07:48:07 +02:00
tu: Implement VK_QCOM_image_processing.
This includes the block matching, box filtering, and weighted sample features. Passes all of the dEQP-VK.image_processing.* CTS tests that were recently landed. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38559>
This commit is contained in:
parent
431c7a6e36
commit
72c12f62ff
17 changed files with 329 additions and 23 deletions
|
|
@ -722,6 +722,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_MESA_image_alignment_control DONE (anv, nvk, radv)
|
||||
VK_EXT_legacy_dithering DONE (anv, tu, vn)
|
||||
VK_QCOM_fragment_density_map_offset DONE (tu)
|
||||
VK_QCOM_image_processing DONE (tu)
|
||||
VK_VALVE_video_encode_rgb_conversion DONE (radv)
|
||||
|
||||
Rusticl OpenCL 1.0 -- all DONE:
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
VK_QCOM_image_processing on Turnip
|
||||
|
|
@ -467,6 +467,9 @@ struct fd_dev_info {
|
|||
* expected:
|
||||
*/
|
||||
bool has_salu_int_narrowing_quirk;
|
||||
|
||||
/* Whether the device supports the image processing opcode */
|
||||
bool has_image_processing;
|
||||
} props;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1016,6 +1016,7 @@ a7xx_gen2 = GPUProps(
|
|||
reading_shading_rate_requires_smask_quirk = True,
|
||||
has_ray_intersection = True,
|
||||
has_hw_bin_scaling = True,
|
||||
has_image_processing = True,
|
||||
)
|
||||
|
||||
a7xx_gen3 = GPUProps(
|
||||
|
|
@ -1043,6 +1044,7 @@ a7xx_gen3 = GPUProps(
|
|||
has_abs_bin_mask = True,
|
||||
new_control_regs = True,
|
||||
has_hw_bin_scaling = True,
|
||||
has_image_processing = True,
|
||||
)
|
||||
|
||||
a730_magic_regs = dict(
|
||||
|
|
|
|||
|
|
@ -133,6 +133,10 @@ fdl6_texswiz(const struct fdl_view_args *args, bool has_z24uint_s8uint)
|
|||
unsigned char swiz[4];
|
||||
util_format_compose_swizzles(format_swiz, args->swiz, swiz);
|
||||
|
||||
/* Unused for box filter, match the blob behavior. */
|
||||
if (args->filter_width)
|
||||
return 0;
|
||||
|
||||
if (CHIP <= A7XX) {
|
||||
return A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
|
||||
A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
|
||||
|
|
@ -258,7 +262,13 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
|
|||
view->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(layer_size);
|
||||
view->descriptor[4] = base_addr;
|
||||
view->descriptor[5] = (base_addr >> 32) | A6XX_TEX_CONST_5_DEPTH(depth);
|
||||
view->descriptor[6] = A6XX_TEX_CONST_6_MIN_LOD_CLAMP(args->min_lod_clamp - args->base_miplevel);
|
||||
if (args->filter_width) {
|
||||
view->descriptor[6] = A6XX_TEX_CONST_6_LOG2_PHASES(
|
||||
util_logbase2_ceil(args->filter_num_phases) / 2) |
|
||||
A6XX_TEX_CONST_6_DILATION(1);
|
||||
} else {
|
||||
view->descriptor[6] = A6XX_TEX_CONST_6_MIN_LOD_CLAMP(args->min_lod_clamp - args->base_miplevel);
|
||||
}
|
||||
|
||||
if (layout->tile_all)
|
||||
view->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL;
|
||||
|
|
@ -300,6 +310,13 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
|
|||
|
||||
assert(args->type != FDL_VIEW_TYPE_3D);
|
||||
return;
|
||||
} else if (args->filter_width) {
|
||||
view->descriptor[8] =
|
||||
(A6XX_TEX_CONST_8_FILTER_SIZE_X(args->filter_width) |
|
||||
A6XX_TEX_CONST_8_FILTER_SIZE_Y(args->filter_height));
|
||||
view->descriptor[10] =
|
||||
(A6XX_TEX_CONST_10_FILTER_OFFSET_X(args->filter_center_x) |
|
||||
A6XX_TEX_CONST_10_FILTER_OFFSET_Y(args->filter_center_y));
|
||||
}
|
||||
|
||||
if (ubwc_enabled) {
|
||||
|
|
@ -323,6 +340,8 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
|
|||
} else if (CHIP >= A8XX) {
|
||||
uint32_t *descriptor = view->descriptor;
|
||||
|
||||
assert(!args->filter_width); /* Need descriptor fields defined. */
|
||||
|
||||
descriptor[0] = A8XX_TEX_MEMOBJ_0_BASE_LO(base_addr);
|
||||
descriptor[1] = A8XX_TEX_MEMOBJ_1_BASE_HI(base_addr >> 32) |
|
||||
A8XX_TEX_MEMOBJ_1_TYPE(fdl6_tex_type(args->type, false)) |
|
||||
|
|
@ -374,13 +393,23 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
|
|||
A8XX_TEX_MEMOBJ_9_UV_PITCH(fdl_pitch(layouts[1], args->base_miplevel));
|
||||
|
||||
return;
|
||||
} else if (args->filter_width) {
|
||||
descriptor[5] |= A8XX_TEX_MEMOBJ_5_FILTER_SIZE_X(args->filter_width) |
|
||||
A8XX_TEX_MEMOBJ_5_FILTER_SIZE_Y(args->filter_height) |
|
||||
A8XX_TEX_MEMOBJ_5_FILTER_OFFSET_X(args->filter_center_x) |
|
||||
A8XX_TEX_MEMOBJ_5_FILTER_OFFSET_Y(args->filter_center_y);
|
||||
}
|
||||
|
||||
descriptor[7] = A8XX_TEX_MEMOBJ_7_ARRAY_SLICE_OFFSET(layer_size);
|
||||
descriptor[9] = A8XX_TEX_MEMOBJ_9_MIN_LOD_CLAMP(args->min_lod_clamp - args->base_miplevel);
|
||||
|
||||
if (args->type == FDL_VIEW_TYPE_3D)
|
||||
if (args->filter_width) {
|
||||
descriptor[7] |= A8XX_TEX_MEMOBJ_7_LOG2_PHASES(
|
||||
util_logbase2_ceil(args->filter_num_phases) / 2) |
|
||||
A8XX_TEX_MEMOBJ_7_DILATION(1);
|
||||
} else if (args->type == FDL_VIEW_TYPE_3D) {
|
||||
descriptor[7] |= A8XX_TEX_MEMOBJ_7_MIN_ARRAY_SLIZE_OFFSET(layout->slices[layout->mip_levels - 1].size0);
|
||||
}
|
||||
|
||||
if (ubwc_enabled) {
|
||||
uint32_t block_width, block_height;
|
||||
|
|
|
|||
|
|
@ -374,6 +374,12 @@ struct fdl_view_args {
|
|||
enum pipe_format format;
|
||||
enum fdl_view_type type;
|
||||
enum fdl_chroma_location chroma_offsets[2];
|
||||
|
||||
uint32_t filter_width;
|
||||
uint32_t filter_height;
|
||||
uint32_t filter_center_x;
|
||||
uint32_t filter_center_y;
|
||||
uint32_t filter_num_phases;
|
||||
};
|
||||
|
||||
#define FDL6_TEX_CONST_DWORDS 16
|
||||
|
|
|
|||
|
|
@ -1900,7 +1900,7 @@ get_bindless_ref(struct ir3_context *ctx, nir_src *src, bool is_sampler)
|
|||
|
||||
static struct tex_src_info
|
||||
get_bindless_samp_src(struct ir3_context *ctx, nir_src *tex,
|
||||
nir_src *samp)
|
||||
nir_src *samp, nir_src *tex2, nir_src *samp2)
|
||||
{
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct tex_src_info info = {0};
|
||||
|
|
@ -1912,6 +1912,46 @@ get_bindless_samp_src(struct ir3_context *ctx, nir_src *tex,
|
|||
*/
|
||||
struct bindless_ref_info tex_info = get_bindless_ref(ctx, tex, false);
|
||||
struct bindless_ref_info samp_info = get_bindless_ref(ctx, samp, true);
|
||||
struct bindless_ref_info tex2_info = get_bindless_ref(ctx, tex2, false);
|
||||
/* NOTE: The QC implementation completely ignores samp2 (reference
|
||||
* sampler), in both the A1 and S2EN cases.
|
||||
*/
|
||||
|
||||
if (tex2 || samp2) {
|
||||
struct tex_src_info info = {0};
|
||||
info.flags = IR3_INSTR_B;
|
||||
|
||||
/* NOTE: QC implementation doesn't encode the BASE_HI bits in the right
|
||||
* place (ORing them into src2 instead), but our normal base encoding
|
||||
* appears to work.
|
||||
*/
|
||||
info.base = tex_info.desc_set;
|
||||
|
||||
info.a1_val = 0;
|
||||
info.a1_val |= samp_info.desc_set;
|
||||
info.a1_val |= tex2_info.desc_set << 13;
|
||||
|
||||
/* NOTE: QC implementation lets samp index overflow into tex2 index */
|
||||
if (tex_info.is_const && tex_info.const_index < 16 &&
|
||||
samp_info.is_const && samp_info.const_index < 16 &&
|
||||
tex2_info.is_const && tex2_info.const_index < 64) {
|
||||
info.tex_idx = tex_info.const_index;
|
||||
info.a1_val |= (samp_info.const_index << 3);
|
||||
info.a1_val |= (tex2_info.const_index << 7);
|
||||
} else {
|
||||
/* Non-constant case: Collect the combined texture/sampler, and the
|
||||
* secondary texture.
|
||||
*/
|
||||
info.samp_tex = ir3_collect(b, tex_info.index, samp_info.index, tex2_info.index);
|
||||
|
||||
info.flags |= IR3_INSTR_S2EN;
|
||||
}
|
||||
|
||||
if (info.a1_val)
|
||||
info.flags |= IR3_INSTR_A1EN;
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
info.tex_base = tex_info.desc_set;
|
||||
info.tex_idx = tex_info.const_index;
|
||||
|
|
@ -3411,7 +3451,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
case nir_intrinsic_prefetch_sam_ir3: {
|
||||
struct tex_src_info info =
|
||||
get_bindless_samp_src(ctx, &intr->src[0], &intr->src[1]);
|
||||
get_bindless_samp_src(ctx, &intr->src[0], &intr->src[1], NULL, NULL);
|
||||
struct ir3_instruction *sam =
|
||||
emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL);
|
||||
|
||||
|
|
@ -3581,13 +3621,17 @@ get_tex_samp_tex_src(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
struct tex_src_info info = {0};
|
||||
int texture_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
|
||||
int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle);
|
||||
int texture2_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_2_handle);
|
||||
int sampler2_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_2_handle);
|
||||
struct ir3_instruction *texture, *sampler;
|
||||
|
||||
if (texture_idx >= 0 || sampler_idx >= 0) {
|
||||
/* Bindless case */
|
||||
info = get_bindless_samp_src(ctx,
|
||||
texture_idx >= 0 ? &tex->src[texture_idx].src : NULL,
|
||||
sampler_idx >= 0 ? &tex->src[sampler_idx].src : NULL);
|
||||
sampler_idx >= 0 ? &tex->src[sampler_idx].src : NULL,
|
||||
texture2_idx >= 0 ? &tex->src[texture2_idx].src : NULL,
|
||||
sampler2_idx >= 0 ? &tex->src[sampler2_idx].src : NULL);
|
||||
|
||||
if (tex->texture_non_uniform || tex->sampler_non_uniform)
|
||||
info.flags |= IR3_INSTR_NONUNIF;
|
||||
|
|
@ -3629,8 +3673,8 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
{
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct ir3_instruction **dst, *sam, *src0[12], *src1[5];
|
||||
struct ir3_instruction *const *coord, *const *off, *const *ddx, *const *ddy;
|
||||
struct ir3_instruction *lod, *compare, *proj, *sample_index, *min_lod;
|
||||
struct ir3_instruction *const *coord, *const *off, *const *ddx, *const *ddy, *const *box_size;
|
||||
struct ir3_instruction *lod, *compare, *proj, *sample_index, *min_lod, *ref_coord, *block_size;
|
||||
struct tex_src_info info = {0};
|
||||
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
|
||||
bool lod_zero = false, has_min_lod = false;
|
||||
|
|
@ -3641,8 +3685,8 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
|
||||
ncomp = tex->def.num_components;
|
||||
|
||||
coord = off = ddx = ddy = NULL;
|
||||
lod = proj = compare = sample_index = min_lod = NULL;
|
||||
coord = off = ddx = ddy = box_size = NULL;
|
||||
lod = proj = compare = sample_index = min_lod = ref_coord = block_size = NULL;
|
||||
|
||||
dst = ir3_get_def(ctx, &tex->def, ncomp);
|
||||
|
||||
|
|
@ -3691,11 +3735,22 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
min_lod = ir3_get_src(ctx, &tex->src[i].src)[0];
|
||||
has_min_lod = true;
|
||||
break;
|
||||
case nir_tex_src_box_size:
|
||||
box_size = ir3_get_src(ctx, &tex->src[i].src);
|
||||
break;
|
||||
case nir_tex_src_block_size:
|
||||
block_size = ir3_get_src(ctx, &tex->src[i].src)[0];
|
||||
break;
|
||||
case nir_tex_src_ref_coord:
|
||||
ref_coord = ir3_get_src(ctx, &tex->src[i].src)[0];
|
||||
break;
|
||||
case nir_tex_src_texture_offset:
|
||||
case nir_tex_src_sampler_offset:
|
||||
case nir_tex_src_texture_handle:
|
||||
case nir_tex_src_sampler_handle:
|
||||
/* handled in get_tex_samp_src() */
|
||||
case nir_tex_src_texture_2_handle:
|
||||
case nir_tex_src_sampler_2_handle:
|
||||
/* handled in get_tex_samp_tex_src() */
|
||||
break;
|
||||
default:
|
||||
ir3_context_error(ctx, "Unhandled NIR tex src type: %d\n",
|
||||
|
|
@ -3767,6 +3822,16 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
case nir_texop_txf_ms:
|
||||
opc = OPC_ISAMM;
|
||||
break;
|
||||
case nir_texop_sample_weighted_qcom:
|
||||
opc = OPC_IMG_BINDLESS_HOF;
|
||||
break;
|
||||
case nir_texop_box_filter_qcom:
|
||||
opc = OPC_IMG_BINDLESS_PCMN;
|
||||
break;
|
||||
case nir_texop_block_match_sad_qcom:
|
||||
case nir_texop_block_match_ssd_qcom:
|
||||
opc = OPC_IMG_BINDLESS;
|
||||
break;
|
||||
default:
|
||||
ir3_context_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
|
||||
return;
|
||||
|
|
@ -3864,7 +3929,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
* - lod
|
||||
* - bias
|
||||
*/
|
||||
if (has_off | has_lod | has_bias | has_min_lod) {
|
||||
if (has_off | has_lod | has_bias | has_min_lod | (box_size != NULL)) {
|
||||
if (has_off) {
|
||||
unsigned off_coords = coords;
|
||||
if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
|
||||
|
|
@ -3883,6 +3948,16 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
src1[nsrc1++] = min_lod;
|
||||
flags |= IR3_INSTR_CLP;
|
||||
}
|
||||
|
||||
if (box_size) {
|
||||
src1[nsrc1++] = box_size[0];
|
||||
src1[nsrc1++] = box_size[1];
|
||||
}
|
||||
}
|
||||
|
||||
if (opc == OPC_IMG_BINDLESS) {
|
||||
src1[nsrc1++] = ref_coord;
|
||||
src1[nsrc1++] = block_size;
|
||||
}
|
||||
|
||||
type = get_tex_dest_type(tex);
|
||||
|
|
@ -3978,6 +4053,9 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
sam = emit_sam(ctx, opc, info, type, MASK(ncomp), col0, col1);
|
||||
}
|
||||
|
||||
if (tex->op == nir_texop_block_match_ssd_qcom)
|
||||
sam->cat5.match_mode = IR3_MATCH_MODE_SSD;
|
||||
|
||||
if (tex->is_sparse) {
|
||||
info.flags |= flags;
|
||||
struct ir3_instruction *rck =
|
||||
|
|
|
|||
|
|
@ -523,6 +523,73 @@ ir3_nir_lower_array_sampler(nir_shader *shader)
|
|||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
/* pack_uvec2_to_uint does clamping that we don't need to do. */
|
||||
static nir_def *
|
||||
pack_16_16(nir_builder *b, nir_def *x)
|
||||
{
|
||||
return nir_ior(b, nir_channel(b, x, 0), nir_ishl_imm(b, nir_channel(b, x, 1), 16));
|
||||
}
|
||||
|
||||
static bool
|
||||
ir3_nir_lower_image_processing_instr(struct nir_builder *b, nir_instr *instr,
|
||||
void *_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
return false;
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
if (tex->op == nir_texop_box_filter_qcom) {
|
||||
/* The hardware's box filter arg is preprocessed, but still a vec2. We do
|
||||
* the preprocessing in NIR so it's more legible, and can be constant
|
||||
* folded.
|
||||
*/
|
||||
int box_size_src = nir_tex_instr_src_index(tex, nir_tex_src_box_size);
|
||||
assert(box_size_src >= 0);
|
||||
|
||||
nir_def *box_size = tex->src[box_size_src].src.ssa;
|
||||
nir_def *area =
|
||||
nir_fmul(b, nir_channel(b, box_size, 0), nir_channel(b, box_size, 1));
|
||||
box_size =
|
||||
nir_f2u32(b, nir_fround_even(b, nir_fmul_imm(b, box_size, 64.0)));
|
||||
nir_def *inv_area = nir_u2u32(b, nir_f2f16(b, nir_frcp(b, area)));
|
||||
|
||||
nir_src_rewrite(&tex->src[box_size_src].src, nir_vec2(b, pack_16_16(b, box_size), inv_area));
|
||||
|
||||
return true;
|
||||
} else if (tex->op == nir_texop_block_match_sad_qcom ||
|
||||
tex->op == nir_texop_block_match_ssd_qcom) {
|
||||
/* Convert the src coords to integer, and pack the ref coord and block
|
||||
* into u32s each.
|
||||
*/
|
||||
int coord_src = nir_tex_instr_src_index(tex, nir_tex_src_coord);
|
||||
assert(coord_src >= 0);
|
||||
nir_src_rewrite(&tex->src[coord_src].src, nir_i2f32(b, tex->src[coord_src].src.ssa));
|
||||
|
||||
int ref_coord_src = nir_tex_instr_src_index(tex, nir_tex_src_ref_coord);
|
||||
assert(ref_coord_src >= 0);
|
||||
nir_src_rewrite(&tex->src[ref_coord_src].src,
|
||||
pack_16_16(b, tex->src[ref_coord_src].src.ssa));
|
||||
|
||||
int block_size_src = nir_tex_instr_src_index(tex, nir_tex_src_block_size);
|
||||
assert(block_size_src >= 0);
|
||||
nir_src_rewrite(&tex->src[block_size_src].src,
|
||||
pack_16_16(b, tex->src[block_size_src].src.ssa));
|
||||
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
ir3_nir_lower_image_processing(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, ir3_nir_lower_image_processing_instr,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
|
||||
{
|
||||
|
|
@ -701,6 +768,8 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
|
|||
if (compiler->array_index_add_half)
|
||||
OPT(s, ir3_nir_lower_array_sampler);
|
||||
|
||||
OPT(s, ir3_nir_lower_image_processing);
|
||||
|
||||
if (compiler->gen >= 6) {
|
||||
OPT(s, ir3_nir_lower_shader_clock, compiler->options.uche_trap_base);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,6 +120,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
|
|||
<bitfield name="MIN_LOD_CLAMP" low="0" high="11" type="ufixed" radix="8"/>
|
||||
<!-- pitch for plane 2 / plane 3 -->
|
||||
<bitfield name="PLANE_PITCH" low="8" high="31" type="uint"/>
|
||||
|
||||
<!-- QCOM_image_processing sample weights descriptor fields, overlapping the others. -->
|
||||
<bitfield name="LOG2_PHASES" low="0" high="2" type="uint"/>
|
||||
<bitfield name="DILATION" low="8" high="11" type="uint"/>
|
||||
</reg32>
|
||||
<!-- 7/8 is plane 2 address for planar formats -->
|
||||
<reg32 offset="7" name="7">
|
||||
|
|
@ -127,6 +131,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
|
|||
</reg32>
|
||||
<reg32 offset="8" name="8">
|
||||
<bitfield name="FLAG_HI" low="0" high="16"/>
|
||||
<bitfield name="FILTER_SIZE_X" low="17" high="23"/>
|
||||
<bitfield name="FILTER_SIZE_Y" low="24" high="30"/>
|
||||
</reg32>
|
||||
<!-- 9/10 is plane 3 address for planar formats -->
|
||||
<reg32 offset="9" name="9">
|
||||
|
|
@ -137,6 +143,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
|
|||
<!-- log2 size of the first level, required for mipmapping -->
|
||||
<bitfield name="FLAG_BUFFER_LOGW" low="8" high="11" type="uint"/>
|
||||
<bitfield name="FLAG_BUFFER_LOGH" low="12" high="15" type="uint"/>
|
||||
<bitfield name="FILTER_OFFSET_X" low="17" high="22"/>
|
||||
<bitfield name="FILTER_OFFSET_Y" low="23" high="28"/>
|
||||
</reg32>
|
||||
<reg32 offset="11" name="11"/>
|
||||
<reg32 offset="12" name="12"/>
|
||||
|
|
|
|||
|
|
@ -83,6 +83,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
|
|||
<bitfield name="FLAG_BUFFER_PITCH" low="17" high="24" shr="6" type="uint"/>
|
||||
<bitfield name="ALL_SAMPLES_CENTER" pos="29" type="boolean"/>
|
||||
<bitfield name="MUTABLEEN" pos="31" type="boolean"/>
|
||||
|
||||
<!-- QCOM_image_processing sample weights descriptor fields, overlapping the others. -->
|
||||
<bitfield name="FILTER_SIZE_X" low="0" high="6"/>
|
||||
<bitfield name="FILTER_SIZE_Y" low="7" high="13"/>
|
||||
<bitfield name="FILTER_OFFSET_X" low="19" high="24"/>
|
||||
<bitfield name="FILTER_OFFSET_Y" low="25" high="30"/>
|
||||
</reg32>
|
||||
<reg32 offset="6" name="6">
|
||||
<bitfield name="TEX_LINE_OFFSET" low="0" high="23" type="uint"/> <!-- PITCH -->
|
||||
|
|
@ -99,6 +105,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
|
|||
<!-- For multiplanar. This overlaps other single-planar fields: -->
|
||||
<bitfield name="UV_OFFSET_H" low="24" high="25" type="ufixed" radix="2"/> <!-- CHROMA_MIDPOINT_X -->
|
||||
<bitfield name="UV_OFFSET_V" low="26" high="27" type="ufixed" radix="2"/> <!-- CHROMA_MIDPOINT_Y -->
|
||||
|
||||
<!-- QCOM_image_processing sample weights descriptor fields, overlapping the others. -->
|
||||
<bitfield name="DILATION" low="24" high="27" type="uint"/>
|
||||
<bitfield name="LOG2_PHASES" low="28" high="30" type="uint"/>
|
||||
</reg32>
|
||||
<reg32 offset="8" name="8">
|
||||
<bitfield name="FLAG_ARRAY_PITCH" low="0" high="14" shr="12" type="uint"/> <!-- FLAG_BUFFER_ARRAY_PITCH -->
|
||||
|
|
|
|||
|
|
@ -8744,11 +8744,11 @@ got cmdszdw=416
|
|||
{ ARRAY_PITCH = 4096 | MIN_LAYERSZ = 0 }
|
||||
{ BASE_LO = 0x373a000 }
|
||||
{ BASE_HI = 0x1 | DEPTH = 1 }
|
||||
{ MIN_LOD_CLAMP = 0.000000 | PLANE_PITCH = 0 }
|
||||
{ MIN_LOD_CLAMP = 0.000000 | PLANE_PITCH = 0 | LOG2_PHASES = 0 | DILATION = 0 }
|
||||
{ FLAG_LO = 0 }
|
||||
{ FLAG_HI = 0 }
|
||||
{ FLAG_HI = 0 | FILTER_SIZE_X = 0 | FILTER_SIZE_Y = 0 }
|
||||
{ FLAG_BUFFER_ARRAY_PITCH = 0 }
|
||||
{ FLAG_BUFFER_PITCH = 0 | FLAG_BUFFER_LOGW = 0 | FLAG_BUFFER_LOGH = 0 }
|
||||
{ FLAG_BUFFER_PITCH = 0 | FLAG_BUFFER_LOGW = 0 | FLAG_BUFFER_LOGH = 0 | FILTER_OFFSET_X = 0 | FILTER_OFFSET_Y = 0 }
|
||||
{ 11 = 0 }
|
||||
{ 12 = 0 }
|
||||
{ 13 = 0 }
|
||||
|
|
|
|||
|
|
@ -1276,6 +1276,8 @@ tu_update_descriptor_sets(const struct tu_device *device,
|
|||
break;
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM:
|
||||
case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
write_image_descriptor(ptr, writeset->descriptorType, writeset->pImageInfo + j);
|
||||
break;
|
||||
|
|
@ -1621,6 +1623,8 @@ tu_update_descriptor_set_with_template(
|
|||
break;
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM:
|
||||
case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
|
||||
write_image_descriptor(ptr, templ->entry[i].descriptor_type,
|
||||
(const VkDescriptorImageInfo *) src);
|
||||
|
|
|
|||
|
|
@ -355,6 +355,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
|||
.IMG_filter_cubic = device->info->props.has_tex_filter_cubic,
|
||||
.NV_compute_shader_derivatives = device->info->chip >= 7,
|
||||
.QCOM_fragment_density_map_offset = true,
|
||||
.QCOM_image_processing = device->info->props.has_image_processing,
|
||||
.QCOM_multiview_per_view_render_areas = true,
|
||||
.QCOM_multiview_per_view_viewports =
|
||||
device->info->props.has_per_view_viewport,
|
||||
|
|
@ -824,6 +825,11 @@ tu_get_features(struct tu_physical_device *pdevice,
|
|||
/* VK_EXT_zero_initialize_device_memory */
|
||||
features->zeroInitializeDeviceMemory = true;
|
||||
|
||||
/* VK_QCOM_image_processing */
|
||||
features->textureSampleWeighted = pdevice->vk.supported_extensions.QCOM_image_processing;
|
||||
features->textureBoxFilter = pdevice->vk.supported_extensions.QCOM_image_processing;
|
||||
features->textureBlockMatch = pdevice->vk.supported_extensions.QCOM_image_processing;
|
||||
|
||||
/* VK_VALVE_fragment_density_map_layered */
|
||||
features->fragmentDensityMapLayered = true;
|
||||
|
||||
|
|
@ -1520,6 +1526,21 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
|
||||
/* VK_VALVE_fragment_density_map_layered */
|
||||
props->maxFragmentDensityMapLayers = MAX_VIEWS;
|
||||
|
||||
/* VK_QCOM_image_processing */
|
||||
props->maxWeightFilterPhases = 1024;
|
||||
props->maxWeightFilterDimension =
|
||||
pdevice->vk.supported_extensions.QCOM_image_processing
|
||||
? (VkExtent2D) { 64, 64 }
|
||||
: (VkExtent2D) { 0, 0 };
|
||||
props->maxBlockMatchRegion =
|
||||
pdevice->vk.supported_extensions.QCOM_image_processing
|
||||
? (VkExtent2D) { 64, 64 }
|
||||
: (VkExtent2D) { 0, 0 };
|
||||
props->maxBoxFilterBlockSize =
|
||||
pdevice->vk.supported_extensions.QCOM_image_processing
|
||||
? (VkExtent2D) { 64, 64 }
|
||||
: (VkExtent2D) { 0, 0 };
|
||||
}
|
||||
|
||||
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
|
||||
|
|
|
|||
|
|
@ -251,6 +251,50 @@ tu_physical_device_get_format_properties(
|
|||
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT);
|
||||
}
|
||||
|
||||
/* Set up QCOM_image_processing flags. This matches blob behavior, except
|
||||
* that it advertises box/weighted on NPOT sampleable formats and ASTC_FLOAT
|
||||
* (which we don't advertise yet), and blockmatch/box/weighted on
|
||||
* VK_FORMAT_G8B8G8R8_422_UNORM.
|
||||
*/
|
||||
if ((optimal & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT) &&
|
||||
(!ycbcr_info || ycbcr_info->n_planes == 1) &&
|
||||
!vk_format_is_depth_or_stencil(vk_format)) {
|
||||
int c = util_format_get_first_non_void_channel(desc->format);
|
||||
bool is_8bpc = c != -1 && desc->is_array && desc->channel[c].size == 8;
|
||||
|
||||
if ((is_8bpc && vk_format != VK_FORMAT_B8G8R8A8_UNORM &&
|
||||
vk_format != VK_FORMAT_B8G8R8A8_SNORM &&
|
||||
vk_format != VK_FORMAT_B8G8R8A8_SRGB) ||
|
||||
vk_format == VK_FORMAT_A2B10G10R10_UNORM_PACK32) {
|
||||
if (desc->is_unorm &&
|
||||
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
|
||||
optimal |= VK_FORMAT_FEATURE_2_BLOCK_MATCHING_BIT_QCOM;
|
||||
if ((desc->is_unorm || desc->is_snorm) &&
|
||||
vk_format != VK_FORMAT_R8G8_SNORM) {
|
||||
optimal |= VK_FORMAT_FEATURE_2_BOX_FILTER_SAMPLED_BIT_QCOM;
|
||||
optimal |= VK_FORMAT_FEATURE_2_WEIGHT_SAMPLED_IMAGE_BIT_QCOM;
|
||||
}
|
||||
}
|
||||
|
||||
if (vk_format == VK_FORMAT_B5G6R5_UNORM_PACK16 ||
|
||||
vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
|
||||
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 ||
|
||||
util_format_is_float16(format) ||
|
||||
(util_format_is_compressed(format) &&
|
||||
desc->layout != UTIL_FORMAT_LAYOUT_RGTC &&
|
||||
vk_format != VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK &&
|
||||
vk_format != VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK &&
|
||||
vk_format != VK_FORMAT_EAC_R11G11_UNORM_BLOCK &&
|
||||
vk_format != VK_FORMAT_EAC_R11G11_SNORM_BLOCK)) {
|
||||
optimal |= VK_FORMAT_FEATURE_2_BOX_FILTER_SAMPLED_BIT_QCOM;
|
||||
optimal |= VK_FORMAT_FEATURE_2_WEIGHT_SAMPLED_IMAGE_BIT_QCOM;
|
||||
}
|
||||
|
||||
if (vk_format == VK_FORMAT_R8_UNORM ||
|
||||
vk_format == VK_FORMAT_R16_SFLOAT)
|
||||
optimal |= VK_FORMAT_FEATURE_2_WEIGHT_IMAGE_BIT_QCOM;
|
||||
}
|
||||
|
||||
/* For the most part, we can do anything with a linear image that we could
|
||||
* do with a tiled image. However, we can't support sysmem rendering with a
|
||||
* linear depth texture, because we don't know if there's a bit to control
|
||||
|
|
|
|||
|
|
@ -180,6 +180,8 @@ tu_image_view_init(struct tu_device *device,
|
|||
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
|
||||
const struct vk_ycbcr_conversion *conversion = ycbcr_conversion ?
|
||||
vk_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
|
||||
const VkImageViewSampleWeightCreateInfoQCOM *sample_weights =
|
||||
vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_SAMPLE_WEIGHT_CREATE_INFO_QCOM);
|
||||
|
||||
vk_image_view_init(&device->vk, &iview->vk, pCreateInfo);
|
||||
assert(iview->vk.format != VK_FORMAT_UNDEFINED);
|
||||
|
|
@ -268,6 +270,14 @@ tu_image_view_init(struct tu_device *device,
|
|||
args.chroma_offsets[1] = (enum fdl_chroma_location) conversion->state.chroma_offsets[1];
|
||||
}
|
||||
|
||||
if (sample_weights) {
|
||||
args.filter_width = sample_weights->filterSize.width;
|
||||
args.filter_height = sample_weights->filterSize.height;
|
||||
args.filter_center_x = sample_weights->filterCenter.x;
|
||||
args.filter_center_y = sample_weights->filterCenter.y;
|
||||
args.filter_num_phases = sample_weights->numPhases;
|
||||
}
|
||||
|
||||
TU_CALLX(device, fdl6_view_init)(&iview->view, layouts, &args, device->use_z24uint_s8uint);
|
||||
|
||||
if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ tu6_load_state_size(struct tu_pipeline *pipeline,
|
|||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM:
|
||||
case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM:
|
||||
/* Textures and UBOs need a packet for each stage */
|
||||
count = stage_count;
|
||||
break;
|
||||
|
|
@ -219,7 +221,8 @@ tu6_emit_load_state(struct tu_device *device,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM: {
|
||||
tu_foreach_stage(stage, stages) {
|
||||
/* TODO: We could emit less CP_LOAD_STATE6 if we used
|
||||
* struct-of-arrays instead of array-of-structs.
|
||||
|
|
|
|||
|
|
@ -757,30 +757,29 @@ lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
|
||||
lower_tex_impl(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
|
||||
struct tu_shader *shader, const struct tu_pipeline_layout *layout,
|
||||
uint32_t read_only_input_attachments, bool dynamic_renderpass)
|
||||
uint32_t read_only_input_attachments, bool dynamic_renderpass,
|
||||
bool ref)
|
||||
{
|
||||
lower_tex_ycbcr(layout, b, tex);
|
||||
|
||||
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
|
||||
int sampler_src_idx = nir_tex_instr_src_index(tex, ref ? nir_tex_src_sampler_2_deref : nir_tex_src_sampler_deref);
|
||||
if (sampler_src_idx >= 0) {
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
|
||||
nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout,
|
||||
read_only_input_attachments,
|
||||
dynamic_renderpass);
|
||||
nir_src_rewrite(&tex->src[sampler_src_idx].src, bindless);
|
||||
tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
|
||||
tex->src[sampler_src_idx].src_type = ref ? nir_tex_src_sampler_2_handle : nir_tex_src_sampler_handle;
|
||||
}
|
||||
|
||||
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
||||
int tex_src_idx = nir_tex_instr_src_index(tex, ref ? nir_tex_src_texture_2_deref : nir_tex_src_texture_deref);
|
||||
if (tex_src_idx >= 0) {
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
|
||||
nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout,
|
||||
read_only_input_attachments,
|
||||
dynamic_renderpass);
|
||||
nir_src_rewrite(&tex->src[tex_src_idx].src, bindless);
|
||||
tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
|
||||
tex->src[tex_src_idx].src_type = ref ? nir_tex_src_texture_2_handle : nir_tex_src_texture_handle;
|
||||
|
||||
/* for the input attachment case: */
|
||||
if (!nir_def_is_intrinsic(bindless))
|
||||
|
|
@ -790,6 +789,24 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
|
||||
struct tu_shader *shader, const struct tu_pipeline_layout *layout,
|
||||
uint32_t read_only_input_attachments, bool dynamic_renderpass)
|
||||
{
|
||||
if (tex->op == nir_texop_block_match_sad_qcom ||
|
||||
tex->op == nir_texop_block_match_ssd_qcom ||
|
||||
tex->op == nir_texop_sample_weighted_qcom) {
|
||||
lower_tex_impl(b, tex, dev, shader, layout, read_only_input_attachments, dynamic_renderpass, false);
|
||||
lower_tex_impl(b, tex, dev, shader, layout, read_only_input_attachments, dynamic_renderpass, true);
|
||||
} else {
|
||||
lower_tex_ycbcr(layout, b, tex);
|
||||
lower_tex_impl(b, tex, dev, shader, layout, read_only_input_attachments, dynamic_renderpass, false);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct lower_instr_params {
|
||||
struct tu_device *dev;
|
||||
struct tu_shader *shader;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue