From 52226d0e3fb176424b6ad5334064e63644757244 Mon Sep 17 00:00:00 2001 From: Constantine Shablia Date: Wed, 23 Oct 2024 03:44:17 +0000 Subject: [PATCH] pan/va: Lower nir_texop_txd to TEX_GRADIENT with derivs followed by TEX_SINGLE On v9+, use TEX_GRADIENT to convert user-provided gradient into a gradient descriptor consumed by the hardware, and then supply that descriptor to TEX_SINGLE. Reviewed-by: Boris Brezillon Part-of: --- src/panfrost/compiler/bifrost_compile.c | 77 ++++++++++++++++++++----- src/panfrost/compiler/compiler.h | 2 +- src/panfrost/vulkan/panvk_vX_shader.c | 3 +- 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 124ed0b96be..c5b7551c0f0 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -3711,7 +3711,8 @@ enum valhall_tex_sreg { VALHALL_TEX_SREG_SHADOW = 5, VALHALL_TEX_SREG_OFFSETMS = 6, VALHALL_TEX_SREG_LOD = 7, - VALHALL_TEX_SREG_GRDESC = 8, + VALHALL_TEX_SREG_GRDESC0 = 8, + VALHALL_TEX_SREG_GRDESC1 = 9, VALHALL_TEX_SREG_COUNT, }; @@ -3723,12 +3724,15 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) bool has_lod_mode = (instr->op == nir_texop_tex) || (instr->op == nir_texop_txl) || + (instr->op == nir_texop_txd) || (instr->op == nir_texop_txb); /* 32-bit indices to be allocated as consecutive staging registers */ bi_index sregs[VALHALL_TEX_SREG_COUNT] = {}; bi_index sampler = bi_imm_u32(instr->sampler_index); bi_index texture = bi_imm_u32(instr->texture_index); + bi_index ddx = bi_null(); + bi_index ddy = bi_null(); for (unsigned i = 0; i < instr->num_srcs; ++i) { bi_index index = bi_src_index(&instr->src[i].src); @@ -3774,6 +3778,14 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) } break; + case nir_tex_src_ddx: + ddx = index; + break; + + case nir_tex_src_ddy: + ddy = index; + break; + case nir_tex_src_bias: /* Upper 16-bits interpreted as a clamp, leave zero */ assert(sz == 16 || sz == 32); @@ -3819,19 +3831,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) explicit_offset = true; } - /* Allocate staging registers contiguously by compacting the array. */ - unsigned sr_count = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) { - if (!bi_is_null(sregs[i])) - sregs[sr_count++] = sregs[i]; - } - - bi_index idx = sr_count ? bi_temp(b->shader) : bi_null(); - - if (sr_count) - bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32); - bool narrow_indices = va_is_valid_const_narrow_index(texture) && va_is_valid_const_narrow_index(sampler); @@ -3860,6 +3859,50 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim); + if (!bi_is_null(ddx) || !bi_is_null(ddy)) { + unsigned coords_comp_count = + instr->coord_components - + (instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE); + assert(!bi_is_null(ddx) && !bi_is_null(ddy)); + + lod_mode = BI_VA_LOD_MODE_GRDESC; + + bi_index derivs[6] = { + bi_extract(b, ddx, 0), + bi_extract(b, ddy, 0), + coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_null(), + coords_comp_count > 1 ? bi_extract(b, ddy, 1) : bi_null(), + coords_comp_count > 2 ? bi_extract(b, ddx, 2) : bi_null(), + coords_comp_count > 2 ? bi_extract(b, ddy, 2) : bi_null(), + }; + bi_index derivs_packed = bi_temp(b->shader); + bi_make_vec_to(b, derivs_packed, derivs, NULL, coords_comp_count * 2, 32); + bi_index grdesc = bi_temp(b->shader); + bi_instr *I = bi_tex_gradient_to(b, grdesc, derivs_packed, src0, src1, dim, + !narrow_indices, 3, coords_comp_count * 2); + I->derivative_enable = true; + I->force_delta_enable = false; + I->lod_clamp_disable = true; + I->lod_bias_disable = true; + I->register_format = BI_REGISTER_FORMAT_U32; + + bi_emit_cached_split_i32(b, grdesc, 2); + sregs[VALHALL_TEX_SREG_GRDESC0] = bi_extract(b, grdesc, 0); + sregs[VALHALL_TEX_SREG_GRDESC1] = bi_extract(b, grdesc, 1); + } + + /* Allocate staging registers contiguously by compacting the array. */ + unsigned sr_count = 0; + for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) { + if (!bi_is_null(sregs[i])) + sregs[sr_count++] = sregs[i]; + } + + bi_index idx = sr_count ? bi_temp(b->shader) : bi_null(); + + if (sr_count) + bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32); + if (instr->op == nir_texop_lod) { assert(instr->def.num_components == 2 && instr->def.bit_size == 32); @@ -3897,8 +3940,9 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) switch (instr->op) { case nir_texop_tex: - case nir_texop_txl: case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: bi_tex_single_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt, instr->is_shadow, explicit_offset, lod_mode, !narrow_indices, mask, sr_count); @@ -5192,7 +5236,8 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) .lower_txs_lod = true, .lower_txp = ~0, .lower_tg4_broadcom_swizzle = true, - .lower_txd = true, + .lower_txd_cube_map = true, + .lower_txd = pan_arch(gpu_id) < 9, .lower_invalid_implicit_lod = true, .lower_index_to_offset = true, }); diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 9dc342a76fa..244e5a7ce86 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -371,7 +371,7 @@ bi_is_value_equiv(bi_index left, bi_index right) #define BI_MAX_VEC 8 #define BI_MAX_DESTS 4 -#define BI_MAX_SRCS 6 +#define BI_MAX_SRCS 8 typedef struct { /* Must be first */ diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 9e34183b51e..5c821752707 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -338,7 +338,8 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir) .lower_txs_lod = true, .lower_txp = ~0, .lower_tg4_broadcom_swizzle = true, - .lower_txd = true, + .lower_txd_cube_map = true, + .lower_txd = PAN_ARCH < 9, .lower_invalid_implicit_lod = true, }; NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);