From eb14281b3c361b3b76848a3379a97dfa2cff0678 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 6 May 2026 11:01:45 +0200 Subject: [PATCH 1/4] pan/bi: TEX_GRADIENT may need helper invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we use the texture coordinates mode for TEX_GRADIENT we need valid texture coordinates on disabled lanes to compute correct lods across all pixels on a triangle, otherwise pixels along triangle edges will read garbage when computing coordinate deltas and produce bogus results. We previously tried to solve this by setting the force_delta_enable bit, but that doesn't always work... and worse, this bit isn't supported on V9, which means we sometimes end up generating illegal instructions. Fixes Piglit: shaders/zero-tex-coord texturequerylod Fixes: 4e58029dc01 ("pan/va: fix base-level for nir_texop_lod") Reviewed-by: Alejandro Piñeiro Reviewed-by: Erik Faye-Lund Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: Lorenzo Rossi --- src/panfrost/compiler/bifrost/bi_helper_invocations.c | 4 ++++ src/panfrost/compiler/pan_nir_lower_tex.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/panfrost/compiler/bifrost/bi_helper_invocations.c b/src/panfrost/compiler/bifrost/bi_helper_invocations.c index 5e59521a47e..e7b32aecffe 100644 --- a/src/panfrost/compiler/bifrost/bi_helper_invocations.c +++ b/src/panfrost/compiler/bifrost/bi_helper_invocations.c @@ -79,6 +79,10 @@ bi_instr_uses_helpers(bi_instr *I) case BI_OPCODE_TEX_SINGLE: return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) || (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS); + case BI_OPCODE_TEX_GRADIENT: + /* If we don't use derivatives to compute the lod we need disabled lanes + * to have valid texture coordinates. */ + return !I->derivative_enable; case BI_OPCODE_WMASK: /* Helpers are needed to implement voting in fragment shaders. 
*/ return true; diff --git a/src/panfrost/compiler/pan_nir_lower_tex.c b/src/panfrost/compiler/pan_nir_lower_tex.c index d6bb5ea8617..47935ce2760 100644 --- a/src/panfrost/compiler/pan_nir_lower_tex.c +++ b/src/panfrost/compiler/pan_nir_lower_tex.c @@ -979,7 +979,7 @@ va_lower_lod(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id) struct pan_va_tex_flags flags = { .wide_indices = tex_h->num_components > 1, .derivative_enable = false, - .force_delta_enable = true, + .force_delta_enable = false, }; tex_h = nir_pad_vector_imm_int(b, tex_h, 0, 2); From e393cece77fdb26d791208489d4a261ae24dc937 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 7 May 2026 11:42:48 +0200 Subject: [PATCH 2/4] pan/va: do not allow force_delta_enable on v9 This bit is reserved and should be zero on V9, so we should report an illegal instruction if we ever encounter it while packing. Reviewed-by: Iago Toral Quiroga Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: Lorenzo Rossi --- src/panfrost/compiler/bifrost/valhall/va_pack.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c index 0790005e49a..72bcab9607c 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_pack.c +++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c @@ -1043,8 +1043,11 @@ va_pack_instr(const bi_instr *I, unsigned arch) hex |= (1ull << 46); if (I->op == BI_OPCODE_TEX_GRADIENT) { - if (I->force_delta_enable) + if (I->force_delta_enable) { + if (arch < 10) + invalid_instruction(I, "gradient instruction does not support .force_delta_enable"); hex |= (1ull << 12); + } if (I->lod_bias_disable) hex |= (1ull << 13); if (I->lod_clamp_disable) From 5e4254f7706d729a4258d0a0e9d233111ec43261 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Fri, 8 Aug 2025 13:08:44 +0200 Subject: [PATCH 3/4] pan/bi: correct computation of lod.x Enabling clamping in the opcode here doesn't do quite what we need. 
This makes the HW clamp to the max LOD specified in the sampler, but we need to clamp to the maximum available LOD instead, which is the minimum of the max-lod of the sampler and the max level in the texture itself. We also need to take the mipmap mode into account when computing the level of detail. This is not something the TEX_GRADIENT instruction does, so we need to do this manually. Now that we no longer modify the flags in the loop, we can get rid of the loop altogether, and only issue a single TEX_GRADIENT instruction. While we're at it, clean up some naming to better match the phrasing from the spec. This only applies to Valhall for now. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/14867 Reviewed-by: Iago Toral Quiroga Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: Lorenzo Rossi --- src/panfrost/compiler/pan_nir_lower_tex.c | 58 ++++++++++++++++------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/panfrost/compiler/pan_nir_lower_tex.c b/src/panfrost/compiler/pan_nir_lower_tex.c index 47935ce2760..e0a9aaf7d03 100644 --- a/src/panfrost/compiler/pan_nir_lower_tex.c +++ b/src/panfrost/compiler/pan_nir_lower_tex.c @@ -980,6 +980,7 @@ va_lower_lod(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id) .wide_indices = tex_h->num_components > 1, .derivative_enable = false, .force_delta_enable = false, + .lod_clamp_disable = true, }; tex_h = nir_pad_vector_imm_int(b, tex_h, 0, 2); @@ -988,29 +989,50 @@ va_lower_lod(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id) if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) coord = build_cube_desc(b, coord); - nir_def *comps[2]; - for (unsigned i = 0; i < 2; i++) { - flags.lod_clamp_disable = i != 0; - nir_def *grdesc = nir_build_tex(b, nir_texop_gradient_pan, - .dim = tex->sampler_dim, - .dest_type = nir_type_int32, - .backend_flags = PAN_AS_U32(flags), - .texture_handle = tex_h, - .backend1 = coord); + nir_def *grdesc = nir_build_tex(b, nir_texop_gradient_pan, + .dim = 
tex->sampler_dim, + .dest_type = nir_type_int32, + .backend_flags = PAN_AS_U32(flags), + .texture_handle = tex_h, + .backend1 = coord); - nir_def *lod_i16 = nir_unpack_32_2x16_split_x(b, grdesc); + nir_def *lod_i16 = nir_unpack_32_2x16_split_x(b, grdesc); - assert(tex->dest_type == nir_type_float32); - nir_def *lod = nir_i2f32(b, lod_i16); + assert(tex->dest_type == nir_type_float32); + nir_def *lambda_prime = nir_fdiv_imm(b, nir_i2f32(b, lod_i16), 256.0); - lod = nir_fdiv_imm(b, lod, 256.0); - if (i == 0) - lod = nir_fround_even(b, lod); + nir_def *samp = pan_nir_load_va_desc(b, 2, 32, srcs.samp_h, 0); + nir_def *samp_w0 = nir_channel(b, samp, 0); + nir_def *samp_w1 = nir_channel(b, samp, 1); - comps[i] = lod; - } + /* decode min/max lod from descriptor */ + nir_def *min_lod = nir_ubitfield_extract_imm(b, samp_w1, 0, 13); + nir_def *max_lod = nir_ubitfield_extract_imm(b, samp_w1, 16, 13); + min_lod = nir_fdiv_imm(b, nir_u2f32(b, min_lod), 256.0); + max_lod = nir_fdiv_imm(b, nir_u2f32(b, max_lod), 256.0); - nir_def_replace(&tex->def, nir_vec2(b, comps[0], comps[1])); + /* clamp max_lod to actual number of levels */ + nir_def *levels = pan_nir_load_va_tex_levels(b, srcs.tex_h); + levels = nir_u2f32(b, nir_iadd_imm(b, levels, -1)); + max_lod = nir_fmin(b, max_lod, levels); + + /* clamp res.x to [min_lod, max_lod] range */ + nir_def *lod = nir_fclamp(b, lambda_prime, min_lod, max_lod); + + /* decode mipmap mode from descriptor */ + nir_def *mipmap_mode = nir_ubitfield_extract_imm(b, samp_w0, 30, 2); + + /* adjust lod.x for MALI_MIPMAP_MODE_NONE */ + lod = nir_bcsel(b, nir_ieq_imm(b, mipmap_mode, 1 /* MALI_MIPMAP_MODE_NONE */), + nir_imm_zero(b, 1, 32), lod); + + /* adjust lod.x for MALI_MIPMAP_MODE_NEAREST */ + nir_def *nearest_lod = + nir_fadd_imm(b, nir_fceil(b, nir_fadd_imm(b, lod, 0.5)), -1.0); + lod = nir_bcsel(b, nir_ieq_imm(b, mipmap_mode, 0 /* MALI_MIPMAP_MODE_NEAREST */), + nearest_lod, lod); + + nir_def_replace(&tex->def, nir_vec2(b, lod, lambda_prime)); 
return true; } From 44dfa55b9daee67a4598a3353a3c3d9bdf9e10e7 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Wed, 2 Apr 2025 09:41:28 +0200 Subject: [PATCH 4/4] panfrost: enable ARB_texture_query_lod on v9+ We've been reporting in features.txt that we support this extension unconditionally, but we didn't. Now that we have the bits wired up due to Vulkan, we can actually enable it on Valhall (v9) and later. Reviewed-by: Iago Toral Quiroga Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: Lorenzo Rossi --- docs/features.txt | 2 +- docs/relnotes/new_features.txt | 1 + src/gallium/drivers/panfrost/pan_screen.c | 1 + src/panfrost/ci/panfrost-g57-gles2-extensions.txt | 1 + src/panfrost/ci/panfrost-g610-gles2-extensions.txt | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 29197179949..3378cad8fea 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -135,7 +135,7 @@ GL 4.0, GLSL 4.00 --- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, softpipe, panfrost, crocus/gen6+) GL_ARB_texture_cube_map_array DONE (freedreno/a4xx+, nv50, softpipe, v3d, panfrost, crocus/gen6+) GL_ARB_texture_gather DONE (freedreno, nv50, softpipe, v3d, panfrost) - GL_ARB_texture_query_lod DONE (freedreno, nv50, softpipe, v3d, panfrost, crocus/gen5+) + GL_ARB_texture_query_lod DONE (freedreno, nv50, softpipe, v3d, panfrost/v9+, crocus/gen5+) GL_ARB_transform_feedback2 DONE (freedreno/a3xx+, nv50, softpipe, v3d, panfrost, crocus/gen6+) GL_ARB_transform_feedback3 DONE (freedreno/a3xx+, softpipe, panfrost) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 81c666f1278..05c6bfcbb27 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -10,3 +10,4 @@ shaderImageGatherExtended on pvr static C++ stdlib required on rusticl to workaround applications using their own C++ stdlib VK_EXT_pipeline_protected_access on RADV 
VK_EXT_extended_dynamic_state3 on panvk +GL_ARB_texture_query_lod on panfrost/v9+ diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 86d28d2de7a..af4cc7106db 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -690,6 +690,7 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) caps->depth_clip_disable = true; caps->mixed_framebuffer_sizes = true; caps->frontend_noop = true; + caps->texture_query_lod = dev->arch >= 9; caps->sample_shading = dev->arch >= 6; caps->fragment_shader_derivatives = true; caps->framebuffer_no_attachment = true; diff --git a/src/panfrost/ci/panfrost-g57-gles2-extensions.txt b/src/panfrost/ci/panfrost-g57-gles2-extensions.txt index c684b25453f..056c3653dbd 100644 --- a/src/panfrost/ci/panfrost-g57-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g57-gles2-extensions.txt @@ -62,6 +62,7 @@ GL_EXT_texture_filter_anisotropic GL_EXT_texture_format_BGRA8888 GL_EXT_texture_mirror_clamp_to_edge GL_EXT_texture_norm16 +GL_EXT_texture_query_lod GL_EXT_texture_rg GL_EXT_texture_sRGB_decode GL_EXT_texture_sRGB_R8 diff --git a/src/panfrost/ci/panfrost-g610-gles2-extensions.txt b/src/panfrost/ci/panfrost-g610-gles2-extensions.txt index c684b25453f..056c3653dbd 100644 --- a/src/panfrost/ci/panfrost-g610-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g610-gles2-extensions.txt @@ -62,6 +62,7 @@ GL_EXT_texture_filter_anisotropic GL_EXT_texture_format_BGRA8888 GL_EXT_texture_mirror_clamp_to_edge GL_EXT_texture_norm16 +GL_EXT_texture_query_lod GL_EXT_texture_rg GL_EXT_texture_sRGB_decode GL_EXT_texture_sRGB_R8