From 79aff6e27478698b680196242c25ff8fb287ecef Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 18 Dec 2025 20:48:10 +0200 Subject: [PATCH] brw: use fp64 to compute coarse_z For some reason we cannot get the precision needed from the HW at fp32. LNL internal fossildb changes : Totals from 7226 (0.76% of 947978) affected shaders: Instrs: 5512598 -> 5586086 (+1.33%); split: -0.00%, +1.33% Cycle count: 153836056 -> 155079472 (+0.81%); split: -0.77%, +1.58% Spill count: 2025 -> 2021 (-0.20%); split: -0.35%, +0.15% Fill count: 3139 -> 3112 (-0.86%); split: -1.12%, +0.25% Max live registers: 1034601 -> 1034632 (+0.00%); split: -0.00%, +0.00% Max dispatch width: 207296 -> 207264 (-0.02%); split: +0.02%, -0.03% Non SSA regs after NIR: 1147942 -> 1109326 (-3.36%) Signed-off-by: Lionel Landwerlin Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12726 Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/zink/ci/traces-zink.yml | 4 ++-- src/intel/compiler/brw/brw_compile_fs.cpp | 2 +- src/intel/compiler/brw/brw_nir.c | 22 +++++++++++++-------- src/intel/compiler/brw/brw_nir.h | 3 ++- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index 4e55f22e50c..d75e7cec77e 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -7,9 +7,9 @@ traces: 0ad/0ad-v2.trace: gl-zink-anv-adl: label: [no-perf] - checksum: ff0d4b072dd613b6f11c351027db3bb3 + checksum: 0d04e54fa259d407433a7925c739900a gl-zink-anv-tgl: - checksum: ff0d4b072dd613b6f11c351027db3bb3 + checksum: 0d04e54fa259d407433a7925c739900a zink-radv-vangogh: checksum: 52cabbe16a14628f92df31e0fb4c109e zink-radv-gfx1201: diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp index f3fdeb25752..eb2f3c27935 100644 --- a/src/intel/compiler/brw/brw_compile_fs.cpp +++ b/src/intel/compiler/brw/brw_compile_fs.cpp @@ -1508,7 +1508,7 @@ brw_compile_fs(const struct brw_compiler *compiler, } if (prog_data->coarse_pixel_dispatch != INTEL_NEVER) - NIR_PASS(_, nir, brw_nir_lower_frag_coord_z); + NIR_PASS(_, nir, brw_nir_lower_frag_coord_z, devinfo); if (!brw_wm_prog_key_is_dynamic(key)) { uint32_t f = 0; diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index a48f7b9f621..b134019671a 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1308,7 +1308,7 @@ brw_nir_lower_fs_outputs(nir_shader *nir) } static bool -lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *) +lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data) { if (intrin->intrinsic != nir_intrinsic_load_frag_coord_z) return false; @@ -1316,25 +1316,31 @@ lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *) b->cursor = nir_after_instr(&intrin->instr); b->fp_math_ctrl = nir_fp_no_fast_math; - nir_def *start = nir_load_fs_start_intel(b); - nir_def *z_c = nir_load_fs_z_c_intel(b); - nir_def *z_c0 = nir_load_fs_z_c0_intel(b); - nir_def *coord = nir_fadd_imm(b, nir_i2f32(b, nir_load_pixel_coord(b)), 0.5f); + const struct intel_device_info *devinfo = (const struct intel_device_info *)data; + const unsigned precision = devinfo->has_64bit_float ? 64 : 32; + + nir_def *start = nir_f2fN(b, nir_load_fs_start_intel(b), precision); + nir_def *z_c = nir_f2fN(b, nir_load_fs_z_c_intel(b), precision); + nir_def *z_c0 = nir_f2fN(b, nir_load_fs_z_c0_intel(b), precision); + nir_def *coord = nir_fadd_imm( + b, nir_i2fN(b, nir_load_pixel_coord(b), precision), 0.5f); nir_def *offset = nir_fsub(b, coord, start); nir_def *dot = nir_fdot(b, offset, z_c); nir_def *coarse_z = nir_fadd(b, dot, z_c0); - nir_def_replace(&intrin->def, coarse_z); + nir_def_replace(&intrin->def, nir_f2f32(b, coarse_z)); return true; } bool -brw_nir_lower_frag_coord_z(nir_shader *nir) +brw_nir_lower_frag_coord_z(nir_shader *nir, + const struct intel_device_info *devinfo) { return nir_shader_intrinsics_pass(nir, lower_frag_coord_z_instr, - nir_metadata_control_flow, NULL); + nir_metadata_control_flow, + (void *)devinfo); } static bool diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index 76cd7355357..fea495ebc01 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -260,7 +260,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir); bool brw_nir_lower_fs_load_output(nir_shader *shader, const struct brw_wm_prog_key *key); -bool brw_nir_lower_frag_coord_z(nir_shader *nir); +bool brw_nir_lower_frag_coord_z(nir_shader *nir, + const struct intel_device_info *devinfo); bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);