brw: use fp64 to compute coarse_z

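For some reason we cannot get the precision needed from the HW when computing coarse_z at fp32, so do the computation at fp64 on devices that support 64-bit floats (falling back to fp32 otherwise) and convert the result back to fp32.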

LNL internal fossildb changes:

Totals from 7226 (0.76% of 947978) affected shaders:
Instrs: 5512598 -> 5586086 (+1.33%); split: -0.00%, +1.33%
Cycle count: 153836056 -> 155079472 (+0.81%); split: -0.77%, +1.58%
Spill count: 2025 -> 2021 (-0.20%); split: -0.35%, +0.15%
Fill count: 3139 -> 3112 (-0.86%); split: -1.12%, +0.25%
Max live registers: 1034601 -> 1034632 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 207296 -> 207264 (-0.02%); split: +0.02%, -0.03%
Non SSA regs after NIR: 1147942 -> 1109326 (-3.36%)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12726
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38996>
This commit is contained in:
Lionel Landwerlin 2025-12-18 20:48:10 +02:00 committed by Marge Bot
parent a19e949824
commit 79aff6e274
4 changed files with 19 additions and 12 deletions

View file

@ -7,9 +7,9 @@ traces:
0ad/0ad-v2.trace:
gl-zink-anv-adl:
label: [no-perf]
checksum: ff0d4b072dd613b6f11c351027db3bb3
checksum: 0d04e54fa259d407433a7925c739900a
gl-zink-anv-tgl:
checksum: ff0d4b072dd613b6f11c351027db3bb3
checksum: 0d04e54fa259d407433a7925c739900a
zink-radv-vangogh:
checksum: 52cabbe16a14628f92df31e0fb4c109e
zink-radv-gfx1201:

View file

@ -1508,7 +1508,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
NIR_PASS(_, nir, brw_nir_lower_frag_coord_z);
NIR_PASS(_, nir, brw_nir_lower_frag_coord_z, devinfo);
if (!brw_wm_prog_key_is_dynamic(key)) {
uint32_t f = 0;

View file

@ -1308,7 +1308,7 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
}
static bool
lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *)
lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
if (intrin->intrinsic != nir_intrinsic_load_frag_coord_z)
return false;
@ -1316,25 +1316,31 @@ lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *)
b->cursor = nir_after_instr(&intrin->instr);
b->fp_math_ctrl = nir_fp_no_fast_math;
nir_def *start = nir_load_fs_start_intel(b);
nir_def *z_c = nir_load_fs_z_c_intel(b);
nir_def *z_c0 = nir_load_fs_z_c0_intel(b);
nir_def *coord = nir_fadd_imm(b, nir_i2f32(b, nir_load_pixel_coord(b)), 0.5f);
const struct intel_device_info *devinfo = (const struct intel_device_info *)data;
const unsigned precision = devinfo->has_64bit_float ? 64 : 32;
nir_def *start = nir_f2fN(b, nir_load_fs_start_intel(b), precision);
nir_def *z_c = nir_f2fN(b, nir_load_fs_z_c_intel(b), precision);
nir_def *z_c0 = nir_f2fN(b, nir_load_fs_z_c0_intel(b), precision);
nir_def *coord = nir_fadd_imm(
b, nir_i2fN(b, nir_load_pixel_coord(b), precision), 0.5f);
nir_def *offset = nir_fsub(b, coord, start);
nir_def *dot = nir_fdot(b, offset, z_c);
nir_def *coarse_z = nir_fadd(b, dot, z_c0);
nir_def_replace(&intrin->def, coarse_z);
nir_def_replace(&intrin->def, nir_f2f32(b, coarse_z));
return true;
}
bool
brw_nir_lower_frag_coord_z(nir_shader *nir)
brw_nir_lower_frag_coord_z(nir_shader *nir,
const struct intel_device_info *devinfo)
{
return nir_shader_intrinsics_pass(nir, lower_frag_coord_z_instr,
nir_metadata_control_flow, NULL);
nir_metadata_control_flow,
(void *)devinfo);
}
static bool

View file

@ -260,7 +260,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_fs_load_output(nir_shader *shader,
const struct brw_wm_prog_key *key);
bool brw_nir_lower_frag_coord_z(nir_shader *nir);
bool brw_nir_lower_frag_coord_z(nir_shader *nir,
const struct intel_device_info *devinfo);
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);