nir: Add a lower_fdiv option, turn fdiv into fmul/frcp.

The nir_opt_algebraic rule

   (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),

can produce new fdiv operations, which need to be lowered on i965, as we don't actually implement fdiv. (Normally, we handle this in GLSL IR's lower_instructions pass, but in the above case we introduce an fdiv after that point. So, make NIR do it for us.)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 7295f4fcc2)
2026-01-21 00:00:22 +01:00 · 2016-01-05 05:09:46 -08:00 · 2016-01-05 05:09:46 -08:00 · 536c8cbcd3
commit 536c8cbcd3
parent 978480d69f
3 changed files with 3 additions and 0 deletions
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@ -1435,6 +1435,7 @@ typedef struct nir_function {
                  exec_list_get_head(&(func)->overload_list), node)

 typedef struct nir_shader_compiler_options {
+   bool lower_fdiv;
   bool lower_ffma;
   bool lower_flrp;
   bool lower_fpow;
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@ -179,6 +179,7 @@ optimizations = [
   (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
   # Division and reciprocal
   (('fdiv', 1.0, a), ('frcp', a)),
+   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
   (('frcp', ('frcp', a)), a),
   (('frcp', ('fsqrt', a)), ('frsq', a)),
   (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@ -94,6 +94,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
   nir_shader_compiler_options *nir_options =
      rzalloc(compiler, nir_shader_compiler_options);
   nir_options->native_integers = true;
+   nir_options->lower_fdiv = true;
   /* In order to help allow for better CSE at the NIR level we tell NIR
    * to split all ffma instructions during opt_algebraic and we then
    * re-combine them as a later step.