From 76cdbcb96a010c190d7a5a5f664f2a1b38e0782f Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 3 Apr 2026 21:25:47 +0200 Subject: [PATCH] llvmpipe: disable denorms in compute shaders on x86/sse For consistency with other shader stages (required by d3d, neither GL nor Vulkan really cares). This is a bit awkward, since we don't want to disable them for things like rusticl, which we should be able to distinguish by shader type. Note that to satisfy d3d requirements, disabling denorms in general is not sufficient, due to d3d requiring them to be disabled for single precision opcodes, but enabled for double precision ones, and x86 can't switch that individually (hence this will need per-instruction tracking and switching inside the shader). Reviewed-by: Brian Paul Part-of: --- src/gallium/drivers/llvmpipe/lp_cs_tpool.c | 30 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_cs_tpool.h | 1 + src/gallium/drivers/llvmpipe/lp_state_cs.c | 21 +++------------ src/gallium/drivers/llvmpipe/lp_state_cs.h | 21 +++++++++++++++ 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_cs_tpool.c b/src/gallium/drivers/llvmpipe/lp_cs_tpool.c index 4ce0b6941d5..b81019238db 100644 --- a/src/gallium/drivers/llvmpipe/lp_cs_tpool.c +++ b/src/gallium/drivers/llvmpipe/lp_cs_tpool.c @@ -30,7 +30,9 @@ #include "util/u_thread.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "lp_cs_tpool.h" +#include "compiler/shader_enums.h" static int lp_cs_tpool_worker(void *data) @@ -41,6 +43,9 @@ lp_cs_tpool_worker(void *data) memset(&lmem, 0, sizeof(lmem)); mtx_lock(&pool->m); + unsigned fpstate = util_fpstate_get(); + bool flush_denorms = false; + while (!pool->shutdown) { struct lp_cs_tpool_task *task; unsigned iter_per_thread; @@ -70,6 +75,17 @@ lp_cs_tpool_worker(void *data) list_del(&task->list); mtx_unlock(&pool->m); + + struct lp_cs_job_info *job_info = task->data; + if ((job_info->current->variant->stage == MESA_SHADER_KERNEL) == 
flush_denorms) { + if (flush_denorms) { + util_fpstate_set(fpstate); + flush_denorms = false; + } else { + util_fpstate_set_denorms_to_zero(fpstate); + flush_denorms = true; + } + } for (unsigned i = 0; i < iter_per_thread; i++) task->work(task->data, this_iter + i, &lmem); @@ -135,11 +151,25 @@ lp_cs_tpool_queue_task(struct lp_cs_tpool *pool, if (pool->num_threads == 0) { struct lp_cs_local_mem lmem; + struct lp_cs_job_info *job_info = data; + + unsigned fpstate = 0; + bool flush_denorms = false; + if (job_info->current->variant->stage != MESA_SHADER_KERNEL) { + fpstate = util_fpstate_get(); + util_fpstate_set_denorms_to_zero(fpstate); + flush_denorms = true; + } + memset(&lmem, 0, sizeof(lmem)); for (unsigned t = 0; t < num_iters; t++) { work(data, t, &lmem); } FREE(lmem.local_mem_ptr); + + if (flush_denorms) { + util_fpstate_set(fpstate); + } return NULL; } task = CALLOC_STRUCT(lp_cs_tpool_task); diff --git a/src/gallium/drivers/llvmpipe/lp_cs_tpool.h b/src/gallium/drivers/llvmpipe/lp_cs_tpool.h index 3d7ac028aab..13aad04d353 100644 --- a/src/gallium/drivers/llvmpipe/lp_cs_tpool.h +++ b/src/gallium/drivers/llvmpipe/lp_cs_tpool.h @@ -38,6 +38,7 @@ #include "util/u_thread.h" #include "util/list.h" +#include "lp_state_cs.h" #include "lp_limits.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index 940e771d41e..f9139265d27 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -61,23 +61,6 @@ static unsigned cs_no = 0; static unsigned task_no = 0; static unsigned mesh_no = 0; -struct lp_cs_job_info { - unsigned grid_size[3]; - unsigned iter_size[3]; - unsigned grid_base[3]; - unsigned block_size[3]; - unsigned req_local_mem; - unsigned work_dim; - unsigned draw_id; - bool zero_initialize_shared_memory; - bool use_iters; - struct lp_cs_exec *current; - struct vertex_header *io; - size_t io_stride; - void *payload; - size_t payload_stride; -}; - enum { 
CS_ARG_CONTEXT, CS_ARG_RESOURCES, @@ -1322,8 +1305,10 @@ generate_variant(struct llvmpipe_context *lp, lp_jit_init_cs_types(variant); + struct nir_shader *nir = shader->base.ir.nir; + variant->stage = nir->info.stage; + if (sh_type == MESA_SHADER_MESH) { - struct nir_shader *nir = shader->base.ir.nir; int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs); int out_count = util_bitcount64(nir->info.outputs_written); int per_vert_count = out_count - per_prim_count; diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.h b/src/gallium/drivers/llvmpipe/lp_state_cs.h index 1c7791d3937..f00b35996fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.h @@ -36,6 +36,24 @@ struct lp_compute_shader_variant; +struct lp_cs_job_info { + unsigned grid_size[3]; + unsigned iter_size[3]; + unsigned grid_base[3]; + unsigned block_size[3]; + unsigned req_local_mem; + unsigned work_dim; + unsigned draw_id; + bool zero_initialize_shared_memory; + bool use_iters; + struct lp_cs_exec *current; + struct vertex_header *io; + size_t io_stride; + void *payload; + size_t payload_stride; +}; + + struct lp_compute_shader_variant_key { unsigned nr_samplers:8; @@ -101,6 +119,9 @@ struct lp_compute_shader_variant struct lp_compute_shader *shader; + /* shader stage as declared in the shader (i.e. can be kernel) */ + mesa_shader_stage stage; + /* For debugging/profiling purposes */ unsigned no;