diff --git a/src/gallium/drivers/llvmpipe/lp_cs_tpool.c b/src/gallium/drivers/llvmpipe/lp_cs_tpool.c index 4ce0b6941d5..b81019238db 100644 --- a/src/gallium/drivers/llvmpipe/lp_cs_tpool.c +++ b/src/gallium/drivers/llvmpipe/lp_cs_tpool.c @@ -30,7 +30,9 @@ #include "util/u_thread.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "lp_cs_tpool.h" +#include "compiler/shader_enums.h" static int lp_cs_tpool_worker(void *data) @@ -41,6 +43,9 @@ lp_cs_tpool_worker(void *data) memset(&lmem, 0, sizeof(lmem)); mtx_lock(&pool->m); + unsigned fpstate = util_fpstate_get(); /* FP state read once, before the task loop; reused below as the value to restore and as the argument to util_fpstate_set_denorms_to_zero() */ + bool flush_denorms = false; /* true while util_fpstate_set_denorms_to_zero() is the last FP state applied by this worker */ + while (!pool->shutdown) { struct lp_cs_tpool_task *task; unsigned iter_per_thread; @@ -70,6 +75,17 @@ lp_cs_tpool_worker(void *data) list_del(&task->list); mtx_unlock(&pool->m); + + struct lp_cs_job_info *job_info = task->data; /* NOTE(review): assumes every task queued on this pool carries a struct lp_cs_job_info as its data; confirm against callers */ + if ((job_info->current->variant->stage == MESA_SHADER_KERNEL) == flush_denorms) { /* taken only on a mismatch: a kernel-stage task while denorm flushing is on, or a non-kernel task while it is off; otherwise the FP state is left untouched */ + if (flush_denorms) { + util_fpstate_set(fpstate); + flush_denorms = false; + } else { + util_fpstate_set_denorms_to_zero(fpstate); + flush_denorms = true; + } + } for (unsigned i = 0; i < iter_per_thread; i++) task->work(task->data, this_iter + i, &lmem); @@ -135,11 +151,25 @@ lp_cs_tpool_queue_task(struct lp_cs_tpool *pool, if (pool->num_threads == 0) { struct lp_cs_local_mem lmem; + struct lp_cs_job_info *job_info = data; + + unsigned fpstate = 0; + bool flush_denorms = false; + if (job_info->current->variant->stage != MESA_SHADER_KERNEL) { /* non-kernel stages: save the current FP state, then switch to denorms-to-zero for the inline loop below */ + fpstate = util_fpstate_get(); + util_fpstate_set_denorms_to_zero(fpstate); + flush_denorms = true; + } + memset(&lmem, 0, sizeof(lmem)); for (unsigned t = 0; t < num_iters; t++) { work(data, t, &lmem); } FREE(lmem.local_mem_ptr); + + if (flush_denorms) { /* work ran inline in this function, so put back the FP state saved above before returning */ + util_fpstate_set(fpstate); + } return NULL; } task = CALLOC_STRUCT(lp_cs_tpool_task); diff --git a/src/gallium/drivers/llvmpipe/lp_cs_tpool.h b/src/gallium/drivers/llvmpipe/lp_cs_tpool.h index 3d7ac028aab..13aad04d353 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_cs_tpool.h +++ b/src/gallium/drivers/llvmpipe/lp_cs_tpool.h @@ -38,6 +38,7 @@ #include "util/u_thread.h" #include "util/list.h" +#include "lp_state_cs.h" #include "lp_limits.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index 940e771d41e..f9139265d27 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -61,23 +61,6 @@ static unsigned cs_no = 0; static unsigned task_no = 0; static unsigned mesh_no = 0; -struct lp_cs_job_info { - unsigned grid_size[3]; - unsigned iter_size[3]; - unsigned grid_base[3]; - unsigned block_size[3]; - unsigned req_local_mem; - unsigned work_dim; - unsigned draw_id; - bool zero_initialize_shared_memory; - bool use_iters; - struct lp_cs_exec *current; - struct vertex_header *io; - size_t io_stride; - void *payload; - size_t payload_stride; -}; - enum { CS_ARG_CONTEXT, CS_ARG_RESOURCES, @@ -1322,8 +1305,10 @@ generate_variant(struct llvmpipe_context *lp, lp_jit_init_cs_types(variant); + struct nir_shader *nir = shader->base.ir.nir; /* declared ahead of the mesh-only branch so the stage assignment below can use it too */ + variant->stage = nir->info.stage; /* store the stage from the NIR shader info on the variant */ + if (sh_type == MESA_SHADER_MESH) { - struct nir_shader *nir = shader->base.ir.nir; int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs); int out_count = util_bitcount64(nir->info.outputs_written); int per_vert_count = out_count - per_prim_count; diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.h b/src/gallium/drivers/llvmpipe/lp_state_cs.h index 1c7791d3937..f00b35996fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.h @@ -36,6 +36,24 @@ struct lp_compute_shader_variant; +struct lp_cs_job_info { /* declared in this header so code outside lp_state_cs.c (the thread pool in lp_cs_tpool.c) can dereference it */ + unsigned grid_size[3]; + unsigned iter_size[3]; + unsigned grid_base[3]; + unsigned block_size[3]; + unsigned req_local_mem; + unsigned work_dim; + unsigned draw_id; + bool zero_initialize_shared_memory; + bool use_iters; + struct lp_cs_exec *current; + struct 
vertex_header *io; + size_t io_stride; + void *payload; + size_t payload_stride; +}; + + struct lp_compute_shader_variant_key { unsigned nr_samplers:8; @@ -101,6 +119,9 @@ struct lp_compute_shader_variant struct lp_compute_shader *shader; + /* Shader stage as declared in the shader's NIR info, so it may be MESA_SHADER_KERNEL; assigned in generate_variant(). */ + mesa_shader_stage stage; + /* For debugging/profiling purposes */ unsigned no;