mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 00:40:10 +01:00
intel/compiler: Add a flag to avoid compacting push constants
In vec4, we can just not run the pass. In fs, things are a bit more deeply intertwined.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
parent
aecde23519
commit
d1c4e64a69
6 changed files with 177 additions and 152 deletions
|
|
@ -673,6 +673,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
||||||
screen->compiler->shader_perf_log = iris_shader_perf_log;
|
screen->compiler->shader_perf_log = iris_shader_perf_log;
|
||||||
screen->compiler->supports_pull_constants = false;
|
screen->compiler->supports_pull_constants = false;
|
||||||
screen->compiler->supports_shader_constants = true;
|
screen->compiler->supports_shader_constants = true;
|
||||||
|
screen->compiler->compact_params = false;
|
||||||
|
|
||||||
iris_disk_cache_init(screen);
|
iris_disk_cache_init(screen);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,12 @@ struct brw_compiler {
|
||||||
* whether nir_opt_large_constants will be run.
|
* whether nir_opt_large_constants will be run.
|
||||||
*/
|
*/
|
||||||
bool supports_shader_constants;
|
bool supports_shader_constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not the driver wants uniform params to be compacted by the
|
||||||
|
* back-end compiler.
|
||||||
|
*/
|
||||||
|
bool compact_params;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -2307,6 +2307,7 @@ fs_visitor::assign_constant_locations()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (compiler->compact_params) {
|
||||||
struct uniform_slot_info slots[uniforms];
|
struct uniform_slot_info slots[uniforms];
|
||||||
memset(slots, 0, sizeof(slots));
|
memset(slots, 0, sizeof(slots));
|
||||||
|
|
||||||
|
|
@ -2316,10 +2317,10 @@ fs_visitor::assign_constant_locations()
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* NIR tightly packs things so the uniform number might not be
|
/* NIR tightly packs things so the uniform number might not be
|
||||||
* aligned (if we have a double right after a float, for instance).
|
* aligned (if we have a double right after a float, for
|
||||||
* This is fine because the process of re-arranging them will ensure
|
* instance). This is fine because the process of re-arranging
|
||||||
* that things are properly aligned. The offset into that uniform,
|
* them will ensure that things are properly aligned. The offset
|
||||||
* however, must be aligned.
|
* into that uniform, however, must be aligned.
|
||||||
*
|
*
|
||||||
* In Vulkan, we have explicit offsets but everything is crammed
|
* In Vulkan, we have explicit offsets but everything is crammed
|
||||||
* into a single "variable" so inst->src[i].nr will always be 0.
|
* into a single "variable" so inst->src[i].nr will always be 0.
|
||||||
|
|
@ -2362,9 +2363,9 @@ fs_visitor::assign_constant_locations()
|
||||||
if (subgroup_id_index >= 0)
|
if (subgroup_id_index >= 0)
|
||||||
max_push_components--; /* Save a slot for the thread ID */
|
max_push_components--; /* Save a slot for the thread ID */
|
||||||
|
|
||||||
/* We push small arrays, but no bigger than 16 floats. This is big enough
|
/* We push small arrays, but no bigger than 16 floats. This is big
|
||||||
* for a vec4 but hopefully not large enough to push out other stuff. We
|
* enough for a vec4 but hopefully not large enough to push out other
|
||||||
* should probably use a better heuristic at some point.
|
* stuff. We should probably use a better heuristic at some point.
|
||||||
*/
|
*/
|
||||||
const unsigned int max_chunk_size = 16;
|
const unsigned int max_chunk_size = 16;
|
||||||
|
|
||||||
|
|
@ -2462,20 +2463,6 @@ fs_visitor::assign_constant_locations()
|
||||||
num_pull_constants);
|
num_pull_constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now that we know how many regular uniforms we'll push, reduce the
|
|
||||||
* UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
|
|
||||||
*/
|
|
||||||
unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
|
|
||||||
|
|
||||||
if (push_length + range->length > 64)
|
|
||||||
range->length = 64 - push_length;
|
|
||||||
|
|
||||||
push_length += range->length;
|
|
||||||
}
|
|
||||||
assert(push_length <= 64);
|
|
||||||
|
|
||||||
/* Up until now, the param[] array has been indexed by reg + offset
|
/* Up until now, the param[] array has been indexed by reg + offset
|
||||||
* of UNIFORM registers. Move pull constants into pull_param[] and
|
* of UNIFORM registers. Move pull constants into pull_param[] and
|
||||||
* condense param[] to only contain the uniforms we chose to push.
|
* condense param[] to only contain the uniforms we chose to push.
|
||||||
|
|
@ -2493,6 +2480,32 @@ fs_visitor::assign_constant_locations()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ralloc_free(param);
|
ralloc_free(param);
|
||||||
|
} else {
|
||||||
|
/* If we don't want to compact anything, just set up dummy push/pull
|
||||||
|
* arrays. All the rest of the compiler cares about are these arrays.
|
||||||
|
*/
|
||||||
|
push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
|
||||||
|
pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
|
||||||
|
|
||||||
|
for (unsigned u = 0; u < uniforms; u++)
|
||||||
|
push_constant_loc[u] = u;
|
||||||
|
|
||||||
|
memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now that we know how many regular uniforms we'll push, reduce the
|
||||||
|
* UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
|
||||||
|
*/
|
||||||
|
unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
|
||||||
|
|
||||||
|
if (push_length + range->length > 64)
|
||||||
|
range->length = 64 - push_length;
|
||||||
|
|
||||||
|
push_length += range->length;
|
||||||
|
}
|
||||||
|
assert(push_length <= 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
||||||
|
|
@ -633,6 +633,9 @@ set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
|
||||||
void
|
void
|
||||||
vec4_visitor::pack_uniform_registers()
|
vec4_visitor::pack_uniform_registers()
|
||||||
{
|
{
|
||||||
|
if (!compiler->compact_params)
|
||||||
|
return;
|
||||||
|
|
||||||
uint8_t chans_used[this->uniforms];
|
uint8_t chans_used[this->uniforms];
|
||||||
int new_loc[this->uniforms];
|
int new_loc[this->uniforms];
|
||||||
int new_chan[this->uniforms];
|
int new_chan[this->uniforms];
|
||||||
|
|
|
||||||
|
|
@ -560,6 +560,7 @@ anv_physical_device_init(struct anv_physical_device *device,
|
||||||
device->compiler->constant_buffer_0_is_relative =
|
device->compiler->constant_buffer_0_is_relative =
|
||||||
device->info.gen < 8 || !device->has_context_isolation;
|
device->info.gen < 8 || !device->has_context_isolation;
|
||||||
device->compiler->supports_shader_constants = true;
|
device->compiler->supports_shader_constants = true;
|
||||||
|
device->compiler->compact_params = true;
|
||||||
|
|
||||||
/* Broadwell PRM says:
|
/* Broadwell PRM says:
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -2799,6 +2799,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
|
||||||
!(screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION);
|
!(screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION);
|
||||||
|
|
||||||
screen->compiler->supports_pull_constants = true;
|
screen->compiler->supports_pull_constants = true;
|
||||||
|
screen->compiler->compact_params = true;
|
||||||
|
|
||||||
screen->has_exec_fence =
|
screen->has_exec_fence =
|
||||||
intel_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE);
|
intel_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue