mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
radeonsi/nir: gather buffers declared more accurately and use const fast path
For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip setting the more accurate masks for builtin uniforms for now as it causes some piglit regressions. Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
56017d8100
commit
dd3d3cc877
2 changed files with 90 additions and 6 deletions
|
|
@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
|
|||
static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
|
||||
LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
|
||||
|
||||
if (sel->info.const_buffers_declared == 1 &&
|
||||
sel->info.shader_buffers_declared == 0) {
|
||||
return load_const_buffer_desc_fast_path(ctx);
|
||||
}
|
||||
|
||||
index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
|
||||
index = LLVMBuildAdd(ctx->ac.builder, index,
|
||||
LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
|
||||
|
|
|
|||
|
|
@ -600,23 +600,97 @@ void si_nir_scan_shader(const struct nir_shader *nir,
|
|||
|
||||
info->num_outputs = num_outputs;
|
||||
|
||||
struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
|
||||
/* Intialise const_file_max[0] */
|
||||
info->const_file_max[0] = -1;
|
||||
|
||||
unsigned ubo_idx = 1;
|
||||
nir_foreach_variable(variable, &nir->uniforms) {
|
||||
const struct glsl_type *type = variable->type;
|
||||
enum glsl_base_type base_type =
|
||||
glsl_get_base_type(glsl_without_array(type));
|
||||
unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
|
||||
|
||||
/* Gather buffers declared bitmasks. Note: radeonsi doesn't
|
||||
* really use the mask (other than ubo_idx == 1 for regular
|
||||
* uniforms) its really only used for getting the buffer count
|
||||
* so we don't need to worry about the ordering.
|
||||
*/
|
||||
if (variable->interface_type != NULL) {
|
||||
if (variable->data.mode == nir_var_uniform) {
|
||||
|
||||
unsigned block_count;
|
||||
if (base_type != GLSL_TYPE_INTERFACE) {
|
||||
struct set_entry *entry =
|
||||
_mesa_set_search(ubo_set, variable->interface_type);
|
||||
|
||||
/* Check if we have already processed
|
||||
* a member from this ubo.
|
||||
*/
|
||||
if (entry)
|
||||
continue;
|
||||
|
||||
block_count = 1;
|
||||
} else {
|
||||
block_count = aoa_size;
|
||||
}
|
||||
|
||||
info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count);
|
||||
ubo_idx += block_count;
|
||||
|
||||
_mesa_set_add(ubo_set, variable->interface_type);
|
||||
}
|
||||
|
||||
if (variable->data.mode == nir_var_shader_storage) {
|
||||
/* TODO: make this more accurate */
|
||||
info->shader_buffers_declared =
|
||||
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We rely on the fact that nir_lower_samplers_as_deref has
|
||||
* eliminated struct dereferences.
|
||||
*/
|
||||
if (base_type == GLSL_TYPE_SAMPLER)
|
||||
if (base_type == GLSL_TYPE_SAMPLER) {
|
||||
info->samplers_declared |=
|
||||
u_bit_consecutive(variable->data.binding, aoa_size);
|
||||
else if (base_type == GLSL_TYPE_IMAGE)
|
||||
|
||||
if (variable->data.bindless) {
|
||||
info->const_buffers_declared |= 1;
|
||||
info->const_file_max[0] +=
|
||||
glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
} else if (base_type == GLSL_TYPE_IMAGE) {
|
||||
info->images_declared |=
|
||||
u_bit_consecutive(variable->data.binding, aoa_size);
|
||||
|
||||
if (variable->data.bindless) {
|
||||
info->const_buffers_declared |= 1;
|
||||
info->const_file_max[0] +=
|
||||
glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
} else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
|
||||
if (strncmp(variable->name, "state.", 6) == 0 ||
|
||||
strncmp(variable->name, "gl_", 3) == 0) {
|
||||
/* FIXME: figure out why piglit tests with builtin
|
||||
* uniforms are failing without this.
|
||||
*/
|
||||
info->const_buffers_declared =
|
||||
u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
|
||||
} else {
|
||||
info->const_buffers_declared |= 1;
|
||||
info->const_file_max[0] +=
|
||||
glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_set_destroy(ubo_set, NULL);
|
||||
|
||||
info->num_written_clipdistance = nir->info.clip_distance_array_size;
|
||||
info->num_written_culldistance = nir->info.cull_distance_array_size;
|
||||
info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
|
||||
|
|
@ -625,10 +699,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
|
|||
if (info->processor == PIPE_SHADER_FRAGMENT)
|
||||
info->uses_kill = nir->info.fs.uses_discard;
|
||||
|
||||
/* TODO make this more accurate */
|
||||
info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
|
||||
info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
|
||||
|
||||
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr(instr, block)
|
||||
|
|
@ -643,6 +713,13 @@ void si_nir_scan_shader(const struct nir_shader *nir,
|
|||
void
|
||||
si_lower_nir(struct si_shader_selector* sel)
|
||||
{
|
||||
/* Disable const buffer fast path for old LLVM versions */
|
||||
if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
|
||||
sel->info.const_buffers_declared == 1 &&
|
||||
sel->info.shader_buffers_declared == 0) {
|
||||
sel->info.const_buffers_declared |= 0x2;
|
||||
}
|
||||
|
||||
/* Adjust the driver location of inputs and outputs. The state tracker
|
||||
* interprets them as slots, while the ac/nir backend interprets them
|
||||
* as individual components.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue