mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 13:20:14 +01:00
ir3: Rewrite UBO push analysis to support bindless
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
This commit is contained in:
parent
274f3815a5
commit
fc850080ee
4 changed files with 113 additions and 62 deletions
|
|
@ -41,18 +41,62 @@ get_ubo_load_range(nir_intrinsic_instr *instr)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Look up the push range in `state` that belongs to the UBO addressed by
 * instr->src[0], optionally claiming the first unused slot for it.
 *
 * A range is keyed by (block, bindless_base, bindless):
 *  - if src[0] is constant, block is that constant and bindless is false;
 *  - otherwise src[0] must resolve (via ir3_bindless_resource()) to an
 *    intrinsic whose own src[0] is constant; block is that constant,
 *    bindless_base is the intrinsic's desc_set and bindless is true.
 *
 * Slots are scanned in order.  A slot with end < start is treated as unused
 * and terminates the scan: with create_new it is tagged with the key and
 * returned, without create_new the lookup fails.  Claiming a slot does not
 * touch start/end, so the slot still reads as unused until the caller
 * widens it.
 *
 * Returns NULL when the block index is not constant, when no matching range
 * exists and create_new is false, or when all IR3_MAX_UBO_PUSH_RANGES slots
 * are occupied by other keys.
 */
static struct ir3_ubo_range *
|
||||||

get_existing_range(nir_intrinsic_instr *instr,
|
||||||

struct ir3_ubo_analysis_state *state,
|
||||||

bool create_new)
|
||||||

{
|
||||||

unsigned block, base = 0;
|
||||||

bool bindless;
|
||||||

/* Non-bindless case: src[0] is the constant block index itself. */
if (nir_src_is_const(instr->src[0])) {
|
||||||

block = nir_src_as_uint(instr->src[0]);
|
||||||

bindless = false;
|
||||||

} else {
|
||||||

/* NOTE(review): ir3_bindless_resource() is defined elsewhere; presumably
 * it returns the bindless resource intrinsic feeding this source, or NULL
 * if there is none -- confirm against its definition.
 */
nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
|
||||||

if (rsrc && nir_src_is_const(rsrc->src[0])) {
|
||||||

block = nir_src_as_uint(rsrc->src[0]);
|
||||||

base = nir_intrinsic_desc_set(rsrc);
|
||||||

bindless = true;
|
||||||

} else {
|
||||||

return NULL;
|
||||||

}
|
||||||

}
|
||||||

for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
|
||||||

struct ir3_ubo_range *range = &state->range[i];
|
||||||

/* end < start marks an unused slot; the scan stops at the first one. */
if (range->end < range->start) {
|
||||||

/* We don't have a matching range, but there are more available.
|
||||||

*/
|
||||||

if (create_new) {
|
||||||

range->block = block;
|
||||||

range->bindless_base = base;
|
||||||

range->bindless = bindless;
|
||||||

return range;
|
||||||

} else {
|
||||||

return NULL;
|
||||||

}
|
||||||

} else if (range->block == block && range->bindless_base == base &&
|
||||||

range->bindless == bindless) {
|
||||||

return range;
|
||||||

}
|
||||||

}
|
||||||

|
||||||

/* Every slot is in use and none matches this key. */
return NULL;
|
||||||

}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
|
gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
|
||||||
struct ir3_ubo_analysis_state *state)
|
struct ir3_ubo_analysis_state *state)
|
||||||
{
|
{
|
||||||
if (!nir_src_is_const(instr->src[0]))
|
struct ir3_ubo_range *old_r = get_existing_range(instr, state, true);
|
||||||
|
if (!old_r)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!nir_src_is_const(instr->src[1])) {
|
if (!nir_src_is_const(instr->src[1])) {
|
||||||
if (nir_src_as_uint(instr->src[0]) == 0) {
|
if (!old_r->bindless && old_r->block == 0) {
|
||||||
/* If this is an indirect on UBO 0, we'll still lower it back to
|
/* If this is an indirect on UBO 0, we'll still lower it back to
|
||||||
* load_uniform. Set the range to cover all of UBO 0.
|
* load_uniform. Set the range to cover all of UBO 0.
|
||||||
*/
|
*/
|
||||||
|
state->range[0].start = 0;
|
||||||
state->range[0].end = ALIGN(nir->num_uniforms * 16, 16 * 4);
|
state->range[0].end = ALIGN(nir->num_uniforms * 16, 16 * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -60,18 +104,17 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
|
||||||
}
|
}
|
||||||
|
|
||||||
const struct ir3_ubo_range r = get_ubo_load_range(instr);
|
const struct ir3_ubo_range r = get_ubo_load_range(instr);
|
||||||
const uint32_t block = nir_src_as_uint(instr->src[0]);
|
|
||||||
|
|
||||||
/* if UBO lowering is disabled, we still want to lower block 0
|
/* if UBO lowering is disabled, we still want to lower block 0
|
||||||
* (which is normal uniforms):
|
* (which is normal uniforms):
|
||||||
*/
|
*/
|
||||||
if ((block > 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
|
if ((old_r->bindless || old_r->block != 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (r.start < state->range[block].start)
|
if (r.start < old_r->start)
|
||||||
state->range[block].start = r.start;
|
old_r->start = r.start;
|
||||||
if (state->range[block].end < r.end)
|
if (old_r->end < r.end)
|
||||||
state->range[block].end = r.end;
|
old_r->end = r.end;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For indirect offset, it is common to see a pattern of multiple
|
/* For indirect offset, it is common to see a pattern of multiple
|
||||||
|
|
@ -142,12 +185,11 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
|
||||||
* could probably with some effort determine a block stride in number of
|
* could probably with some effort determine a block stride in number of
|
||||||
* registers.
|
* registers.
|
||||||
*/
|
*/
|
||||||
if (!nir_src_is_const(instr->src[0]))
|
struct ir3_ubo_range *range = get_existing_range(instr, state, false);
|
||||||
|
if (!range)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const uint32_t block = nir_src_as_uint(instr->src[0]);
|
if (range->bindless || range->block > 0) {
|
||||||
|
|
||||||
if (block > 0) {
|
|
||||||
/* We don't lower dynamic array indexing either, but we definitely should.
|
/* We don't lower dynamic array indexing either, but we definitely should.
|
||||||
* We don't have a good way of determining the range of the dynamic
|
* We don't have a good way of determining the range of the dynamic
|
||||||
* access, so for now just fall back to pulling.
|
* access, so for now just fall back to pulling.
|
||||||
|
|
@ -159,8 +201,7 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
|
||||||
* upload. Reject if we're now outside the range.
|
* upload. Reject if we're now outside the range.
|
||||||
*/
|
*/
|
||||||
const struct ir3_ubo_range r = get_ubo_load_range(instr);
|
const struct ir3_ubo_range r = get_ubo_load_range(instr);
|
||||||
if (!(state->range[block].start <= r.start &&
|
if (!(range->start <= r.start && r.end <= range->end))
|
||||||
r.end <= state->range[block].end))
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -186,8 +227,7 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
|
||||||
debug_assert(!(const_offset & 0x3));
|
debug_assert(!(const_offset & 0x3));
|
||||||
const_offset >>= 2;
|
const_offset >>= 2;
|
||||||
|
|
||||||
const int range_offset =
|
const int range_offset = (range->offset - range->start) / 4;
|
||||||
(state->range[block].offset - state->range[block].start) / 4;
|
|
||||||
const_offset += range_offset;
|
const_offset += range_offset;
|
||||||
|
|
||||||
nir_intrinsic_instr *uniform =
|
nir_intrinsic_instr *uniform =
|
||||||
|
|
@ -213,6 +253,9 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
|
||||||
struct ir3_ubo_analysis_state *state = &shader->ubo_state;
|
struct ir3_ubo_analysis_state *state = &shader->ubo_state;
|
||||||
|
|
||||||
memset(state, 0, sizeof(*state));
|
memset(state, 0, sizeof(*state));
|
||||||
|
for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
|
||||||
|
state->range[i].start = UINT32_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
nir_foreach_function (function, nir) {
|
nir_foreach_function (function, nir) {
|
||||||
if (function->impl) {
|
if (function->impl) {
|
||||||
|
|
@ -236,7 +279,13 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
|
||||||
*/
|
*/
|
||||||
const uint32_t max_upload = 16 * 1024;
|
const uint32_t max_upload = 16 * 1024;
|
||||||
uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
|
uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
|
||||||
|
state->num_enabled = ARRAY_SIZE(state->range);
|
||||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
|
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
|
||||||
|
if (state->range[i].start >= state->range[i].end) {
|
||||||
|
state->num_enabled = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t range_size = state->range[i].end - state->range[i].start;
|
uint32_t range_size = state->range[i].end - state->range[i].start;
|
||||||
|
|
||||||
debug_assert(offset <= max_upload);
|
debug_assert(offset <= max_upload);
|
||||||
|
|
@ -247,8 +296,6 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
|
||||||
}
|
}
|
||||||
offset += range_size;
|
offset += range_size;
|
||||||
|
|
||||||
if (state->range[i].start < state->range[i].end)
|
|
||||||
state->enabled |= 1 << i;
|
|
||||||
}
|
}
|
||||||
state->size = offset;
|
state->size = offset;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ enum ir3_driver_param {
|
||||||
#define IR3_MAX_SO_BUFFERS 4
|
#define IR3_MAX_SO_BUFFERS 4
|
||||||
#define IR3_MAX_SO_STREAMS 4
|
#define IR3_MAX_SO_STREAMS 4
|
||||||
#define IR3_MAX_SO_OUTPUTS 64
|
#define IR3_MAX_SO_OUTPUTS 64
|
||||||
#define IR3_MAX_CONSTANT_BUFFERS 32
|
#define IR3_MAX_UBO_PUSH_RANGES 32
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -619,13 +619,16 @@ ir3_shader_stage(struct ir3_shader_variant *v)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ir3_ubo_range {
|
struct ir3_ubo_range {
|
||||||
uint32_t offset; /* start offset of this block in const register file */
|
uint32_t offset; /* start offset to push in the const register file */
|
||||||
|
uint32_t block; /* Which constant block */
|
||||||
uint32_t start, end; /* range of block that's actually used */
|
uint32_t start, end; /* range of block that's actually used */
|
||||||
|
uint16_t bindless_base; /* For bindless, which base register is used */
|
||||||
|
bool bindless;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ir3_ubo_analysis_state {
|
struct ir3_ubo_analysis_state {
|
||||||
struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
|
struct ir3_ubo_range range[IR3_MAX_UBO_PUSH_RANGES];
|
||||||
uint32_t enabled;
|
uint32_t num_enabled;
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
uint32_t lower_count;
|
uint32_t lower_count;
|
||||||
uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
|
uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
|
||||||
|
|
|
||||||
|
|
@ -2691,49 +2691,47 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
|
||||||
tu_cs_emit(cs, push_constants[i + offset * 4]);
|
tu_cs_emit(cs, push_constants[i + offset * 4]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
|
for (uint32_t i = 0; i < state->num_enabled; i++) {
|
||||||
if (state->range[i].start < state->range[i].end) {
|
uint32_t size = state->range[i].end - state->range[i].start;
|
||||||
uint32_t size = state->range[i].end - state->range[i].start;
|
uint32_t offset = state->range[i].start;
|
||||||
uint32_t offset = state->range[i].start;
|
|
||||||
|
|
||||||
/* and even if the start of the const buffer is before
|
/* and even if the start of the const buffer is before
|
||||||
* first_immediate, the end may not be:
|
* first_immediate, the end may not be:
|
||||||
*/
|
*/
|
||||||
size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
|
size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* things should be aligned to vec4: */
|
/* things should be aligned to vec4: */
|
||||||
debug_assert((state->range[i].offset % 16) == 0);
|
debug_assert((state->range[i].offset % 16) == 0);
|
||||||
debug_assert((size % 16) == 0);
|
debug_assert((size % 16) == 0);
|
||||||
debug_assert((offset % 16) == 0);
|
debug_assert((offset % 16) == 0);
|
||||||
|
|
||||||
/* Look through the UBO map to find our UBO index, and get the VA for
|
/* Look through the UBO map to find our UBO index, and get the VA for
|
||||||
* that UBO.
|
* that UBO.
|
||||||
*/
|
*/
|
||||||
uint64_t va = 0;
|
uint64_t va = 0;
|
||||||
uint32_t ubo_idx = i - 1;
|
uint32_t ubo_idx = state->range[i].block - 1;
|
||||||
uint32_t ubo_map_base = 0;
|
uint32_t ubo_map_base = 0;
|
||||||
for (int j = 0; j < link->ubo_map.num; j++) {
|
for (int j = 0; j < link->ubo_map.num; j++) {
|
||||||
if (ubo_idx >= ubo_map_base &&
|
if (ubo_idx >= ubo_map_base &&
|
||||||
ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) {
|
ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) {
|
||||||
va = buffer_ptr(descriptors_state, &link->ubo_map, j,
|
va = buffer_ptr(descriptors_state, &link->ubo_map, j,
|
||||||
ubo_idx - ubo_map_base);
|
ubo_idx - ubo_map_base);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
ubo_map_base += link->ubo_map.array_size[j];
|
|
||||||
}
|
}
|
||||||
assert(va);
|
ubo_map_base += link->ubo_map.array_size[j];
|
||||||
|
|
||||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
|
|
||||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
|
|
||||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
|
||||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
|
||||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
|
||||||
CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
|
|
||||||
tu_cs_emit_qw(cs, va + offset);
|
|
||||||
}
|
}
|
||||||
|
assert(va);
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
|
||||||
|
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||||
|
CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
|
||||||
|
tu_cs_emit_qw(cs, va + offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -259,9 +259,12 @@ ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *
|
||||||
struct ir3_ubo_analysis_state *state;
|
struct ir3_ubo_analysis_state *state;
|
||||||
state = &v->shader->ubo_state;
|
state = &v->shader->ubo_state;
|
||||||
|
|
||||||
uint32_t i;
|
for (unsigned i = 0; i < state->num_enabled; i++) {
|
||||||
foreach_bit(i, state->enabled & constbuf->enabled_mask) {
|
assert(!state->range[i].bindless);
|
||||||
struct pipe_constant_buffer *cb = &constbuf->cb[i];
|
unsigned ubo = state->range[i].block;
|
||||||
|
if (!(constbuf->enabled_mask & (1 << ubo)))
|
||||||
|
continue;
|
||||||
|
struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
|
||||||
|
|
||||||
uint32_t size = state->range[i].end - state->range[i].start;
|
uint32_t size = state->range[i].end - state->range[i].start;
|
||||||
uint32_t offset = cb->buffer_offset + state->range[i].start;
|
uint32_t offset = cb->buffer_offset + state->range[i].start;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue