radeonsi: use common build buffer descriptor helpers

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29385>
This commit is contained in:
Samuel Pitoiset 2024-05-24 14:29:03 +02:00
parent 3c5173aabf
commit 139bc6b813
4 changed files with 131 additions and 222 deletions

View file

@@ -1033,26 +1033,23 @@ static void si_init_buffer_resources(struct si_context *sctx,
si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);
const struct ac_buffer_state buffer_state = {
.format = PIPE_FORMAT_R32_FLOAT,
.swizzle =
{
PIPE_SWIZZLE_X,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W,
},
.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
};
/* Initialize buffer descriptors, so that we don't have to do it at bind time. */
for (unsigned i = 0; i < num_buffers; i++) {
uint32_t *desc = descs->list + i * 4;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (sctx->gfx_level >= GFX12) {
desc[3] |= S_008F0C_FORMAT_GFX12(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (sctx->gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (sctx->gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
} else {
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
ac_set_buf_desc_word3(sctx->gfx_level, &buffer_state, &desc[3]);
}
}
@@ -1482,37 +1479,31 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource
/* Set the descriptor. */
uint32_t *desc = descs->list + slot * 4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
desc[2] = num_records;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid);
uint32_t swizzle_enable;
if (sctx->gfx_level >= GFX11) {
assert(!swizzle || element_size == 1 || element_size == 3); /* 4 or 16 bytes */
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(swizzle ? element_size : 0);
} else if (sctx->gfx_level >= GFX9) {
assert(!swizzle || element_size == 1); /* only 4 bytes on GFX9 */
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle);
swizzle_enable = swizzle ? element_size : 0;
} else {
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle);
desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
swizzle_enable = swizzle;
}
if (sctx->gfx_level >= GFX12) {
desc[3] |= S_008F0C_FORMAT_GFX12(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (sctx->gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (sctx->gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
const struct ac_buffer_state buffer_state = {
.va = va,
.size = num_records,
.format = PIPE_FORMAT_R32_FLOAT,
.swizzle = {
PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
},
.stride = stride,
.swizzle_enable = swizzle_enable,
.gfx10_oob_select = V_008F0C_OOB_SELECT_DISABLED,
.index_stride = index_stride,
.element_size = element_size,
.add_tid = add_tid,
};
ac_build_buffer_descriptor(sctx->gfx_level, &buffer_state, desc);
pipe_resource_reference(&buffers->buffers[slot], buffer);
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),

View file

@@ -44,20 +44,17 @@ static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
ac_nir_load_arg(b, &args->ac, args->gs_attr_address);
unsigned stride = 16 * shader->info.nr_param_exports;
uint32_t desc[4];
ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
(uint64_t)sel->screen->info.address32_hi << 32,
0xffffffff, stride, desc);
nir_def *comp[] = {
attr_address,
nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(sel->screen->info.address32_hi) |
S_008F04_STRIDE(stride) |
S_008F04_SWIZZLE_ENABLE_GFX11(3) /* 16B */),
nir_imm_int(b, 0xffffffff),
nir_imm_int(b, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
(sel->screen->info.gfx_level >= GFX12 ?
S_008F0C_FORMAT_GFX12(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) :
S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT)) |
S_008F0C_INDEX_STRIDE(2) /* 32 elements */),
nir_imm_int(b, desc[1]),
nir_imm_int(b, desc[2]),
nir_imm_int(b, desc[3]),
};
return nir_vec(b, comp, 4);
@@ -133,33 +130,17 @@ static nir_def *build_tess_ring_desc(nir_builder *b, struct si_screen *screen,
struct si_shader_args *args)
{
nir_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
uint32_t desc[4];
uint32_t rsrc3 =
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (screen->info.gfx_level >= GFX12) {
rsrc3 |= S_008F0C_FORMAT_GFX12(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (screen->info.gfx_level >= GFX11) {
rsrc3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (screen->info.gfx_level >= GFX10) {
rsrc3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
ac_build_raw_buffer_descriptor(screen->info.gfx_level,
(uint64_t)screen->info.address32_hi << 32,
0xffffffff, desc);
nir_def *comp[4] = {
addr,
nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(screen->info.address32_hi)),
nir_imm_int(b, 0xffffffff),
nir_imm_int(b, rsrc3),
nir_imm_int(b, desc[1]),
nir_imm_int(b, desc[2]),
nir_imm_int(b, desc[3]),
};
return nir_vec(b, comp, 4);
@@ -215,46 +196,34 @@ static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s)
if (!num_components)
continue;
nir_def *desc[4];
desc[0] = nir_unpack_64_2x32_split_x(b, base_addr);
desc[1] = nir_unpack_64_2x32_split_y(b, base_addr);
unsigned stride = 4 * num_components * sel->info.base.gs.vertices_out;
/* Limit on the stride field for <= GFX7. */
assert(stride < (1 << 14));
desc[1] = nir_ior_imm(
b, desc[1], S_008F04_STRIDE(stride) | S_008F04_SWIZZLE_ENABLE_GFX6(1));
unsigned num_records = s->shader->wave_size;
desc[2] = nir_imm_int(b, num_records);
uint32_t rsrc3 =
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
S_008F0C_ADD_TID_ENABLE(1);
const struct ac_buffer_state buffer_state = {
.size = num_records,
.format = PIPE_FORMAT_R32_FLOAT,
.swizzle = {
PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
},
.stride = stride,
.swizzle_enable = true,
.element_size = 1,
.index_stride = 1,
.add_tid = true,
.gfx10_oob_select = V_008F0C_OOB_SELECT_DISABLED,
};
uint32_t tmp_desc[4];
if (sel->screen->info.gfx_level >= GFX10) {
rsrc3 |=
S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
/* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
unsigned data_format =
sel->screen->info.gfx_level == GFX8 || sel->screen->info.gfx_level == GFX9 ?
0 : V_008F0C_BUF_DATA_FORMAT_32;
ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, tmp_desc);
rsrc3 |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(data_format) |
S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
}
desc[3] = nir_imm_int(b, rsrc3);
nir_def *desc[4];
desc[0] = nir_unpack_64_2x32_split_x(b, base_addr);
desc[1] = nir_ior_imm(b, nir_unpack_64_2x32_split_y(b, base_addr), tmp_desc[1]);
desc[2] = nir_imm_int(b, tmp_desc[2]);
desc[3] = nir_imm_int(b, tmp_desc[3]);
s->gsvs_ring[stream] = nir_vec(b, desc, 4);

View file

@@ -28,28 +28,25 @@ struct lower_resource_state {
/* Build the four-dword buffer descriptor used by the UBO fast path.
 *
 * b:       NIR builder used to emit the immediates and the final vec4.
 * addr_lo: placed unchanged into dword 0 of the returned descriptor.
 * sel:     supplies screen->info (gfx_level, address32_hi) and
 *          info.constbuf0_num_slots.
 *
 * Returns a vec4 whose dwords 1..3 are immediates computed at build time.
 *
 * NOTE(review): this listing contains two ways of producing dwords 1..3 --
 * a hand-packed one (addr_hi / rsrc3) and one going through
 * ac_build_buffer_descriptor (buffer_state / desc). As written, the first
 * return statement wins and the second one can never execute. This looks
 * like a diff rendered without its +/- markers; confirm which variant is
 * the intended one before relying on this text.
 */
static nir_def *load_ubo_desc_fast_path(nir_builder *b, nir_def *addr_lo,
struct si_shader_selector *sel)
{
/* Hand-packed variant: dword 1 holds only the high address bits taken from address32_hi. */
nir_def *addr_hi =
nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(sel->screen->info.address32_hi));
/* Helper-based variant: describe the buffer declaratively and let
 * ac_build_buffer_descriptor pack it. Only the upper 32 address bits are set
 * in .va; the low half is supplied at run time through addr_lo.
 */
const struct ac_buffer_state buffer_state = {
.va = (uint64_t)sel->screen->info.address32_hi << 32,
/* presumably 16 bytes per constant-buffer slot -- TODO confirm */
.size = sel->info.constbuf0_num_slots * 16,
.format = PIPE_FORMAT_R32_FLOAT,
/* Identity component mapping. */
.swizzle =
{
PIPE_SWIZZLE_X,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W,
},
.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
};
uint32_t desc[4];
/* Hand-packed dword 3 starts with the identity X/Y/Z/W destination selects;
 * the generation-specific format bits are OR'd in below.
 */
uint32_t rsrc3 =
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
/* Fills desc[0..3] from buffer_state for the current gfx_level. */
ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, desc);
/* The checks run from the newest generation down, so each level takes the
 * first branch it satisfies. GFX10+ use a unified FORMAT field plus
 * OOB_SELECT (GFX10 alone also sets RESOURCE_LEVEL); older levels use the
 * separate NUM_FORMAT/DATA_FORMAT fields.
 */
if (sel->screen->info.gfx_level >= GFX12)
rsrc3 |= S_008F0C_FORMAT_GFX12(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
else if (sel->screen->info.gfx_level >= GFX11)
rsrc3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
else if (sel->screen->info.gfx_level >= GFX10)
rsrc3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
else
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
/* Hand-packed result: { addr_lo, addr_hi, size in dword 2, rsrc3 }. */
return nir_vec4(b, addr_lo, addr_hi, nir_imm_int(b, sel->info.constbuf0_num_slots * 16),
nir_imm_int(b, rsrc3));
/* Helper-based result: dwords 1..3 come from desc[] as filled above.
 * NOTE(review): unreachable as listed, because the return above always runs first.
 */
return nir_vec4(b, addr_lo, nir_imm_int(b, desc[1]), nir_imm_int(b, desc[2]),
nir_imm_int(b, desc[3]));
}
static nir_def *clamp_index(nir_builder *b, nir_def *index, unsigned max)

View file

@@ -4015,52 +4015,21 @@ void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf
if (screen->info.gfx_level == GFX8)
num_records *= stride;
state[4] = 0;
state[5] = S_008F04_STRIDE(stride);
state[6] = num_records;
state[7] = S_008F0C_DST_SEL_X(ac_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(ac_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(ac_map_swizzle(desc->swizzle[2])) |
S_008F0C_DST_SEL_W(ac_map_swizzle(desc->swizzle[3]));
const struct ac_buffer_state buffer_state = {
.size = num_records,
.format = format,
.swizzle =
{
desc->swizzle[0],
desc->swizzle[1],
desc->swizzle[2],
desc->swizzle[3],
},
.stride = stride,
.gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
};
if (screen->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt = &ac_get_gfx10_format_table(screen->info.gfx_level)[format];
/* OOB_SELECT chooses the out-of-bounds check.
*
* GFX10:
* - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
* - 1: index >= NUM_RECORDS
* - 2: NUM_RECORDS == 0
* - 3: if SWIZZLE_ENABLE:
* swizzle_address >= NUM_RECORDS
* else:
* offset >= NUM_RECORDS
*
* GFX11:
* - 0: (index >= NUM_RECORDS) || (offset+payload > STRIDE)
* - 1: index >= NUM_RECORDS
* - 2: NUM_RECORDS == 0
* - 3: if SWIZZLE_ENABLE && STRIDE:
* (index >= NUM_RECORDS) || ( offset+payload > STRIDE)
* else:
* offset+payload > NUM_RECORDS
*/
state[7] |= (screen->info.gfx_level >= GFX12 ?
S_008F0C_FORMAT_GFX12(fmt->img_format) :
S_008F0C_FORMAT_GFX10(fmt->img_format)) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11);
} else {
int first_non_void;
unsigned num_format, data_format;
first_non_void = util_format_get_first_non_void_channel(format);
num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);
state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
}
ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[4]);
}
/**
@@ -4128,29 +4097,21 @@ static void cdna_emu_make_image_descriptor(struct si_screen *screen, struct si_t
util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
/* Buffer descriptor */
state[0] = 0;
state[1] = S_008F04_STRIDE(stride);
state[2] = num_records;
state[3] = S_008F0C_DST_SEL_X(ac_map_swizzle(swizzle[0])) |
S_008F0C_DST_SEL_Y(ac_map_swizzle(swizzle[1])) |
S_008F0C_DST_SEL_Z(ac_map_swizzle(swizzle[2])) |
S_008F0C_DST_SEL_W(ac_map_swizzle(swizzle[3]));
const struct ac_buffer_state buffer_state = {
.size = num_records,
.format = pipe_format,
.swizzle =
{
desc->swizzle[0],
desc->swizzle[1],
desc->swizzle[2],
desc->swizzle[3],
},
.stride = stride,
.gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
};
if (screen->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt = &ac_get_gfx10_format_table(screen->info.gfx_level)[pipe_format];
state[3] |= (screen->info.gfx_level >= GFX12 ? S_008F0C_FORMAT_GFX12(fmt->img_format) :
S_008F0C_FORMAT_GFX10(fmt->img_format)) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11);
} else {
int first_non_void = util_format_get_first_non_void_channel(pipe_format);
unsigned num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
unsigned data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);
state[3] |= S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
}
ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[0]);
/* Additional fields used by image opcode emulation. */
state[4] = width | (height << 16);
@@ -5050,33 +5011,24 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,
v->vb_alignment_check_mask |= 1 << vbo_index;
}
v->elem[i].rsrc_word3 = S_008F0C_DST_SEL_X(ac_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(ac_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(ac_map_swizzle(desc->swizzle[2])) |
S_008F0C_DST_SEL_W(ac_map_swizzle(desc->swizzle[3]));
const struct ac_buffer_state buffer_state = {
.format = elements[i].src_format,
.swizzle =
{
desc->swizzle[0],
desc->swizzle[1],
desc->swizzle[2],
desc->swizzle[3],
},
/* OOB_SELECT chooses the out-of-bounds check:
* - 1: index >= NUM_RECORDS (Structured)
* - 3: offset >= NUM_RECORDS (Raw)
*/
.gfx10_oob_select = v->elem[i].stride ? V_008F0C_OOB_SELECT_STRUCTURED
: V_008F0C_OOB_SELECT_RAW,
};
if (sscreen->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt = &ac_get_gfx10_format_table(sscreen->info.gfx_level)[elements[i].src_format];
ASSERTED unsigned last_vertex_format = sscreen->info.gfx_level >= GFX11 ? 64 : 128;
assert(fmt->img_format != 0 && fmt->img_format < last_vertex_format);
v->elem[i].rsrc_word3 |=
(sscreen->info.gfx_level >= GFX12 ?
S_008F0C_FORMAT_GFX12(fmt->img_format) :
S_008F0C_FORMAT_GFX10(fmt->img_format)) |
S_008F0C_RESOURCE_LEVEL(sscreen->info.gfx_level < GFX11) |
/* OOB_SELECT chooses the out-of-bounds check:
* - 1: index >= NUM_RECORDS (Structured)
* - 3: offset >= NUM_RECORDS (Raw)
*/
S_008F0C_OOB_SELECT(v->elem[i].stride ? V_008F0C_OOB_SELECT_STRUCTURED
: V_008F0C_OOB_SELECT_RAW);
} else {
unsigned data_format, num_format;
data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
v->elem[i].rsrc_word3 |= S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
}
ac_set_buf_desc_word3(sscreen->info.gfx_level, &buffer_state, &v->elem[i].rsrc_word3);
}
if (v->instance_divisor_is_fetched) {