mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
radeonsi/gfx11: use SET_*_REG_PAIRS_PACKED packets for pm4 states
It can generate all PACKED packets, but only SET_CONTEXT_REG_PAIRS_PACKED is generated because register shadowing is required by SET_SH_REG_PAIRS_PACKED*. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517>
This commit is contained in:
parent
1aa99437d3
commit
22f3bcfb5a
5 changed files with 194 additions and 1 deletions
|
|
@ -9,13 +9,144 @@
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
#include "util/u_memory.h"
|
#include "util/u_memory.h"
|
||||||
|
|
||||||
|
static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
|
||||||
|
unsigned opcode, unsigned idx);
|
||||||
|
|
||||||
|
static bool opcode_is_packed(unsigned opcode)
|
||||||
|
{
|
||||||
|
return opcode == PKT3_SET_CONTEXT_REG_PAIRS_PACKED ||
|
||||||
|
opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
|
||||||
|
opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned packed_opcode_to_unpacked(unsigned opcode)
|
||||||
|
{
|
||||||
|
switch (opcode) {
|
||||||
|
case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
|
||||||
|
return PKT3_SET_CONTEXT_REG;
|
||||||
|
case PKT3_SET_SH_REG_PAIRS_PACKED:
|
||||||
|
return PKT3_SET_SH_REG;
|
||||||
|
default:
|
||||||
|
unreachable("invalid packed opcode");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned unpacked_opcode_to_packed(struct si_pm4_state *state, unsigned opcode)
|
||||||
|
{
|
||||||
|
switch (opcode) {
|
||||||
|
case PKT3_SET_CONTEXT_REG:
|
||||||
|
if (state->screen->info.gfx_level >= GFX11)
|
||||||
|
return PKT3_SET_CONTEXT_REG_PAIRS_PACKED;
|
||||||
|
break;
|
||||||
|
case PKT3_SET_SH_REG:
|
||||||
|
if (state->screen->info.gfx_level >= GFX11 &&
|
||||||
|
state->screen->info.register_shadowing_required)
|
||||||
|
return PKT3_SET_SH_REG_PAIRS_PACKED;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return opcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool packed_next_is_reg_offset_pair(struct si_pm4_state *state)
|
||||||
|
{
|
||||||
|
return (state->ndw - state->last_pm4) % 3 == 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool packed_next_is_reg_value1(struct si_pm4_state *state)
|
||||||
|
{
|
||||||
|
return (state->ndw - state->last_pm4) % 3 == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return whether the last written dword was "value0", i.e. the packet currently
 * holds an odd number of registers. Equivalent to "value1 comes next".
 */
static bool packed_prev_is_reg_value0(struct si_pm4_state *state)
{
   return packed_next_is_reg_value1(state);
}
|
||||||
|
|
||||||
|
static unsigned get_packed_reg_dw_offsetN(struct si_pm4_state *state, unsigned index)
|
||||||
|
{
|
||||||
|
unsigned i = state->last_pm4 + 2 + (index / 2) * 3;
|
||||||
|
assert(i < state->ndw);
|
||||||
|
return (state->pm4[i] >> ((index % 2) * 16)) & 0xffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned get_packed_reg_valueN_idx(struct si_pm4_state *state, unsigned index)
|
||||||
|
{
|
||||||
|
unsigned i = state->last_pm4 + 2 + (index / 2) * 3 + 1 + (index % 2);
|
||||||
|
assert(i < state->ndw);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned get_packed_reg_valueN(struct si_pm4_state *state, unsigned index)
|
||||||
|
{
|
||||||
|
return state->pm4[get_packed_reg_valueN_idx(state, index)];
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned get_packed_reg_count(struct si_pm4_state *state)
|
||||||
|
{
|
||||||
|
int body_size = state->ndw - state->last_pm4 - 2;
|
||||||
|
assert(body_size > 0 && body_size % 3 == 0);
|
||||||
|
return (body_size / 3) * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void si_pm4_finalize(struct si_pm4_state *state)
|
||||||
|
{
|
||||||
|
if (opcode_is_packed(state->last_opcode)) {
|
||||||
|
unsigned reg_count = get_packed_reg_count(state);
|
||||||
|
unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0);
|
||||||
|
|
||||||
|
if (state->packed_is_padded)
|
||||||
|
reg_count--;
|
||||||
|
|
||||||
|
bool all_consecutive = true;
|
||||||
|
|
||||||
|
/* If the whole packed SET packet only sets consecutive registers, rewrite the packet
|
||||||
|
* to be unpacked to make it shorter.
|
||||||
|
*
|
||||||
|
* This also eliminates the invalid scenario when the packed SET packet sets only
|
||||||
|
* 2 registers and the register offsets are equal due to padding.
|
||||||
|
*/
|
||||||
|
for (unsigned i = 1; i < reg_count; i++) {
|
||||||
|
if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) {
|
||||||
|
all_consecutive = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (all_consecutive) {
|
||||||
|
assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2);
|
||||||
|
state->pm4[state->last_pm4] = PKT3(packed_opcode_to_unpacked(state->last_opcode),
|
||||||
|
reg_count, 0);
|
||||||
|
state->pm4[state->last_pm4 + 1] = reg_dw_offset0;
|
||||||
|
for (unsigned i = 0; i < reg_count; i++)
|
||||||
|
state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i);
|
||||||
|
state->ndw = state->last_pm4 + 2 + reg_count;
|
||||||
|
state->last_opcode = PKT3_SET_SH_REG;
|
||||||
|
} else {
|
||||||
|
/* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
|
||||||
|
if (!state->is_compute_queue)
|
||||||
|
state->pm4[state->last_pm4] |= PKT3_RESET_FILTER_CAM_S(1);
|
||||||
|
|
||||||
|
/* If it's a packed SET_SH packet, use the *_N variant when possible. */
|
||||||
|
if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) {
|
||||||
|
state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
|
||||||
|
state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
|
static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
|
||||||
{
|
{
|
||||||
|
si_pm4_finalize(state);
|
||||||
|
|
||||||
assert(state->max_dw);
|
assert(state->max_dw);
|
||||||
assert(state->ndw < state->max_dw);
|
assert(state->ndw < state->max_dw);
|
||||||
assert(opcode <= 254);
|
assert(opcode <= 254);
|
||||||
state->last_opcode = opcode;
|
state->last_opcode = opcode;
|
||||||
state->last_pm4 = state->ndw++;
|
state->last_pm4 = state->ndw++;
|
||||||
|
state->packed_is_padded = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
|
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
|
||||||
|
|
@ -31,17 +162,36 @@ static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
|
||||||
unsigned count;
|
unsigned count;
|
||||||
count = state->ndw - state->last_pm4 - 2;
|
count = state->ndw - state->last_pm4 - 2;
|
||||||
state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate);
|
state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate);
|
||||||
|
|
||||||
|
if (opcode_is_packed(state->last_opcode)) {
|
||||||
|
if (packed_prev_is_reg_value0(state)) {
|
||||||
|
/* Duplicate the first register at the end to make the number of registers aligned to 2. */
|
||||||
|
si_pm4_set_reg_custom(state, get_packed_reg_dw_offsetN(state, 0) * 4,
|
||||||
|
get_packed_reg_valueN(state, 0),
|
||||||
|
state->last_opcode, 0);
|
||||||
|
state->packed_is_padded = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
|
static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
|
||||||
unsigned opcode, unsigned idx)
|
unsigned opcode, unsigned idx)
|
||||||
{
|
{
|
||||||
|
bool is_packed = opcode_is_packed(opcode);
|
||||||
reg >>= 2;
|
reg >>= 2;
|
||||||
|
|
||||||
assert(state->max_dw);
|
assert(state->max_dw);
|
||||||
assert(state->ndw + 2 <= state->max_dw);
|
assert(state->ndw + 2 <= state->max_dw);
|
||||||
|
|
||||||
if (opcode != state->last_opcode || reg != (state->last_reg + 1) || idx != state->last_idx) {
|
if (is_packed) {
|
||||||
|
if (opcode != state->last_opcode) {
|
||||||
|
si_pm4_cmd_begin(state, opcode); /* reserve space for the header */
|
||||||
|
state->ndw++; /* reserve space for the register count, it will be set at the end */
|
||||||
|
}
|
||||||
|
} else if (opcode != state->last_opcode || reg != (state->last_reg + 1) ||
|
||||||
|
idx != state->last_idx) {
|
||||||
si_pm4_cmd_begin(state, opcode);
|
si_pm4_cmd_begin(state, opcode);
|
||||||
state->pm4[state->ndw++] = reg | (idx << 28);
|
state->pm4[state->ndw++] = reg | (idx << 28);
|
||||||
}
|
}
|
||||||
|
|
@ -49,6 +199,25 @@ static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint
|
||||||
assert(reg <= UINT16_MAX);
|
assert(reg <= UINT16_MAX);
|
||||||
state->last_reg = reg;
|
state->last_reg = reg;
|
||||||
state->last_idx = idx;
|
state->last_idx = idx;
|
||||||
|
|
||||||
|
if (is_packed) {
|
||||||
|
if (state->packed_is_padded) {
|
||||||
|
/* The packet is padded, which means the first register is written redundantly again
|
||||||
|
* at the end. Remove it, so that we can replace it with this register.
|
||||||
|
*/
|
||||||
|
state->packed_is_padded = false;
|
||||||
|
state->ndw--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (packed_next_is_reg_offset_pair(state)) {
|
||||||
|
state->pm4[state->ndw++] = reg;
|
||||||
|
} else if (packed_next_is_reg_value1(state)) {
|
||||||
|
/* Set the second register offset in the high 16 bits. */
|
||||||
|
state->pm4[state->ndw - 2] &= 0x0000ffff;
|
||||||
|
state->pm4[state->ndw - 2] |= reg << 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
state->pm4[state->ndw++] = val;
|
state->pm4[state->ndw++] = val;
|
||||||
si_pm4_cmd_end(state, false);
|
si_pm4_cmd_end(state, false);
|
||||||
}
|
}
|
||||||
|
|
@ -78,6 +247,8 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opcode = unpacked_opcode_to_packed(state, opcode);
|
||||||
|
|
||||||
si_pm4_set_reg_custom(state, reg, val, opcode, 0);
|
si_pm4_set_reg_custom(state, reg, val, opcode, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ struct si_pm4_state {
|
||||||
uint8_t last_opcode;
|
uint8_t last_opcode;
|
||||||
uint8_t last_idx;
|
uint8_t last_idx;
|
||||||
bool is_compute_queue;
|
bool is_compute_queue;
|
||||||
|
bool packed_is_padded; /* whether SET_*_REG_PAIRS_PACKED is padded to an even number of regs */
|
||||||
|
|
||||||
/* For shader states only */
|
/* For shader states only */
|
||||||
bool is_shader;
|
bool is_shader;
|
||||||
|
|
@ -53,6 +54,7 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
|
||||||
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||||
void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||||
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||||
|
void si_pm4_finalize(struct si_pm4_state *state);
|
||||||
|
|
||||||
void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
|
void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
|
||||||
bool is_compute_queue);
|
bool is_compute_queue);
|
||||||
|
|
|
||||||
|
|
@ -636,6 +636,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
|
si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
return blend;
|
return blend;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1105,6 +1106,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
||||||
S_028230_ER_LINE_TB(0xA) |
|
S_028230_ER_LINE_TB(0xA) |
|
||||||
S_028230_ER_LINE_BT(0xA));
|
S_028230_ER_LINE_BT(0xA));
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
|
|
||||||
if (!rs->uses_poly_offset)
|
if (!rs->uses_poly_offset)
|
||||||
return rs;
|
return rs;
|
||||||
|
|
@ -1148,6 +1150,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
||||||
si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
|
si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
|
||||||
si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale));
|
si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale));
|
||||||
si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
|
si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
return rs;
|
return rs;
|
||||||
|
|
@ -1393,6 +1396,7 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
|
||||||
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min));
|
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min));
|
||||||
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max));
|
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max));
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
|
|
||||||
dsa->depth_enabled = state->depth_enabled;
|
dsa->depth_enabled = state->depth_enabled;
|
||||||
dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask;
|
dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask;
|
||||||
|
|
@ -5770,6 +5774,7 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg_
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
sctx->cs_preamble_state = pm4;
|
sctx->cs_preamble_state = pm4;
|
||||||
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
||||||
}
|
}
|
||||||
|
|
@ -5815,6 +5820,7 @@ static void cdna_init_compute_preamble_state(struct si_context *sctx)
|
||||||
S_030E04_ADDRESS(border_color_va >> 40));
|
S_030E04_ADDRESS(border_color_va >> 40));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
sctx->cs_preamble_state = pm4;
|
sctx->cs_preamble_state = pm4;
|
||||||
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
||||||
}
|
}
|
||||||
|
|
@ -6111,6 +6117,7 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
sctx->cs_preamble_state = pm4;
|
sctx->cs_preamble_state = pm4;
|
||||||
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -379,6 +379,7 @@ static bool si_update_shaders(struct si_context *sctx)
|
||||||
si_pm4_set_reg(&pipeline->pm4, reg, va_low);
|
si_pm4_set_reg(&pipeline->pm4, reg, va_low);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(&pipeline->pm4);
|
||||||
sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf);
|
sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf);
|
||||||
|
|
||||||
_mesa_hash_table_u64_insert(sctx->sqtt->pipeline_bos,
|
_mesa_hash_table_u64_insert(sctx->sqtt->pipeline_bos,
|
||||||
|
|
|
||||||
|
|
@ -694,6 +694,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
S_00B528_FLOAT_MODE(shader->config.float_mode);
|
S_00B528_FLOAT_MODE(shader->config.float_mode);
|
||||||
shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR)) |
|
shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR)) |
|
||||||
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
|
@ -745,6 +746,8 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
|
||||||
if (sscreen->info.gfx_level <= GFX8)
|
if (sscreen->info.gfx_level <= GFX8)
|
||||||
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2);
|
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2);
|
||||||
|
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_shader_es(struct si_context *sctx)
|
static void si_emit_shader_es(struct si_context *sctx)
|
||||||
|
|
@ -811,6 +814,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
si_set_tesseval_regs(sscreen, shader->selector, shader);
|
si_set_tesseval_regs(sscreen, shader->selector, shader);
|
||||||
|
|
||||||
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
|
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
|
void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
|
||||||
|
|
@ -1124,6 +1128,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
S_00B22C_USER_SGPR(GFX6_GS_NUM_USER_SGPR) |
|
S_00B22C_USER_SGPR(GFX6_GS_NUM_USER_SGPR) |
|
||||||
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gfx10_is_ngg_passthrough(struct si_shader *shader)
|
bool gfx10_is_ngg_passthrough(struct si_shader *shader)
|
||||||
|
|
@ -1499,6 +1504,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||||
S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
|
S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
|
||||||
S_028B54_GS_W32_EN(shader->wave_size == 32) |
|
S_028B54_GS_W32_EN(shader->wave_size == 32) |
|
||||||
S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||||
|
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_shader_vs(struct si_context *sctx)
|
static void si_emit_shader_vs(struct si_context *sctx)
|
||||||
|
|
@ -1704,6 +1711,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
||||||
si_set_tesseval_regs(sscreen, shader->selector, shader);
|
si_set_tesseval_regs(sscreen, shader->selector, shader);
|
||||||
|
|
||||||
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
|
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
|
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
|
||||||
|
|
@ -1949,6 +1957,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
|
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
|
||||||
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
|
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
|
||||||
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader)
|
static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
|
@ -3696,6 +3705,7 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz)
|
||||||
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
|
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
|
||||||
si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||||
si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
|
si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
|
|
||||||
*has_vgt_flush = true;
|
*has_vgt_flush = true;
|
||||||
}
|
}
|
||||||
|
|
@ -3857,6 +3867,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
|
||||||
si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
|
si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
|
||||||
sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0);
|
sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0);
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
|
|
||||||
if (old_ndw) {
|
if (old_ndw) {
|
||||||
pm4->ndw = old_ndw;
|
pm4->ndw = old_ndw;
|
||||||
|
|
@ -4126,6 +4137,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
|
||||||
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
|
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
|
||||||
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
|
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
|
||||||
}
|
}
|
||||||
|
si_pm4_finalize(pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Flush the context to re-emit the cs_preamble state.
|
/* Flush the context to re-emit the cs_preamble state.
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue