aco: remove radv vs prolog key from aco internals.

This creates an aco-specific key and converts the radv key to it.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342>
This commit is contained in:
Dave Airlie 2022-05-05 14:27:01 +10:00 committed by Marge Bot
parent 04c07a2413
commit c44d5d61ce
7 changed files with 69 additions and 22 deletions

View file

@ -11683,7 +11683,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u
}
void
select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config,
select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
@ -11710,7 +11710,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
uint32_t attrib_mask = BITFIELD_MASK(key->num_attributes);
bool has_nontrivial_divisors = key->state->nontrivial_divisors & attrib_mask;
bool has_nontrivial_divisors = key->state.nontrivial_divisors & attrib_mask;
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
@ -11769,12 +11769,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
bool needs_instance_index = false;
bool needs_start_instance = false;
u_foreach_bit(i, key->state->instance_rate_inputs & attrib_mask)
u_foreach_bit(i, key->state.instance_rate_inputs & attrib_mask)
{
needs_instance_index |= key->state->divisors[i] == 1;
needs_start_instance |= key->state->divisors[i] == 0;
needs_instance_index |= key->state.divisors[i] == 1;
needs_start_instance |= key->state.divisors[i] == 0;
}
bool needs_vertex_index = ~key->state->instance_rate_inputs & attrib_mask;
bool needs_vertex_index = ~key->state.instance_rate_inputs & attrib_mask;
if (needs_vertex_index)
bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex),
get_arg_fixed(args, args->ac.vertex_id), false, Operand(s2), true);
@ -11792,13 +11792,13 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* calculate index */
Operand fetch_index = Operand(vertex_index, v1);
if (key->state->instance_rate_inputs & (1u << loc)) {
uint32_t divisor = key->state->divisors[loc];
if (key->state.instance_rate_inputs & (1u << loc)) {
uint32_t divisor = key->state.divisors[loc];
if (divisor) {
fetch_index = instance_id;
if (key->state->nontrivial_divisors & (1u << loc)) {
if (key->state.nontrivial_divisors & (1u << loc)) {
unsigned index =
util_bitcount(key->state->nontrivial_divisors & BITFIELD_MASK(loc));
util_bitcount(key->state.nontrivial_divisors & BITFIELD_MASK(loc));
fetch_index = calc_nontrivial_instance_id(
bld, args, index, instance_id, start_instance, prolog_input,
nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1);
@ -11813,11 +11813,11 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* perform load */
PhysReg cur_desc = desc.advance(i * 16);
if ((key->misaligned_mask & (1u << loc))) {
unsigned dfmt = key->state->formats[loc] & 0xf;
unsigned nfmt = key->state->formats[loc] >> 4;
unsigned dfmt = key->state.formats[loc] & 0xf;
unsigned nfmt = key->state.formats[loc] >> 4;
const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
for (unsigned j = 0; j < vtx_info->num_channels; j++) {
bool post_shuffle = key->state->post_shuffle & (1u << loc);
bool post_shuffle = key->state.post_shuffle & (1u << loc);
unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j);
/* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec
@ -11849,7 +11849,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
}
}
if (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi) {
if (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi) {
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->chip_class));
@ -11857,12 +11857,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
* so we may need to fix it up. */
u_foreach_bit(loc, (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi))
u_foreach_bit(loc, (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi))
{
PhysReg alpha(attributes_start.reg() + loc * 4u + 3);
unsigned alpha_adjust = (key->state->alpha_adjust_lo >> loc) & 0x1;
alpha_adjust |= ((key->state->alpha_adjust_hi >> loc) & 0x1) << 1;
unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));

View file

@ -281,7 +281,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options,
void
aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_vs_prolog_key* key,
const struct aco_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary)
{

View file

@ -33,6 +33,7 @@ extern "C" {
struct ac_shader_config;
struct aco_shader_info;
struct aco_vs_prolog_key;
struct aco_compiler_statistic_info {
char name[32];
@ -50,7 +51,7 @@ void aco_compile_shader(const struct radv_nir_compiler_options* options,
void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_vs_prolog_key* key,
const struct aco_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary);

View file

@ -38,7 +38,6 @@
#include <vector>
struct radv_shader_args;
struct radv_vs_prolog_key;
namespace aco {
@ -2168,7 +2167,7 @@ void select_trap_handler_shader(Program* program, struct nir_shader* shader,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_shader_args* args);
void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
void select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key,
ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,

View file

@ -35,6 +35,29 @@ extern "C" {
#define ACO_MAX_SO_OUTPUTS 64
#define ACO_MAX_SO_BUFFERS 4
#define ACO_MAX_VERTEX_ATTRIBS 32
/* Vertex input state read by the VS prolog selection code. The uint32_t
 * members are per-attribute bitmasks that are tested with (1u << location).
 */
struct aco_vs_input_state {
/* Bit set: the attribute is fetched using an instance-based index; attributes
 * whose bit is clear use the vertex index.
 */
uint32_t instance_rate_inputs;
/* Bit set: the fetch index for the attribute is computed by
 * calc_nontrivial_instance_id(); the lookup index passed to it is the number
 * of set bits below the attribute's location.
 */
uint32_t nontrivial_divisors;
/* Bit set: in the misaligned load path the byte offsets of the first three
 * channels are reversed (channel j is read from slot 2 - j).
 */
uint32_t post_shuffle;
/* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
 * using bitwise arithmetic.
 */
/* For each location, the bit from alpha_adjust_lo is bit 0 and the bit from
 * alpha_adjust_hi is bit 1 of the attribute's alpha adjust mode.
 */
uint32_t alpha_adjust_lo;
uint32_t alpha_adjust_hi;
/* Per-location instance divisor. For instance-rate inputs, a value of 1 makes
 * the prolog need the instance index and a value of 0 makes it need the start
 * instance.
 */
uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS];
/* Per-location packed format: the low 4 bits are the data format (dfmt) and
 * the remaining upper bits are the numeric format (nfmt).
 */
uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
};
/* ACO-side key describing the vertex shader prolog to generate. It is filled
 * from a radv_vs_prolog_key by radv_aco_convert_vs_prolog_key().
 */
struct aco_vs_prolog_key {
/* Embedded copy of the vertex input state (the radv key holds a pointer instead). */
struct aco_vs_input_state state;
/* Number of attributes handled by the prolog; the state bitmasks are masked
 * with BITFIELD_MASK(num_attributes) before use.
 */
unsigned num_attributes;
/* Bit set per location: the attribute is loaded through the per-channel path
 * that decodes formats[] into dfmt/nfmt.
 */
uint32_t misaligned_mask;
/* NOTE(review): presumably selects the NGG variant of the shader - not read in
 * the code visible here; confirm against the users of this key.
 */
bool is_ngg;
/* NOTE(review): presumably the stage the prolog's main shader is compiled as -
 * not read in the code visible here; confirm against the users of this key.
 */
gl_shader_stage next_stage;
};
struct aco_vp_output_info {
uint8_t vs_output_param_offset[VARYING_SLOT_MAX];

View file

@ -103,8 +103,30 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
radv_aco_convert_shader_so_info(aco_info, radv);
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
}
/* Copy a scalar member of the radv input state (reached through a pointer)
 * into the state embedded in the aco key.
 */
#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
/* Copy an array member of the input state. The copy size is taken from the
 * radv member. NOTE(review): this assumes the aco array is at least as large as
 * the radv one - confirm ACO_MAX_VERTEX_ATTRIBS matches the radv limit.
 */
#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x))
/* Fill an aco_vs_prolog_key from a radv_vs_prolog_key so that the aco
 * compiler does not need to see the radv structure.
 *
 * aco_info: destination key; every member of the key and of its embedded
 *           state is written.
 * radv:     source key; radv->state is dereferenced and must be valid.
 */
static inline void
radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_key *aco_info,
const struct radv_vs_prolog_key *radv)
{
/* Input state: bitmasks are assigned, arrays are copied with memcpy. */
ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
ASSIGN_VS_STATE_FIELD(nontrivial_divisors);
ASSIGN_VS_STATE_FIELD(post_shuffle);
ASSIGN_VS_STATE_FIELD(alpha_adjust_lo);
ASSIGN_VS_STATE_FIELD(alpha_adjust_hi);
ASSIGN_VS_STATE_FIELD_CP(divisors);
ASSIGN_VS_STATE_FIELD_CP(formats);
/* Top-level key members. ASSIGN_FIELD is defined earlier in this file, outside
 * this excerpt; presumably it copies the same-named member from radv to
 * aco_info.
 */
ASSIGN_FIELD(num_attributes);
ASSIGN_FIELD(misaligned_mask);
ASSIGN_FIELD(is_ngg);
ASSIGN_FIELD(next_stage);
}
/* The helper macros above are only needed by this conversion function. */
#undef ASSIGN_VS_STATE_FIELD
#undef ASSIGN_VS_STATE_FIELD_CP
#undef ASSIGN_FIELD
#undef ASSIGN_FIELD_CP
#undef ASSIGN_OUTINFO
#endif

View file

@ -2186,8 +2186,10 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
struct radv_prolog_binary *binary = NULL;
struct aco_shader_info ac_info;
struct aco_vs_prolog_key ac_key;
radv_aco_convert_shader_info(&ac_info, &info);
aco_compile_vs_prolog(&options, &ac_info, key, &args, &binary);
radv_aco_convert_vs_prolog_key(&ac_key, key);
aco_compile_vs_prolog(&options, &ac_info, &ac_key, &args, &binary);
struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size);
if (prolog) {
prolog->nontrivial_divisors = key->state->nontrivial_divisors;