mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
aco: remove radv vs prolog key from aco internals.
This creates an aco-specific key and converts the radv key to it. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342>
This commit is contained in:
parent
04c07a2413
commit
c44d5d61ce
7 changed files with 69 additions and 22 deletions
|
|
@ -11683,7 +11683,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u
|
|||
}
|
||||
|
||||
void
|
||||
select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config,
|
||||
select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config,
|
||||
const struct radv_nir_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
|
||||
|
|
@ -11710,7 +11710,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
|
||||
|
||||
uint32_t attrib_mask = BITFIELD_MASK(key->num_attributes);
|
||||
bool has_nontrivial_divisors = key->state->nontrivial_divisors & attrib_mask;
|
||||
bool has_nontrivial_divisors = key->state.nontrivial_divisors & attrib_mask;
|
||||
|
||||
wait_imm lgkm_imm;
|
||||
lgkm_imm.lgkm = 0;
|
||||
|
|
@ -11769,12 +11769,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
|
||||
bool needs_instance_index = false;
|
||||
bool needs_start_instance = false;
|
||||
u_foreach_bit(i, key->state->instance_rate_inputs & attrib_mask)
|
||||
u_foreach_bit(i, key->state.instance_rate_inputs & attrib_mask)
|
||||
{
|
||||
needs_instance_index |= key->state->divisors[i] == 1;
|
||||
needs_start_instance |= key->state->divisors[i] == 0;
|
||||
needs_instance_index |= key->state.divisors[i] == 1;
|
||||
needs_start_instance |= key->state.divisors[i] == 0;
|
||||
}
|
||||
bool needs_vertex_index = ~key->state->instance_rate_inputs & attrib_mask;
|
||||
bool needs_vertex_index = ~key->state.instance_rate_inputs & attrib_mask;
|
||||
if (needs_vertex_index)
|
||||
bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex),
|
||||
get_arg_fixed(args, args->ac.vertex_id), false, Operand(s2), true);
|
||||
|
|
@ -11792,13 +11792,13 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
|
||||
/* calculate index */
|
||||
Operand fetch_index = Operand(vertex_index, v1);
|
||||
if (key->state->instance_rate_inputs & (1u << loc)) {
|
||||
uint32_t divisor = key->state->divisors[loc];
|
||||
if (key->state.instance_rate_inputs & (1u << loc)) {
|
||||
uint32_t divisor = key->state.divisors[loc];
|
||||
if (divisor) {
|
||||
fetch_index = instance_id;
|
||||
if (key->state->nontrivial_divisors & (1u << loc)) {
|
||||
if (key->state.nontrivial_divisors & (1u << loc)) {
|
||||
unsigned index =
|
||||
util_bitcount(key->state->nontrivial_divisors & BITFIELD_MASK(loc));
|
||||
util_bitcount(key->state.nontrivial_divisors & BITFIELD_MASK(loc));
|
||||
fetch_index = calc_nontrivial_instance_id(
|
||||
bld, args, index, instance_id, start_instance, prolog_input,
|
||||
nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1);
|
||||
|
|
@ -11813,11 +11813,11 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
/* perform load */
|
||||
PhysReg cur_desc = desc.advance(i * 16);
|
||||
if ((key->misaligned_mask & (1u << loc))) {
|
||||
unsigned dfmt = key->state->formats[loc] & 0xf;
|
||||
unsigned nfmt = key->state->formats[loc] >> 4;
|
||||
unsigned dfmt = key->state.formats[loc] & 0xf;
|
||||
unsigned nfmt = key->state.formats[loc] >> 4;
|
||||
const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
|
||||
for (unsigned j = 0; j < vtx_info->num_channels; j++) {
|
||||
bool post_shuffle = key->state->post_shuffle & (1u << loc);
|
||||
bool post_shuffle = key->state.post_shuffle & (1u << loc);
|
||||
unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j);
|
||||
|
||||
/* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec
|
||||
|
|
@ -11849,7 +11849,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
}
|
||||
}
|
||||
|
||||
if (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi) {
|
||||
if (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi) {
|
||||
wait_imm vm_imm;
|
||||
vm_imm.vm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->chip_class));
|
||||
|
|
@ -11857,12 +11857,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
|
|||
|
||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
||||
* so we may need to fix it up. */
|
||||
u_foreach_bit(loc, (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi))
|
||||
u_foreach_bit(loc, (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi))
|
||||
{
|
||||
PhysReg alpha(attributes_start.reg() + loc * 4u + 3);
|
||||
|
||||
unsigned alpha_adjust = (key->state->alpha_adjust_lo >> loc) & 0x1;
|
||||
alpha_adjust |= ((key->state->alpha_adjust_hi >> loc) & 0x1) << 1;
|
||||
unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
|
||||
alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
|
||||
|
||||
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
|
||||
bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options,
|
|||
void
|
||||
aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct radv_vs_prolog_key* key,
|
||||
const struct aco_vs_prolog_key* key,
|
||||
const struct radv_shader_args* args,
|
||||
struct radv_prolog_binary** binary)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ extern "C" {
|
|||
|
||||
struct ac_shader_config;
|
||||
struct aco_shader_info;
|
||||
struct aco_vs_prolog_key;
|
||||
|
||||
struct aco_compiler_statistic_info {
|
||||
char name[32];
|
||||
|
|
@ -50,7 +51,7 @@ void aco_compile_shader(const struct radv_nir_compiler_options* options,
|
|||
|
||||
void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct radv_vs_prolog_key* key,
|
||||
const struct aco_vs_prolog_key* key,
|
||||
const struct radv_shader_args* args,
|
||||
struct radv_prolog_binary** binary);
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@
|
|||
#include <vector>
|
||||
|
||||
struct radv_shader_args;
|
||||
struct radv_vs_prolog_key;
|
||||
|
||||
namespace aco {
|
||||
|
||||
|
|
@ -2168,7 +2167,7 @@ void select_trap_handler_shader(Program* program, struct nir_shader* shader,
|
|||
const struct radv_nir_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct radv_shader_args* args);
|
||||
void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
|
||||
void select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key,
|
||||
ac_shader_config* config,
|
||||
const struct radv_nir_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
|
|
|
|||
|
|
@ -35,6 +35,29 @@ extern "C" {
|
|||
|
||||
#define ACO_MAX_SO_OUTPUTS 64
|
||||
#define ACO_MAX_SO_BUFFERS 4
|
||||
#define ACO_MAX_VERTEX_ATTRIBS 32
|
||||
|
||||
/* Vertex-input state consumed by select_vs_prolog() through
 * aco_vs_prolog_key::state. The bitmask fields are tested there with
 * (1u << loc), i.e. one bit per vertex attribute location, and the arrays
 * are indexed by that same location.
 */
struct aco_vs_input_state {
|
||||
/* Bit per location: when set, select_vs_prolog() consults divisors[loc]
 * and, for a non-zero divisor, fetches with the instance id instead of the
 * vertex index. */
uint32_t instance_rate_inputs;
|
||||
/* Bit per location: when set (and the divisor is non-zero), the fetch index
 * is computed by calc_nontrivial_instance_id(). */
uint32_t nontrivial_divisors;
|
||||
/* Bit per location: when set, the misaligned load path mirrors the byte
 * offsets of the first three channels (channel j reads slot 2 - j). */
uint32_t post_shuffle;
|
||||
/* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
|
||||
* using bitwise arithmetic.
|
||||
*/
/* Bit 0 of the per-location 2-bit alpha adjust value. */
uint32_t alpha_adjust_lo;
|
||||
/* Bit 1 of the per-location 2-bit alpha adjust value. */
uint32_t alpha_adjust_hi;
|
||||

||||
/* Per-location divisor; select_vs_prolog() treats 0 as "needs start
 * instance" and 1 as "needs instance index". */
uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS];
|
||||
/* Per-location packed format: the low 4 bits are read as dfmt and the
 * remaining upper bits as nfmt. */
uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
|
||||
};
|
||||
|
||||
/* ACO-side key describing the vertex shader prolog to build. It is filled
 * from a radv_vs_prolog_key by radv_aco_convert_vs_prolog_key() and passed to
 * aco_compile_vs_prolog() / select_vs_prolog(). Unlike the radv key, the input
 * state is embedded by value rather than referenced through a pointer.
 */
struct aco_vs_prolog_key {
|
||||
/* Copy of the vertex-input state (accessed as key->state.x). */
struct aco_vs_input_state state;
|
||||
/* Number of attributes; select_vs_prolog() builds its attribute mask with
 * BITFIELD_MASK(num_attributes). */
unsigned num_attributes;
|
||||
/* Bit per location: when set, select_vs_prolog() takes the per-channel
 * load path for that attribute. */
uint32_t misaligned_mask;
|
||||
/* NOTE(review): no use is visible in this excerpt; presumably marks an NGG
 * vertex shader -- confirm against the consumer. */
bool is_ngg;
|
||||
/* NOTE(review): no use is visible in this excerpt; presumably the stage
 * that follows the vertex shader -- confirm against the consumer. */
gl_shader_stage next_stage;
|
||||
};
|
||||
|
||||
struct aco_vp_output_info {
|
||||
uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
|
||||
|
|
|
|||
|
|
@ -103,8 +103,30 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
|
|||
radv_aco_convert_shader_so_info(aco_info, radv);
|
||||
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
|
||||
}
|
||||
|
||||
/* Copy one scalar member from radv->state (a pointer) into the embedded
 * aco_info->state. Relies on locals named aco_info and radv. */
#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
|
||||
/* Copy one array member of the input state with memcpy. The length is the
 * size of the radv source member, so the aco destination member must be at
 * least that large. */
#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x))
|
||||
/* Fill an aco_vs_prolog_key from a radv_vs_prolog_key.
 *
 * aco_info: destination key; every member of it and of its embedded state is
 *           written here.
 * radv:     source key; radv->state is dereferenced, so it must be non-NULL.
 */
static inline void
|
||||
radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_key *aco_info,
|
||||
const struct radv_vs_prolog_key *radv)
|
||||
{
|
||||
/* Scalar bitmask members of the input state. */
ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
|
||||
ASSIGN_VS_STATE_FIELD(nontrivial_divisors);
|
||||
ASSIGN_VS_STATE_FIELD(post_shuffle);
|
||||
ASSIGN_VS_STATE_FIELD(alpha_adjust_lo);
|
||||
ASSIGN_VS_STATE_FIELD(alpha_adjust_hi);
|
||||
/* Per-attribute arrays are copied wholesale. */
ASSIGN_VS_STATE_FIELD_CP(divisors);
|
||||
ASSIGN_VS_STATE_FIELD_CP(formats);
|
||||
/* Top-level key members. ASSIGN_FIELD is defined earlier in this header,
 * outside this excerpt; presumably aco_info->x = radv->x -- confirm. */
ASSIGN_FIELD(num_attributes);
|
||||
ASSIGN_FIELD(misaligned_mask);
|
||||
ASSIGN_FIELD(is_ngg);
|
||||
ASSIGN_FIELD(next_stage);
|
||||
}
|
||||
#undef ASSIGN_VS_STATE_FIELD
|
||||
#undef ASSIGN_VS_STATE_FIELD_CP
|
||||
#undef ASSIGN_FIELD
|
||||
#undef ASSIGN_FIELD_CP
|
||||
#undef ASSIGN_OUTINFO
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -2186,8 +2186,10 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
|
|||
|
||||
struct radv_prolog_binary *binary = NULL;
|
||||
struct aco_shader_info ac_info;
|
||||
struct aco_vs_prolog_key ac_key;
|
||||
radv_aco_convert_shader_info(&ac_info, &info);
|
||||
aco_compile_vs_prolog(&options, &ac_info, key, &args, &binary);
|
||||
radv_aco_convert_vs_prolog_key(&ac_key, key);
|
||||
aco_compile_vs_prolog(&options, &ac_info, &ac_key, &args, &binary);
|
||||
struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size);
|
||||
if (prolog) {
|
||||
prolog->nontrivial_divisors = key->state->nontrivial_divisors;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue