mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 15:28:15 +02:00
nir: add load_deref_transpose_amd
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41653>
This commit is contained in:
parent
6229e89fa8
commit
57498eca83
15 changed files with 62 additions and 7 deletions
|
|
@ -2914,6 +2914,12 @@ nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
|
|||
bool
|
||||
nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
|
||||
{
|
||||
/* Subgroup operations can't be reordered because they might then read inactive
|
||||
* invocations. load_global_transpose_amd is an example of one which also has ACCESS.
|
||||
*/
|
||||
if (nir_intrinsic_has_semantic(instr, NIR_INTRINSIC_SUBGROUP | NIR_INTRINSIC_QUADGROUP))
|
||||
return false;
|
||||
|
||||
if (nir_intrinsic_has_access(instr)) {
|
||||
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||
if (access & ACCESS_VOLATILE)
|
||||
|
|
|
|||
|
|
@ -198,6 +198,7 @@ nir_deref_instr_has_complex_use(nir_deref_instr *deref,
|
|||
nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
|
||||
switch (use_intrin->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
assert(use_src == &use_intrin->src[0]);
|
||||
continue;
|
||||
|
||||
|
|
@ -1535,6 +1536,8 @@ nir_opt_deref_impl(nir_function_impl *impl)
|
|||
case nir_intrinsic_load_deref:
|
||||
if (opt_load_vec_deref(&b, intrin))
|
||||
progress = true;
|
||||
FALLTHROUGH;
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
if (opt_load_undef_deref(&b, intrin))
|
||||
progress = true;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1075,6 +1075,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_zs_emit_pan:
|
||||
case nir_intrinsic_load_return_param_amd:
|
||||
case nir_intrinsic_load_local_invocation_index_intel:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
is_divergent = true;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -2299,6 +2299,16 @@ intrinsic("strict_wqm_coord_amd", src_comp=[0], dest_comp=0, bit_sizes=[32], ind
|
|||
intrinsic("cmat_muladd_amd", src_comp=[-1, -1, 0], dest_comp=0, bit_sizes=src2,
|
||||
indices=[SATURATE, NEG_LO_AMD, NEG_HI_AMD, SRC_BASE_TYPE, SRC_BASE_TYPE2], flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Global cooperative matrix load with combined cooperative matrix transpose.
|
||||
# This corresponds to RDNA4's global_load_tr_b{64,128}. Like typical cooperative matrix operations,
|
||||
# this has to be in subgroup uniform control flow with all invocations active.
|
||||
# The definition's component size may be 8-bit or 16-bit and matches the type of matrix to load.
|
||||
# The result has 8 components (wave32) or 4 components (wave64). The address is ignored for lanes
|
||||
# 32-63, and the actual address that's loaded from is probably offset from the values in lanes 0-31.
|
||||
# src[] = { deref }.
|
||||
intrinsic("load_deref_transpose_amd", bit_sizes=[8, 16], dest_comp=0, src_comp=[1],
|
||||
indices=[ACCESS], flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Get the debug log buffer descriptor.
|
||||
intrinsic("load_debug_log_desc_amd", bit_sizes=[32], dest_comp=4, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
|
|
|
|||
|
|
@ -1482,7 +1482,8 @@ nir_lower_explicit_io_instr(nir_builder *b,
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_deref_block_intel: {
|
||||
case nir_intrinsic_load_deref_block_intel:
|
||||
case nir_intrinsic_load_deref_transpose_amd: {
|
||||
nir_io_offset addr = build_addr(b, intrin, base_addr, addr_format, 0,
|
||||
align_mul, align_offset);
|
||||
nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
|
||||
|
|
@ -1760,6 +1761,7 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
|
|||
case nir_intrinsic_store_deref:
|
||||
case nir_intrinsic_load_deref_block_intel:
|
||||
case nir_intrinsic_store_deref_block_intel:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
case nir_intrinsic_deref_atomic:
|
||||
case nir_intrinsic_deref_atomic_swap: {
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ get_intrinsic_info(nir_intrinsic_instr *intrin, nir_variable_mode *modes,
|
|||
*writes = true;
|
||||
break;
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
*modes = nir_src_as_deref(intrin->src[0])->modes;
|
||||
*reads = true;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -146,6 +146,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
case nir_intrinsic_store_deref:
|
||||
case nir_intrinsic_deref_atomic:
|
||||
case nir_intrinsic_deref_atomic_swap: {
|
||||
|
|
@ -154,9 +155,11 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
bool ssbo = nir_deref_mode_is(deref, nir_var_mem_ssbo);
|
||||
bool is_write = instr->intrinsic != nir_intrinsic_load_deref &&
|
||||
instr->intrinsic != nir_intrinsic_load_deref_transpose_amd;
|
||||
gather_buffer_access(state, ssbo ? instr->src[0].ssa : NULL,
|
||||
instr->intrinsic != nir_intrinsic_store_deref,
|
||||
instr->intrinsic != nir_intrinsic_load_deref);
|
||||
is_write);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -296,6 +299,7 @@ process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
|
|||
false);
|
||||
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
case nir_intrinsic_store_deref: {
|
||||
if (nir_deref_mode_is(nir_src_as_deref(instr->src[0]), nir_var_mem_global))
|
||||
return update_access(state, instr, nir_var_mem_global, false, true);
|
||||
|
|
|
|||
|
|
@ -212,6 +212,7 @@ nir_opt_acquire_release_barriers_impl(nir_function_impl *impl,
|
|||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_block_intel:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
case nir_intrinsic_store_deref:
|
||||
case nir_intrinsic_store_deref_block_intel:
|
||||
if (last_atomic) {
|
||||
|
|
|
|||
|
|
@ -344,7 +344,8 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
|
|||
}
|
||||
|
||||
case nir_intrinsic_load_deref_block_intel:
|
||||
case nir_intrinsic_store_deref_block_intel: {
|
||||
case nir_intrinsic_store_deref_block_intel:
|
||||
case nir_intrinsic_load_deref_transpose_amd: {
|
||||
/* Combine all the stores that may alias with the whole variable (or
|
||||
* cast).
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -216,6 +216,7 @@ node_is_dead(nir_cf_node *node)
|
|||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_bounded:
|
||||
|
|
@ -230,7 +231,8 @@ node_is_dead(nir_cf_node *node)
|
|||
* Consider only loads whose result can be affected by other
|
||||
* invocations.
|
||||
*/
|
||||
if (intrin->intrinsic == nir_intrinsic_load_deref) {
|
||||
if (intrin->intrinsic == nir_intrinsic_load_deref ||
|
||||
intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd) {
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
if (!nir_deref_mode_may_be(deref, nir_var_mem_ssbo |
|
||||
nir_var_mem_shared |
|
||||
|
|
|
|||
|
|
@ -195,6 +195,12 @@ remove_dead_write_vars_local(nir_shader *shader, nir_block *block,
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_deref_transpose_amd: {
|
||||
nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
|
||||
clear_unused_for_modes(unused_writes, src->modes);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_deref: {
|
||||
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ get_info(nir_intrinsic_op op)
|
|||
LOAD(nir_var_mem_ssbo, ssbo, 0, 1, -1, 1)
|
||||
STORE(nir_var_mem_ssbo, ssbo, 1, 2, -1, 0, 1)
|
||||
LOAD(0, deref, -1, -1, 0, 1)
|
||||
INFO(0, load_deref_transpose_amd, true, -1, -1, 0, -1, 1)
|
||||
STORE(0, deref, -1, -1, 0, 1, 1)
|
||||
LOAD(nir_var_mem_shared, shared, -1, 0, -1, 1)
|
||||
STORE(nir_var_mem_shared, shared, -1, 1, -1, 0, 1)
|
||||
|
|
@ -1241,7 +1242,7 @@ bindings_different_restrict(nir_shader *shader, struct entry *a, struct entry *b
|
|||
((a_access | b_access) & ACCESS_RESTRICT);
|
||||
}
|
||||
|
||||
static int64_t
|
||||
static bool
|
||||
may_alias_internal(struct entry *a, struct entry *b, uint32_t a_offset, uint32_t b_offset)
|
||||
{
|
||||
/* use adjacency information */
|
||||
|
|
@ -1252,7 +1253,10 @@ may_alias_internal(struct entry *a, struct entry *b, uint32_t a_offset, uint32_t
|
|||
int64_t diff = get_offset_diff(a, b) + b_offset - a_offset;
|
||||
|
||||
struct entry *first = diff < 0 ? b : a;
|
||||
unsigned size = get_bit_size(first) / 8u * first->num_components;
|
||||
if (first->intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd)
|
||||
return true;
|
||||
|
||||
uint64_t size = get_bit_size(first) / 8u * first->num_components;
|
||||
return llabs(diff) < size;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -830,7 +830,8 @@ is_indirect_load(nir_instr *instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_global)
|
||||
if (intrin->intrinsic == nir_intrinsic_load_global ||
|
||||
intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd)
|
||||
return true;
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_deref ||
|
||||
|
|
|
|||
|
|
@ -178,6 +178,7 @@ propagate_invariant_instr(nir_instr *instr, struct set *invariants, uint8_t *var
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
if (def_is_invariant(&intrin->def, invariants))
|
||||
add_var(nir_src_as_deref(intrin->src[0]), invariants, var_invariant);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -745,6 +745,18 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_deref_transpose_amd: {
|
||||
nir_deref_instr *src = nir_src_as_deref(instr->src[0]);
|
||||
assert(src);
|
||||
unsigned disallow_access = ACCESS_ATOMIC | ACCESS_SKIP_HELPERS | ACCESS_SMEM_AMD;
|
||||
validate_assert(state, !(nir_intrinsic_access(instr) & disallow_access));
|
||||
validate_assert(state, glsl_type_is_scalar(src->type));
|
||||
validate_assert(state, instr->num_components == 8 || instr->num_components == 4);
|
||||
dest_bit_size = glsl_get_bit_size(src->type);
|
||||
src_bit_sizes[0] = 64;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue