mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 15:28:15 +02:00
nir: add load_global_transpose_amd
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41653>
This commit is contained in:
parent
57498eca83
commit
b982e71084
11 changed files with 28 additions and 1 deletion
|
|
@ -91,6 +91,7 @@ is_vmem_or_lds_load(nir_def *def, unsigned depth, unsigned begin, unsigned end)
|
|||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global_amd:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared2_amd:
|
||||
|
|
|
|||
|
|
@ -1075,6 +1075,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_zs_emit_pan:
|
||||
case nir_intrinsic_load_return_param_amd:
|
||||
case nir_intrinsic_load_local_invocation_index_intel:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
is_divergent = true;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -2308,6 +2308,8 @@ intrinsic("cmat_muladd_amd", src_comp=[-1, -1, 0], dest_comp=0, bit_sizes=src2,
|
|||
# src[] = { address }.
|
||||
intrinsic("load_deref_transpose_amd", bit_sizes=[8, 16], dest_comp=0, src_comp=[1],
|
||||
indices=[ACCESS], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("load_global_transpose_amd", bit_sizes=[8, 16], dest_comp=0, src_comp=[1],
|
||||
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Get the debug log buffer descriptor.
|
||||
intrinsic("load_debug_log_desc_amd", bit_sizes=[32], dest_comp=4, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
|
|
|||
|
|
@ -153,6 +153,7 @@ lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_global_atomic_swap:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
case nir_intrinsic_load_pixel_local:
|
||||
/* just assume that 24b is not sufficient: */
|
||||
lower_large_src(&intr->src[0], state);
|
||||
|
|
|
|||
|
|
@ -726,6 +726,13 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_deref_transpose_amd:
|
||||
if (mode != nir_var_mem_global)
|
||||
UNREACHABLE("Unsupported explicit IO variable mode");
|
||||
assert(addr_format == nir_address_format_64bit_global);
|
||||
op = nir_intrinsic_load_global_transpose_amd;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Invalid intrinsic");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -991,6 +991,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global_etna:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_scratch_nv:
|
||||
case nir_intrinsic_load_scratch_intel:
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ get_intrinsic_info(nir_intrinsic_instr *intrin, nir_variable_mode *modes,
|
|||
*writes = true;
|
||||
break;
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
*modes = nir_var_mem_global;
|
||||
*reads = true;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -221,6 +221,7 @@ node_is_dead(nir_cf_node *node)
|
|||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_bounded:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_global_transpose_amd:
|
||||
case nir_intrinsic_load_ssbo_intel:
|
||||
case nir_intrinsic_load_ssbo_ir3:
|
||||
/* If there's a memory barrier after the loop, a load might be
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ get_info(nir_intrinsic_op op)
|
|||
INFO(nir_var_mem_shared, load_shared2_amd, true, -1, 0, -1, -1, 1);
|
||||
INFO(nir_var_mem_shared, store_shared2_amd, true, -1, 1, -1, 0, 1)
|
||||
LOAD(nir_var_mem_global, global, -1, 0, -1, 1)
|
||||
INFO(nir_var_mem_global, load_global_transpose_amd, true, -1, 0, -1, -1, 1)
|
||||
STORE(nir_var_mem_global, global, -1, 1, -1, 0, 1)
|
||||
LOAD(nir_var_mem_global, global_constant, -1, 0, -1, 1)
|
||||
LOAD(nir_var_mem_task_payload, task_payload, -1, 0, -1, 1)
|
||||
|
|
@ -1253,8 +1254,10 @@ may_alias_internal(struct entry *a, struct entry *b, uint32_t a_offset, uint32_t
|
|||
int64_t diff = get_offset_diff(a, b) + b_offset - a_offset;
|
||||
|
||||
struct entry *first = diff < 0 ? b : a;
|
||||
- if (first->intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd)
|
||||
if (first->intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd ||
|
||||
first->intrin->intrinsic == nir_intrinsic_load_global_transpose_amd) {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t size = get_bit_size(first) / 8u * first->num_components;
|
||||
return llabs(diff) < size;
|
||||
|
|
|
|||
|
|
@ -831,6 +831,7 @@ is_indirect_load(nir_instr *instr)
|
|||
}
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_global ||
|
||||
intrin->intrinsic == nir_intrinsic_load_global_transpose_amd ||
|
||||
intrin->intrinsic == nir_intrinsic_load_deref_transpose_amd)
|
||||
return true;
|
||||
|
||||
|
|
|
|||
|
|
@ -757,6 +757,14 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_global_transpose_amd: {
|
||||
unsigned disallow_access = ACCESS_ATOMIC | ACCESS_SKIP_HELPERS | ACCESS_SMEM_AMD;
|
||||
validate_assert(state, !(nir_intrinsic_access(instr) & disallow_access));
|
||||
validate_assert(state, instr->num_components == 8 || instr->num_components == 4);
|
||||
src_bit_sizes[0] = 64;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue