mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-21 05:58:22 +02:00
intel: Change dpas_intel source order to follow DPAS
- The general NIR intrinsic (`cmat_muladd`) is `Dst A B C`, which follows the corresponding SPIR-V operation.
- The DPAS hardware instruction is `Dst = C B A`.

Right now the NIR intrinsic for `dpas_intel` uses a third order, `Dst = C A B`. Change it so that it follows the DPAS hardware order.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42275>
This commit is contained in:
parent
8b8603df2b
commit
db25e87243
5 changed files with 11 additions and 16 deletions
|
|
@ -2902,19 +2902,16 @@ system_value("btd_shader_type_intel", 1)
|
|||
# 64B, the pointer needs 256B aligned.
|
||||
system_value("ray_query_global_intel", 1, bit_sizes=[64])
|
||||
|
||||
# Source 0: Accumulator matrix (type specified by DEST_TYPE)
|
||||
# Source 1: A matrix (type specified by SRC_TYPE)
|
||||
# Source 2: B matrix (type specified by SRC_TYPE)
|
||||
# Source order same as DPAS instruction in the HW.
|
||||
#
|
||||
# Source 0: Accumulator matrix (type specified by DEST_BASE_TYPE)
|
||||
# Source 1: B matrix (type specified by SRC_BASE_TYPE)
|
||||
# Source 2: A matrix (type specified by SRC_BASE_TYPE)
|
||||
#
|
||||
# The matrix parameters are the slices owned by the invocation.
|
||||
#
|
||||
# The accumulator is source 0 because that is the source the intrinsic
|
||||
# infrastructure in NIR uses to determine the number of components in the
|
||||
# result.
|
||||
#
|
||||
# The number of components for the second and third sources is -1 to avoid
|
||||
# validation of its value. Some supported configurations will have the
|
||||
# component count of that matrix different than the others.
|
||||
# The number of components in the A/B sources may not match the
|
||||
# destination due to different packing factors.
|
||||
intrinsic("dpas_intel", dest_comp=0, src_comp=[0, -1, -1],
|
||||
indices=[DEST_BASE_TYPE, SRC_BASE_TYPE, SATURATE, SYSTOLIC_DEPTH, REPEAT_COUNT],
|
||||
flags=[CAN_ELIMINATE])
|
||||
|
|
|
|||
|
|
@ -4255,11 +4255,10 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
|
|||
const unsigned dpas_exec_size = devinfo->ver >= 20 ? 16 : 8;
|
||||
brw_builder bldn = bld.exec_all().group(dpas_exec_size, 0);
|
||||
|
||||
/* DPAS uses a different source order: Accumulator, B, A. */
|
||||
bldn.DPAS(retype(dest, dest_type),
|
||||
retype(src[0], dest_type),
|
||||
retype(src[2], src_type),
|
||||
retype(src[1], src_type),
|
||||
retype(src[2], src_type),
|
||||
sdepth,
|
||||
rcount)
|
||||
->saturate = nir_intrinsic_saturate(instr);
|
||||
|
|
|
|||
|
|
@ -861,8 +861,8 @@ lower_cmat_instr(nir_builder *b, nir_instr *instr, void *_state)
|
|||
nir_dpas_intel(b,
|
||||
dst_info->packing_factor * glsl_base_type_get_bit_size(dst_info->desc.element_type),
|
||||
nir_load_deref(b, accum_slice),
|
||||
nir_load_deref(b, A_slice),
|
||||
nir_load_deref(b, B_slice),
|
||||
nir_load_deref(b, A_slice),
|
||||
.dest_base_type = dst_type,
|
||||
.src_base_type = src_type,
|
||||
.saturate = nir_intrinsic_saturate(intrin),
|
||||
|
|
|
|||
|
|
@ -587,8 +587,8 @@ emit_dpas(const struct intel_device_info *devinfo,
|
|||
*/
|
||||
nir_def *result = nir_dpas_intel(&b, XEHP_SYSTOLIC_CHANNEL_BITS,
|
||||
nir_load_deref(&b, deref_d),
|
||||
nir_load_deref(&b, deref_a),
|
||||
nir_load_deref(&b, deref_b),
|
||||
nir_load_deref(&b, deref_a),
|
||||
.dest_base_type = GLSL_TYPE_INT,
|
||||
.src_base_type = GLSL_TYPE_INT8,
|
||||
.saturate = false,
|
||||
|
|
|
|||
|
|
@ -1393,8 +1393,7 @@ jay_emit_dpas(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
|||
jay_as_gpr(b, nj_src(intr->src[2])),
|
||||
};
|
||||
|
||||
/* Jay follows HW source order. */
|
||||
jay_DPAS(b, dst, src[0], src[2], src[1], nir_intrinsic_systolic_depth(intr),
|
||||
jay_DPAS(b, dst, src[0], src[1], src[2], nir_intrinsic_systolic_depth(intr),
|
||||
nir_intrinsic_repeat_count(intr),
|
||||
jay_type_for_glsl_base_type(nir_intrinsic_dest_base_type(intr)),
|
||||
jay_type_for_glsl_base_type(nir_intrinsic_src_base_type(intr)),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue