mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-30 05:00:32 +01:00
radv/nir/lower_cmat: set optimal load/store alignment
Allows vectorizing loads/stores with sub-dword types or with robustness. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35633>
This commit is contained in:
parent
ed2ecf9ef8
commit
48fc8c8d1c
1 changed files with 22 additions and 0 deletions
|
|
@ -459,6 +459,21 @@ radv_nir_lower_cooperative_matrix(nir_shader *shader, enum amd_gfx_level gfx_lev
|
|||
unsigned idx_bits = deref->def.bit_size;
|
||||
nir_def *base_row = radv_get_base_row(&b, desc, &params, local_idx);
|
||||
|
||||
/* VUID-RuntimeSpirv-OpCooperativeMatrixLoadKHR-08986:
|
||||
* For OpCooperativeMatrixLoadKHR and OpCooperativeMatrixStoreKHR instructions,
|
||||
* the Pointer and Stride operands must be aligned to at least the lesser of 16 bytes
|
||||
* or the natural alignment of a row or column (depending on ColumnMajor) of the matrix
|
||||
* (where the natural alignment is the number of columns/rows multiplied by the component size).
|
||||
*/
|
||||
unsigned align_mul = 0;
|
||||
if (layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR)
|
||||
align_mul = MIN2(16, radv_nir_cmat_bits(desc) * desc.rows / 8);
|
||||
|
||||
if (gfx_level >= GFX12)
|
||||
align_mul /= wave_size / 16;
|
||||
else if (desc.use == GLSL_CMAT_USE_ACCUMULATOR)
|
||||
align_mul = 0;
|
||||
|
||||
for (unsigned i = 0; i < length / mul; ++i) {
|
||||
nir_def *col_offset = inner_idx;
|
||||
nir_def *row_offset;
|
||||
|
|
@ -488,6 +503,13 @@ radv_nir_lower_cooperative_matrix(nir_shader *shader, enum amd_gfx_level gfx_lev
|
|||
glsl_scalar_type(desc.element_type), radv_nir_cmat_bits(desc) / 8);
|
||||
iter_deref = nir_build_deref_ptr_as_array(&b, iter_deref, row_offset);
|
||||
|
||||
if (align_mul) {
|
||||
unsigned align_offset = row_iter * radv_nir_cmat_bits(desc) / 8 % align_mul;
|
||||
iter_deref =
|
||||
nir_build_deref_cast_with_alignment(&b, &iter_deref->def, deref->modes, iter_deref->type,
|
||||
iter_deref->cast.ptr_stride, align_mul, align_offset);
|
||||
}
|
||||
|
||||
if (is_load) {
|
||||
vars[i * mul] = nir_load_deref(&b, iter_deref);
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue