2021-05-20 10:41:37 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2020 Valve Corporation
|
|
|
|
|
*
|
2024-04-08 09:02:30 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2021-05-20 10:41:37 +01:00
|
|
|
*/
|
|
|
|
|
#include "helpers.h"
|
|
|
|
|
#include "sid.h"
|
|
|
|
|
|
|
|
|
|
using namespace aco;
|
|
|
|
|
|
aco: Remove vtx_binding from MUBUF/MTBUF instructions.
We haven't measured any noteworthy perf improvement
from these, and they are difficult to port to NIR,
so remove them before the NIR based VS input lowering
in order to make it easier to bisect and analyze stats.
Fossil DB stats on Rembrandt (GFX10.3):
Totals from 21750 (16.12% of 134913) affected shaders:
VGPRs: 868512 -> 868664 (+0.02%); split: -0.00%, +0.02%
CodeSize: 64406804 -> 64397572 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 567904 -> 567888 (-0.00%); split: +0.00%, -0.00%
Instrs: 12327212 -> 12324851 (-0.02%); split: -0.10%, +0.08%
Latency: 61367324 -> 61371204 (+0.01%); split: -0.04%, +0.05%
InvThroughput: 9687734 -> 9686000 (-0.02%); split: -0.03%, +0.01%
VClause: 248207 -> 303449 (+22.26%); split: -0.02%, +22.28%
SClause: 314942 -> 315564 (+0.20%); split: -0.09%, +0.29%
Copies: 921581 -> 921820 (+0.03%); split: -0.16%, +0.19%
Branches: 341964 -> 341967 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
2023-03-02 17:09:14 -08:00
|
|
|
static void
|
|
|
|
|
create_mubuf(Temp desc = Temp(0, s8))
|
2021-05-20 10:41:37 +01:00
|
|
|
{
|
|
|
|
|
Operand desc_op(desc);
|
|
|
|
|
desc_op.setFixed(PhysReg(0));
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), desc_op,
|
aco: Remove vtx_binding from MUBUF/MTBUF instructions.
We haven't measured any noteworthy perf improvement
from these, and they are difficult to port to NIR,
so remove them before the NIR based VS input lowering
in order to make it easier to bisect and analyze stats.
Fossil DB stats on Rembrandt (GFX10.3):
Totals from 21750 (16.12% of 134913) affected shaders:
VGPRs: 868512 -> 868664 (+0.02%); split: -0.00%, +0.02%
CodeSize: 64406804 -> 64397572 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 567904 -> 567888 (-0.00%); split: +0.00%, -0.00%
Instrs: 12327212 -> 12324851 (-0.02%); split: -0.10%, +0.08%
Latency: 61367324 -> 61371204 (+0.01%); split: -0.04%, +0.05%
InvThroughput: 9687734 -> 9686000 (-0.02%); split: -0.03%, +0.01%
VClause: 248207 -> 303449 (+22.26%); split: -0.02%, +22.28%
SClause: 314942 -> 315564 (+0.20%); split: -0.09%, +0.29%
Copies: 921581 -> 921820 (+0.03%); split: -0.16%, +0.19%
Branches: 341964 -> 341967 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
2023-03-02 17:09:14 -08:00
|
|
|
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
|
2021-05-20 10:41:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_mubuf_store()
|
|
|
|
|
{
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
|
|
|
|
|
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
|
2021-05-20 10:41:37 +01:00
|
|
|
}
|
|
|
|
|
|
aco: Remove vtx_binding from MUBUF/MTBUF instructions.
We haven't measured any noteworthy perf improvement
from these, and they are difficult to port to NIR,
so remove them before the NIR based VS input lowering
in order to make it easier to bisect and analyze stats.
Fossil DB stats on Rembrandt (GFX10.3):
Totals from 21750 (16.12% of 134913) affected shaders:
VGPRs: 868512 -> 868664 (+0.02%); split: -0.00%, +0.02%
CodeSize: 64406804 -> 64397572 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 567904 -> 567888 (-0.00%); split: +0.00%, -0.00%
Instrs: 12327212 -> 12324851 (-0.02%); split: -0.10%, +0.08%
Latency: 61367324 -> 61371204 (+0.01%); split: -0.04%, +0.05%
InvThroughput: 9687734 -> 9686000 (-0.02%); split: -0.03%, +0.01%
VClause: 248207 -> 303449 (+22.26%); split: -0.02%, +22.28%
SClause: 314942 -> 315564 (+0.20%); split: -0.09%, +0.29%
Copies: 921581 -> 921820 (+0.03%); split: -0.16%, +0.19%
Branches: 341964 -> 341967 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
2023-03-02 17:09:14 -08:00
|
|
|
static void
|
|
|
|
|
create_mtbuf(Temp desc = Temp(0, s8))
|
2021-05-20 10:41:37 +01:00
|
|
|
{
|
|
|
|
|
Operand desc_op(desc);
|
|
|
|
|
desc_op.setFixed(PhysReg(0));
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1), desc_op,
|
|
|
|
|
Operand(PhysReg(256), v1), Operand::zero(), V_008F0C_BUF_DATA_FORMAT_32,
|
aco: Remove vtx_binding from MUBUF/MTBUF instructions.
We haven't measured any noteworthy perf improvement
from these, and they are difficult to port to NIR,
so remove them before the NIR based VS input lowering
in order to make it easier to bisect and analyze stats.
Fossil DB stats on Rembrandt (GFX10.3):
Totals from 21750 (16.12% of 134913) affected shaders:
VGPRs: 868512 -> 868664 (+0.02%); split: -0.00%, +0.02%
CodeSize: 64406804 -> 64397572 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 567904 -> 567888 (-0.00%); split: +0.00%, -0.00%
Instrs: 12327212 -> 12324851 (-0.02%); split: -0.10%, +0.08%
Latency: 61367324 -> 61371204 (+0.01%); split: -0.04%, +0.05%
InvThroughput: 9687734 -> 9686000 (-0.02%); split: -0.03%, +0.01%
VClause: 248207 -> 303449 (+22.26%); split: -0.02%, +22.28%
SClause: 314942 -> 315564 (+0.20%); split: -0.09%, +0.29%
Copies: 921581 -> 921820 (+0.03%); split: -0.16%, +0.19%
Branches: 341964 -> 341967 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
2023-03-02 17:09:14 -08:00
|
|
|
V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false);
|
2021-05-20 10:41:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_flat()
|
|
|
|
|
{
|
|
|
|
|
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(256), v2),
|
|
|
|
|
Operand(s2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_global()
|
|
|
|
|
{
|
|
|
|
|
bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1),
|
|
|
|
|
Operand(PhysReg(256), v2), Operand(s2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_mimg(bool nsa, Temp desc = Temp(0, s8))
|
|
|
|
|
{
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> mimg{create_instruction(aco_opcode::image_sample, Format::MIMG, 5, 1)};
|
2021-05-20 10:41:37 +01:00
|
|
|
mimg->definitions[0] = Definition(PhysReg(256), v1);
|
|
|
|
|
mimg->operands[0] = Operand(desc);
|
|
|
|
|
mimg->operands[0].setFixed(PhysReg(0));
|
|
|
|
|
mimg->operands[1] = Operand(PhysReg(0), s4);
|
|
|
|
|
mimg->operands[2] = Operand(v1);
|
|
|
|
|
for (unsigned i = 0; i < 2; i++)
|
|
|
|
|
mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
|
2024-03-25 12:05:50 +01:00
|
|
|
mimg->mimg().dmask = 0x1;
|
|
|
|
|
mimg->mimg().dim = ac_image_2d;
|
2021-05-20 10:41:37 +01:00
|
|
|
|
|
|
|
|
bld.insert(std::move(mimg));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_smem()
|
|
|
|
|
{
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
|
|
|
|
|
Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
create_smem_buffer(Temp desc = Temp(0, s4))
|
|
|
|
|
{
|
|
|
|
|
Operand desc_op(desc);
|
|
|
|
|
desc_op.setFixed(PhysReg(0));
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.smem(aco_opcode::s_buffer_load_dword, Definition(PhysReg(0), s1), desc_op, Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(form_hard_clauses.type_restrictions)
|
|
|
|
|
if (!setup_cs(NULL, GFX10))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('image_sample')
|
|
|
|
|
//; search_re('image_sample')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(false);
|
|
|
|
|
create_mimg(false);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 1
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf();
|
|
|
|
|
create_mubuf();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 2
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('global_load_dword')
|
|
|
|
|
//; search_re('global_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_global();
|
|
|
|
|
create_global();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 3
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('flat_load_dword')
|
|
|
|
|
//; search_re('flat_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_flat();
|
|
|
|
|
create_flat();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 4
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('s_load_dword')
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem();
|
|
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 5
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('flat_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf();
|
|
|
|
|
create_flat();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 6
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf();
|
|
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 7
|
|
|
|
|
//; search_re('flat_load_dword')
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_flat();
|
|
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
finish_form_hard_clause_test();
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(form_hard_clauses.size)
|
|
|
|
|
if (!setup_cs(NULL, GFX10))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 1
|
2022-07-21 16:16:16 +01:00
|
|
|
//! s_clause imm:62
|
|
|
|
|
//; for i in range(63):
|
2021-05-20 10:41:37 +01:00
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
2022-07-21 16:16:16 +01:00
|
|
|
for (unsigned i = 0; i < 63; i++)
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 2
|
2022-07-21 16:16:16 +01:00
|
|
|
//! s_clause imm:62
|
|
|
|
|
//; for i in range(64):
|
2021-05-20 10:41:37 +01:00
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
2022-07-21 16:16:16 +01:00
|
|
|
for (unsigned i = 0; i < 64; i++)
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 3
|
2022-07-21 16:16:16 +01:00
|
|
|
//! s_clause imm:62
|
|
|
|
|
//; for i in range(63):
|
2021-05-20 10:41:37 +01:00
|
|
|
//; search_re('s_load_dword')
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('s_load_dword')
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
2022-07-21 16:16:16 +01:00
|
|
|
for (unsigned i = 0; i < 65; i++)
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem();
|
|
|
|
|
|
|
|
|
|
finish_form_hard_clause_test();
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(form_hard_clauses.nsa)
|
|
|
|
|
for (unsigned i = GFX10; i <= GFX10_3; i++) {
|
2022-05-12 02:50:17 -04:00
|
|
|
if (!setup_cs(NULL, (amd_gfx_level)i))
|
2021-05-20 10:41:37 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
//! s_clause imm:1
|
2024-02-06 12:47:34 +00:00
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[1\]')
|
|
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[1\]')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(false);
|
|
|
|
|
create_mimg(false);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 1
|
|
|
|
|
//~gfx10_3! s_clause imm:1
|
2024-02-06 12:47:34 +00:00
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[1\]')
|
|
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[2\]')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(false);
|
|
|
|
|
create_mimg(true);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 2
|
|
|
|
|
//~gfx10_3! s_clause imm:1
|
2024-02-06 12:47:34 +00:00
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[2\]')
|
|
|
|
|
//; search_re(r'image_sample .* %0:v\[0\], %0:v\[2\]')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(true);
|
|
|
|
|
create_mimg(true);
|
|
|
|
|
|
|
|
|
|
finish_form_hard_clause_test();
|
|
|
|
|
}
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(form_hard_clauses.heuristic)
|
|
|
|
|
if (!setup_cs(NULL, GFX10))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
Temp img_desc0 = bld.tmp(s8);
|
|
|
|
|
Temp img_desc1 = bld.tmp(s8);
|
|
|
|
|
Temp buf_desc0 = bld.tmp(s4);
|
|
|
|
|
Temp buf_desc1 = bld.tmp(s4);
|
|
|
|
|
|
|
|
|
|
/* Don't form clause with different descriptors */
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('image_sample')
|
|
|
|
|
//; search_re('image_sample')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(false, img_desc0);
|
|
|
|
|
create_mimg(false, img_desc0);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 1
|
|
|
|
|
//; search_re('image_sample')
|
|
|
|
|
//; search_re('image_sample')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mimg(false, img_desc0);
|
|
|
|
|
create_mimg(false, img_desc1);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 2
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf(buf_desc0);
|
|
|
|
|
create_mubuf(buf_desc0);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 3
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf(buf_desc0);
|
|
|
|
|
create_mubuf(buf_desc1);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 4
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('s_buffer_load_dword')
|
|
|
|
|
//; search_re('s_buffer_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem_buffer(buf_desc0);
|
|
|
|
|
create_smem_buffer(buf_desc0);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 5
|
|
|
|
|
//; search_re('s_buffer_load_dword')
|
|
|
|
|
//; search_re('s_buffer_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem_buffer(buf_desc0);
|
|
|
|
|
create_smem_buffer(buf_desc1);
|
|
|
|
|
|
|
|
|
|
//>> p_unit_test 6
|
|
|
|
|
//; search_re('s_buffer_load_dword')
|
|
|
|
|
//; search_re('s_load_dword')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_smem_buffer(buf_desc0);
|
|
|
|
|
create_smem();
|
|
|
|
|
|
aco: form mixed MTBUF/MUBUF clauses
This should be one clause (all of the instructions load from the same vertex buffer)
s_clause 0x2 ; bfa10002
tbuffer_load_format_xyzw v[8:11], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:36 ; e9c32024 80010805
tbuffer_load_format_xyzw v[12:15], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:16 ; e9c32010 80010c05
tbuffer_load_format_xyzw v[16:19], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:12 ; e9c3200c 80011005
s_clause 0x2 ; bfa10002
buffer_load_dwordx3 v[20:22], v5, s[4:7], 0 idxen ; e03c2000 80011405
buffer_load_dwordx3 v[23:25], v5, s[4:7], 0 idxen offset:20 ; e03c2014 80011705
buffer_load_dwordx4 v[28:31], v5, s[4:7], 0 idxen offset:48 ; e0382030 80011c05
tbuffer_load_format_xy v[0:1], v5, s[4:7], 0 format:[BUF_FMT_8_8_UNORM] idxen offset:32 ; e8712020 80010005
Foz-DB Navi21:
Totals from 5624 (7.08% of 79395) affected shaders:
MaxWaves: 149894 -> 149898 (+0.00%)
Instrs: 3032697 -> 3034853 (+0.07%); split: -0.05%, +0.12%
CodeSize: 15907852 -> 15915752 (+0.05%); split: -0.05%, +0.10%
VGPRs: 216248 -> 216144 (-0.05%)
Latency: 10955137 -> 11008760 (+0.49%); split: -0.22%, +0.70%
InvThroughput: 2032857 -> 2033916 (+0.05%); split: -0.03%, +0.08%
VClause: 50120 -> 41778 (-16.64%); split: -16.66%, +0.02%
SClause: 62034 -> 62004 (-0.05%); split: -0.33%, +0.29%
Copies: 253836 -> 254505 (+0.26%); split: -0.17%, +0.43%
VALU: 1621606 -> 1622274 (+0.04%); split: -0.03%, +0.07%
SALU: 653251 -> 653252 (+0.00%)
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34379>
2025-01-21 01:12:58 +01:00
|
|
|
/* Form clause with MTBUF/MUBUF mix if they use the same descriptor. */
|
2021-05-20 10:41:37 +01:00
|
|
|
//>> p_unit_test 7
|
aco: form mixed MTBUF/MUBUF clauses
This should be one clause (all of the instructions load from the same vertex buffer)
s_clause 0x2 ; bfa10002
tbuffer_load_format_xyzw v[8:11], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:36 ; e9c32024 80010805
tbuffer_load_format_xyzw v[12:15], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:16 ; e9c32010 80010c05
tbuffer_load_format_xyzw v[16:19], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:12 ; e9c3200c 80011005
s_clause 0x2 ; bfa10002
buffer_load_dwordx3 v[20:22], v5, s[4:7], 0 idxen ; e03c2000 80011405
buffer_load_dwordx3 v[23:25], v5, s[4:7], 0 idxen offset:20 ; e03c2014 80011705
buffer_load_dwordx4 v[28:31], v5, s[4:7], 0 idxen offset:48 ; e0382030 80011c05
tbuffer_load_format_xy v[0:1], v5, s[4:7], 0 format:[BUF_FMT_8_8_UNORM] idxen offset:32 ; e8712020 80010005
Foz-DB Navi21:
Totals from 5624 (7.08% of 79395) affected shaders:
MaxWaves: 149894 -> 149898 (+0.00%)
Instrs: 3032697 -> 3034853 (+0.07%); split: -0.05%, +0.12%
CodeSize: 15907852 -> 15915752 (+0.05%); split: -0.05%, +0.10%
VGPRs: 216248 -> 216144 (-0.05%)
Latency: 10955137 -> 11008760 (+0.49%); split: -0.22%, +0.70%
InvThroughput: 2032857 -> 2033916 (+0.05%); split: -0.03%, +0.08%
VClause: 50120 -> 41778 (-16.64%); split: -16.66%, +0.02%
SClause: 62034 -> 62004 (-0.05%); split: -0.33%, +0.29%
Copies: 253836 -> 254505 (+0.26%); split: -0.17%, +0.43%
VALU: 1621606 -> 1622274 (+0.04%); split: -0.03%, +0.07%
SALU: 653251 -> 653252 (+0.00%)
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34379>
2025-01-21 01:12:58 +01:00
|
|
|
//! s_clause imm:1
|
2021-05-20 10:41:37 +01:00
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('tbuffer_load_format_x')
|
2021-07-13 11:22:46 +02:00
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
|
2021-05-20 10:41:37 +01:00
|
|
|
create_mubuf(buf_desc0);
|
|
|
|
|
create_mtbuf(buf_desc0);
|
|
|
|
|
|
aco: form mixed MTBUF/MUBUF clauses
This should be one clause (all of the instructions load from the same vertex buffer)
s_clause 0x2 ; bfa10002
tbuffer_load_format_xyzw v[8:11], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:36 ; e9c32024 80010805
tbuffer_load_format_xyzw v[12:15], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:16 ; e9c32010 80010c05
tbuffer_load_format_xyzw v[16:19], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:12 ; e9c3200c 80011005
s_clause 0x2 ; bfa10002
buffer_load_dwordx3 v[20:22], v5, s[4:7], 0 idxen ; e03c2000 80011405
buffer_load_dwordx3 v[23:25], v5, s[4:7], 0 idxen offset:20 ; e03c2014 80011705
buffer_load_dwordx4 v[28:31], v5, s[4:7], 0 idxen offset:48 ; e0382030 80011c05
tbuffer_load_format_xy v[0:1], v5, s[4:7], 0 format:[BUF_FMT_8_8_UNORM] idxen offset:32 ; e8712020 80010005
Foz-DB Navi21:
Totals from 5624 (7.08% of 79395) affected shaders:
MaxWaves: 149894 -> 149898 (+0.00%)
Instrs: 3032697 -> 3034853 (+0.07%); split: -0.05%, +0.12%
CodeSize: 15907852 -> 15915752 (+0.05%); split: -0.05%, +0.10%
VGPRs: 216248 -> 216144 (-0.05%)
Latency: 10955137 -> 11008760 (+0.49%); split: -0.22%, +0.70%
InvThroughput: 2032857 -> 2033916 (+0.05%); split: -0.03%, +0.08%
VClause: 50120 -> 41778 (-16.64%); split: -16.66%, +0.02%
SClause: 62034 -> 62004 (-0.05%); split: -0.33%, +0.29%
Copies: 253836 -> 254505 (+0.26%); split: -0.17%, +0.43%
VALU: 1621606 -> 1622274 (+0.04%); split: -0.03%, +0.07%
SALU: 653251 -> 653252 (+0.00%)
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34379>
2025-01-21 01:12:58 +01:00
|
|
|
//>> p_unit_test 8
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('tbuffer_load_format_x')
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
|
|
|
|
|
create_mubuf(buf_desc0);
|
|
|
|
|
create_mtbuf(buf_desc1);
|
|
|
|
|
|
2021-05-20 10:41:37 +01:00
|
|
|
finish_form_hard_clause_test();
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(form_hard_clauses.stores)
|
2023-06-07 17:33:46 +01:00
|
|
|
for (amd_gfx_level gfx : {GFX10, GFX11}) {
|
|
|
|
|
if (!setup_cs(NULL, gfx))
|
|
|
|
|
continue;
|
2021-05-20 10:41:37 +01:00
|
|
|
|
2023-06-07 17:33:46 +01:00
|
|
|
//>> p_unit_test 0
|
|
|
|
|
//~gfx11! s_clause imm:1
|
|
|
|
|
//; search_re('buffer_store_dword')
|
|
|
|
|
//; search_re('buffer_store_dword')
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
|
|
|
|
create_mubuf_store();
|
|
|
|
|
create_mubuf_store();
|
2021-05-20 10:41:37 +01:00
|
|
|
|
2023-06-07 17:33:46 +01:00
|
|
|
//>> p_unit_test 1
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_store_dword')
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
|
|
|
|
create_mubuf();
|
|
|
|
|
create_mubuf();
|
|
|
|
|
create_mubuf_store();
|
2021-05-20 10:41:37 +01:00
|
|
|
|
2023-06-07 17:33:46 +01:00
|
|
|
//>> p_unit_test 2
|
|
|
|
|
//; search_re('buffer_store_dword')
|
|
|
|
|
//! s_clause imm:1
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
|
|
|
|
create_mubuf_store();
|
|
|
|
|
create_mubuf();
|
|
|
|
|
create_mubuf();
|
|
|
|
|
|
|
|
|
|
/* Unclear whether this is the best behaviour */
|
|
|
|
|
//>> p_unit_test 3
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
//; search_re('buffer_store_dword')
|
|
|
|
|
//; search_re('buffer_load_dword')
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
|
|
|
|
create_mubuf();
|
|
|
|
|
create_mubuf_store();
|
|
|
|
|
create_mubuf();
|
2021-05-20 10:41:37 +01:00
|
|
|
|
2023-06-07 17:33:46 +01:00
|
|
|
finish_form_hard_clause_test();
|
|
|
|
|
}
|
2021-05-20 10:41:37 +01:00
|
|
|
END_TEST
|