2020-11-02 18:16:56 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2020 Valve Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
#include "helpers.h"
|
|
|
|
|
|
|
|
|
|
using namespace aco;
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
|
|
|
|
|
/* Registers of operands should be "recycled" for the output. But if the
|
|
|
|
|
* input is smaller than the output, that's not generally possible. The
|
|
|
|
|
* first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
|
|
|
|
|
* while the lower 16 bits are still live, so the output must be stored in
|
|
|
|
|
* a register other than v0. For the second v_cvt_f32_f16, the original
|
|
|
|
|
* value stored in v0 is no longer used and hence it's safe to store the
|
|
|
|
|
* result in v0.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {
|
|
|
|
|
for (bool pessimistic : { false, true }) {
|
|
|
|
|
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
|
|
|
|
|
|
2021-02-04 16:01:44 +01:00
|
|
|
//>> v1: %_:v[#a] = p_startpgm
|
2020-11-02 18:16:56 +01:00
|
|
|
if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
|
|
|
|
|
Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
|
|
|
|
|
|
|
|
|
|
//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32]
|
|
|
|
|
//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
|
|
|
|
|
//; success = (b != a)
|
|
|
|
|
auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
|
|
|
|
|
auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
|
|
|
|
|
writeout(0, result1);
|
|
|
|
|
writeout(1, result2);
|
|
|
|
|
|
|
|
|
|
finish_ra_test(ra_test_policy { pessimistic });
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
END_TEST
|
2021-01-19 14:50:33 +00:00
|
|
|
|
|
|
|
|
BEGIN_TEST(regalloc.32bit_partial_write)
|
2021-02-04 16:01:44 +01:00
|
|
|
//>> v1: %_:v[0] = p_startpgm
|
2021-01-19 14:50:33 +00:00
|
|
|
if (!setup_cs("v1", GFX10))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* ensure high 16 bits are occupied */
|
|
|
|
|
//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
|
|
|
|
|
Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
|
|
|
|
|
|
|
|
|
|
/* This test checks if this instruction uses SDWA. */
|
|
|
|
|
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_preserve
|
|
|
|
|
Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand(0u));
|
|
|
|
|
|
|
|
|
|
//! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
|
|
|
|
|
bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
|
|
|
|
|
|
|
|
|
|
finish_ra_test(ra_test_policy());
|
|
|
|
|
END_TEST
|
aco/ra: fix get_reg_for_operand() with no free registers
fossil-db (Sienna Cichlid):
Totals from 195 (0.13% of 149839) affected shaders:
CodeSize: 2352160 -> 2356720 (+0.19%); split: -0.00%, +0.20%
Instrs: 431976 -> 433124 (+0.27%); split: -0.00%, +0.27%
Latency: 10174434 -> 10174897 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 4044388 -> 4044425 (+0.00%); split: -0.00%, +0.00%
Copies: 67634 -> 68762 (+1.67%); split: -0.00%, +1.67%
fossil-db (Polaris):
Totals from 186 (0.12% of 151365) affected shaders:
CodeSize: 2272356 -> 2276848 (+0.20%); split: -0.00%, +0.20%
Instrs: 432390 -> 433513 (+0.26%); split: -0.00%, +0.26%
Latency: 13153394 -> 13160194 (+0.05%); split: -0.00%, +0.05%
InvThroughput: 10889509 -> 10889967 (+0.00%); split: -0.00%, +0.00%
SClause: 12745 -> 12747 (+0.02%)
Copies: 74832 -> 75945 (+1.49%); split: -0.01%, +1.50%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10459>
2021-04-23 14:29:22 +01:00
|
|
|
|
|
|
|
|
BEGIN_TEST(regalloc.precolor.swap)
|
|
|
|
|
//>> s2: %op0:s[0-1] = p_startpgm
|
|
|
|
|
if (!setup_cs("s2", GFX10))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
program->dev.sgpr_limit = 4;
|
|
|
|
|
|
|
|
|
|
//! s2: %op1:s[2-3] = p_unit_test
|
|
|
|
|
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
|
|
|
|
|
|
|
|
|
|
//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
|
|
|
|
|
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
|
|
|
|
|
Operand op(inputs[0]);
|
|
|
|
|
op.setFixed(PhysReg(2));
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, op, op1);
|
|
|
|
|
|
|
|
|
|
finish_ra_test(ra_test_policy());
|
|
|
|
|
END_TEST
|