mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
brw: Add passes to generate and lower load_reg
v2: Add support for WE_all instructions... this already just worked, so I only had to delete the check and the FINISHME comment. v3: Use logic more like def_analysis::update_for_reads to determine when to not insert LOAD_REG instructions. Based on a suggestion by Ken. v4: Eliminate "store" from all the names since STORE_REG does not exist anymore. Fold insert_load_reg into brw_insert_load_reg. Elminate extra call to s.def_analysis.require() after progress. Pull a loop-invariant check out of the inst->srouces loop. Drop call to brw_opt_split_virtual_grfs after lowering load_reg. All suggested by Caio. v5: Assert that LOAD_REG doesn't already exist in brw_insert_load_reg. Update comment before fully_defines. Both suggested by Caio. v6: Don't explicitly special-case SHADER_OPCODE_MEMORY_STORE_LOGICAL. Move the inst->dst.file != VGRF check earlier to avoid the loop over sources. Both suggested by Ken. Move the call the brw_insert_load_reg a little bit later, and explain why it's at that location. Suggested by Caio. v7: Many changes to the for-each-source loop in brw_insert_load_reg. Removes incorrect multiplication of s.alloc.sizes with reg_unit. Adds checks for matching SIMD size and NoMask in the search for pre-existing LOAD_REG of same value. v8: Add some unit tests. Suggested by Caio. shader-db: Lunar Lake total instructions in shared programs: 16923237 -> 16921895 (<.01%) instructions in affected programs: 450565 -> 449223 (-0.30%) helped: 251 / HURT: 377 total cycles in shared programs: 910428418 -> 889920590 (-2.25%) cycles in affected programs: 719248184 -> 698740356 (-2.85%) helped: 9076 / HURT: 9082 total fills in shared programs: 2242 -> 2218 (-1.07%) fills in affected programs: 116 -> 92 (-20.69%) helped: 2 / HURT: 0 total sends in shared programs: 848635 -> 848421 (-0.03%) sends in affected programs: 810 -> 596 (-26.42%) helped: 10 / HURT: 0 LOST: 82 GAINED: 78 Meteor Lake and DG2 had similar results. (Meteor Lake shown) total instructions in shared programs: 19875784 -> 19871694 (-0.02%) instructions in affected programs: 1050091 -> 1046001 (-0.39%) helped: 251 / HURT: 2403 total cycles in shared programs: 905328238 -> 882446458 (-2.53%) cycles in affected programs: 682736344 -> 659854564 (-3.35%) helped: 7869 / HURT: 7911 total spills in shared programs: 5512 -> 5032 (-8.71%) spills in affected programs: 1830 -> 1350 (-26.23%) helped: 8 / HURT: 0 total fills in shared programs: 5648 -> 4782 (-15.33%) fills in affected programs: 3312 -> 2446 (-26.15%) helped: 8 / HURT: 0 total sends in shared programs: 1032942 -> 1032722 (-0.02%) sends in affected programs: 572 -> 352 (-38.46%) helped: 10 / HURT: 0 LOST: 138 GAINED: 53 Tiger Lake total instructions in shared programs: 19711930 -> 19715591 (0.02%) instructions in affected programs: 1040623 -> 1044284 (0.35%) helped: 317 / HURT: 2474 total cycles in shared programs: 862988990 -> 860573870 (-0.28%) cycles in affected programs: 612392461 -> 609977341 (-0.39%) helped: 7447 / HURT: 7686 total sends in shared programs: 1034763 -> 1034555 (-0.02%) sends in affected programs: 784 -> 576 (-26.53%) helped: 8 / HURT: 0 LOST: 56 GAINED: 143 Ice Lake and Skylake had similar results. (Ice Lake shown) total instructions in shared programs: 20545461 -> 20545220 (<.01%) instructions in affected programs: 422405 -> 422164 (-0.06%) helped: 180 / HURT: 459 total cycles in shared programs: 872697345 -> 866874523 (-0.67%) cycles in affected programs: 573117917 -> 567295095 (-1.02%) helped: 6783 / HURT: 6980 total spills in shared programs: 4335 -> 4336 (0.02%) spills in affected programs: 90 -> 91 (1.11%) helped: 1 / HURT: 2 total fills in shared programs: 4194 -> 4196 (0.05%) fills in affected programs: 463 -> 465 (0.43%) helped: 1 / HURT: 2 total sends in shared programs: 1079446 -> 1079238 (-0.02%) sends in affected programs: 784 -> 576 (-26.53%) helped: 8 / HURT: 0 LOST: 117 GAINED: 37 fossil-db: All Intel platforms had similar results. (Lunar Lake shown) Totals: Instrs: 209708136 -> 209695617 (-0.01%); split: -0.02%, +0.01% Send messages: 10927753 -> 10927640 (-0.00%) Cycle count: 30540172048 -> 30427084732 (-0.37%); split: -0.99%, +0.62% Spill count: 511621 -> 510932 (-0.13%); split: -0.22%, +0.08% Fill count: 621166 -> 618440 (-0.44%); split: -0.56%, +0.12% Scratch Memory Size: 35574784 -> 35648512 (+0.21%); split: -0.06%, +0.26% Max live registers: 65453860 -> 65453140 (-0.00%); split: -0.00%, +0.00% Non SSA regs after NIR: 75374990 -> 35195764 (-53.31%) Totals from 503284 (71.25% of 706391) affected shaders: Instrs: 180203778 -> 180191259 (-0.01%); split: -0.02%, +0.01% Send messages: 9699732 -> 9699619 (-0.00%) Cycle count: 30080349592 -> 29967262276 (-0.38%); split: -1.01%, +0.63% Spill count: 511584 -> 510895 (-0.13%); split: -0.22%, +0.08% Fill count: 621120 -> 618394 (-0.44%); split: -0.56%, +0.12% Scratch Memory Size: 35443712 -> 35517440 (+0.21%); split: -0.06%, +0.27% Max live registers: 52566092 -> 52565372 (-0.00%); split: -0.01%, +0.00% Non SSA regs after NIR: 70110949 -> 29931723 (-57.31%) Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31497>
This commit is contained in:
parent
8b2be206f3
commit
2d13acf9d9
5 changed files with 354 additions and 0 deletions
184
src/intel/compiler/brw_load_reg.cpp
Normal file
184
src/intel/compiler/brw_load_reg.cpp
Normal file
|
|
@ -0,0 +1,184 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2024 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
#include "brw_shader.h"
|
||||||
|
#include "brw_cfg.h"
|
||||||
|
#include "brw_analysis.h"
|
||||||
|
#include "brw_builder.h"
|
||||||
|
|
||||||
|
/* Duplicated from brw_def_analysis::fully_defines. */
|
||||||
|
static bool
|
||||||
|
fully_defines(const brw_shader &s, brw_inst *inst)
|
||||||
|
{
|
||||||
|
return s.alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
|
||||||
|
!inst->is_partial_write();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_insert_load_reg(brw_shader &s)
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
const brw_def_analysis &defs = s.def_analysis.require();
|
||||||
|
|
||||||
|
foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
|
||||||
|
/* These should not exist yet. */
|
||||||
|
assert(inst->opcode != SHADER_OPCODE_LOAD_REG);
|
||||||
|
|
||||||
|
/* These opcodes may have the right source and destination patterns to
|
||||||
|
* have their sources replaced by load_reg, but these instructions are
|
||||||
|
* special and / or wierd. They should not be modified.
|
||||||
|
*/
|
||||||
|
if (inst->opcode == SHADER_OPCODE_UNDEF ||
|
||||||
|
inst->opcode == BRW_OPCODE_DPAS) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the destination is non-VGRF adding load_reg instructions will not
|
||||||
|
* help. If the destination is already SSA, nothing needs to be done.
|
||||||
|
*/
|
||||||
|
if (inst->dst.file != VGRF || defs.get(inst->dst) != NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* If there is a source that would cause def_analysis::update_for_reads
|
||||||
|
* to mark the def as invalid, adding load_reg for the sources will not
|
||||||
|
* help.
|
||||||
|
*/
|
||||||
|
if (inst->reads_accumulator_implicitly())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bool bad_source = false;
|
||||||
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
|
if (inst->src[i].file == ARF &&
|
||||||
|
(inst->src[i].nr == BRW_ARF_ADDRESS ||
|
||||||
|
inst->src[i].nr == BRW_ARF_ACCUMULATOR ||
|
||||||
|
inst->src[i].nr == BRW_ARF_FLAG)) {
|
||||||
|
bad_source = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bad_source)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* If the instruction does not fully define the destination, adding
|
||||||
|
* load_reg instructions will not help.
|
||||||
|
*/
|
||||||
|
if (!fully_defines(s, inst))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (inst->exec_size < 8)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
assert(inst->exec_size == 8 || inst->exec_size == 16 ||
|
||||||
|
inst->exec_size == 32);
|
||||||
|
|
||||||
|
const unsigned mask = (inst->exec_size / 8) - 1;
|
||||||
|
|
||||||
|
/* Replace any non-SSA sources with load_reg of the source. */
|
||||||
|
const brw_builder bld = brw_builder(inst);
|
||||||
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
|
/* LOAD_REG only operates on VGRF sources. If the source is not VGRF,
|
||||||
|
* skip it.
|
||||||
|
*/
|
||||||
|
if (inst->src[i].file != VGRF)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The source is already a def, so don't add a LOAD_REG. */
|
||||||
|
if (defs.get(inst->src[i]) != NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Cases of stride != 1 are difficult to handle correctly. For
|
||||||
|
* example, when stride is 0, the source may have been written by
|
||||||
|
* NoMask instruction that cannot be seen from here. In this case,
|
||||||
|
* emitting a non-NoMask LOAD_REG may not actually copy the value
|
||||||
|
* that the instruction is trying to read.
|
||||||
|
*
|
||||||
|
* This means that is_scalar sources in larger exec sizes are not
|
||||||
|
* handled. Since enough information is available in the source, this
|
||||||
|
* could be added later.
|
||||||
|
*/
|
||||||
|
if (inst->src[i].stride != 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* If the size of the VGRF allocation is not an even multiple of
|
||||||
|
* the SIMD size, don't emit a load_reg. This can occur for sparse
|
||||||
|
* texture loads. These will have SIMD-size values for the texture
|
||||||
|
* data and a single SIMD1 register for the residency information.
|
||||||
|
*/
|
||||||
|
if ((s.alloc.sizes[inst->src[i].nr] & mask) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
brw_reg_type t =
|
||||||
|
brw_type_with_size(BRW_TYPE_UD,
|
||||||
|
brw_type_size_bits(inst->src[i].type));
|
||||||
|
brw_reg old_src = brw_vgrf(inst->src[i].nr, t);
|
||||||
|
brw_reg new_src;
|
||||||
|
|
||||||
|
/* Since the sources of a LOAD_REG will likely not be defs,
|
||||||
|
* none of the existing optimizations passes will eliminate
|
||||||
|
* redundant LOAD_REG instructions. Search back though this
|
||||||
|
* block to find a LOAD_REG of the same value to avoid emitting
|
||||||
|
* too many redundant instructions.
|
||||||
|
*/
|
||||||
|
foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
|
||||||
|
if (scan_inst->dst.file == old_src.file &&
|
||||||
|
scan_inst->dst.nr == old_src.nr) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scan_inst->opcode == SHADER_OPCODE_LOAD_REG &&
|
||||||
|
scan_inst->exec_size == inst->exec_size &&
|
||||||
|
scan_inst->force_writemask_all == inst->force_writemask_all &&
|
||||||
|
old_src.equals(scan_inst->src[0])) {
|
||||||
|
new_src = scan_inst->dst;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_src.file == BAD_FILE)
|
||||||
|
new_src = bld.LOAD_REG(old_src);
|
||||||
|
|
||||||
|
inst->src[i].nr = new_src.nr;
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress)
|
||||||
|
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||||||
|
BRW_DEPENDENCY_VARIABLES);
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_lower_load_reg(brw_shader &s)
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
|
||||||
|
if (inst->opcode == SHADER_OPCODE_LOAD_REG) {
|
||||||
|
const brw_builder ibld = brw_builder(inst);
|
||||||
|
|
||||||
|
const unsigned bytes = inst->size_written;
|
||||||
|
const unsigned type_bytes = brw_type_size_bytes(inst->dst.type);
|
||||||
|
const unsigned bytes_per_mov = inst->exec_size * type_bytes;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < bytes; i += bytes_per_mov) {
|
||||||
|
ibld.MOV(byte_offset(inst->dst, i),
|
||||||
|
byte_offset(inst->src[0], i));
|
||||||
|
}
|
||||||
|
|
||||||
|
inst->remove();
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress)
|
||||||
|
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||||||
|
BRW_DEPENDENCY_VARIABLES);
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
@ -53,6 +53,14 @@ brw_optimize(brw_shader &s)
|
||||||
|
|
||||||
OPT(brw_opt_eliminate_find_live_channel);
|
OPT(brw_opt_eliminate_find_live_channel);
|
||||||
|
|
||||||
|
/* Add load_reg instructions before the main optimization loop to get more
|
||||||
|
* defs available in those passes. Do it after the preceeding few pre-loop
|
||||||
|
* passes so that it hopefully has less work to do. Having it here versus
|
||||||
|
* before the call to opt_dce made some difference, but it was mostly
|
||||||
|
* noise.
|
||||||
|
*/
|
||||||
|
OPT(brw_insert_load_reg);
|
||||||
|
|
||||||
/* Track how much non-SSA at this point. */
|
/* Track how much non-SSA at this point. */
|
||||||
{
|
{
|
||||||
const brw_def_analysis &defs = s.def_analysis.require();
|
const brw_def_analysis &defs = s.def_analysis.require();
|
||||||
|
|
@ -85,6 +93,12 @@ brw_optimize(brw_shader &s)
|
||||||
if (OPT(brw_opt_combine_convergent_txf))
|
if (OPT(brw_opt_combine_convergent_txf))
|
||||||
OPT(brw_opt_copy_propagation_defs);
|
OPT(brw_opt_copy_propagation_defs);
|
||||||
|
|
||||||
|
if (OPT(brw_lower_load_reg)) {
|
||||||
|
OPT(brw_opt_copy_propagation);
|
||||||
|
OPT(brw_opt_register_coalesce);
|
||||||
|
OPT(brw_opt_dead_code_eliminate);
|
||||||
|
}
|
||||||
|
|
||||||
if (OPT(brw_lower_pack)) {
|
if (OPT(brw_lower_pack)) {
|
||||||
OPT(brw_opt_register_coalesce);
|
OPT(brw_opt_register_coalesce);
|
||||||
OPT(brw_opt_dead_code_eliminate);
|
OPT(brw_opt_dead_code_eliminate);
|
||||||
|
|
|
||||||
|
|
@ -353,3 +353,6 @@ unsigned brw_get_lowered_simd_width(const brw_shader *shader,
|
||||||
|
|
||||||
brw_reg brw_allocate_vgrf(brw_shader &s, brw_reg_type type, unsigned count);
|
brw_reg brw_allocate_vgrf(brw_shader &s, brw_reg_type type, unsigned count);
|
||||||
brw_reg brw_allocate_vgrf_units(brw_shader &s, unsigned units_of_REGSIZE);
|
brw_reg brw_allocate_vgrf_units(brw_shader &s, unsigned units_of_REGSIZE);
|
||||||
|
|
||||||
|
bool brw_insert_load_reg(brw_shader &s);
|
||||||
|
bool brw_lower_load_reg(brw_shader &s);
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,7 @@ libintel_compiler_brw_files = files(
|
||||||
'brw_inst.cpp',
|
'brw_inst.cpp',
|
||||||
'brw_inst.h',
|
'brw_inst.h',
|
||||||
'brw_isa_info.h',
|
'brw_isa_info.h',
|
||||||
|
'brw_load_reg.cpp',
|
||||||
'brw_lower.cpp',
|
'brw_lower.cpp',
|
||||||
'brw_lower_dpas.cpp',
|
'brw_lower_dpas.cpp',
|
||||||
'brw_lower_integer_multiplication.cpp',
|
'brw_lower_integer_multiplication.cpp',
|
||||||
|
|
@ -212,6 +213,7 @@ if with_tests
|
||||||
'test_eu_validate.cpp',
|
'test_eu_validate.cpp',
|
||||||
'test_helpers.cpp',
|
'test_helpers.cpp',
|
||||||
'test_helpers.h',
|
'test_helpers.h',
|
||||||
|
'test_insert_load_reg.cpp',
|
||||||
'test_lower_scoreboard.cpp',
|
'test_lower_scoreboard.cpp',
|
||||||
'test_opt_cmod_propagation.cpp',
|
'test_opt_cmod_propagation.cpp',
|
||||||
'test_opt_combine_constants.cpp',
|
'test_opt_combine_constants.cpp',
|
||||||
|
|
|
||||||
151
src/intel/compiler/test_insert_load_reg.cpp
Normal file
151
src/intel/compiler/test_insert_load_reg.cpp
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2025 Intel Corporation
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "test_helpers.h"
|
||||||
|
#include "brw_builder.h"
|
||||||
|
|
||||||
|
class insert_load_reg_test : public brw_shader_pass_test {};
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, basic)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
brw_builder exp = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
brw_reg src = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
|
||||||
|
bld.ADD(dst, src, brw_imm_f(1.0));
|
||||||
|
|
||||||
|
EXPECT_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
|
||||||
|
exp.ADD(dst, exp.LOAD_REG(src), brw_imm_f(1.0));
|
||||||
|
|
||||||
|
EXPECT_SHADERS_MATCH(bld, exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, already_defs)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst0 = vgrf(bld, BRW_TYPE_F);
|
||||||
|
brw_reg dst1 = vgrf(bld, BRW_TYPE_F);
|
||||||
|
brw_reg src0 = retype(brw_vec16_reg(FIXED_GRF, 2, 0), BRW_TYPE_F);
|
||||||
|
|
||||||
|
/* The first ADD will produce a def due its FIXED_GRF and IMM sources. The
|
||||||
|
* second ADD will also produces a def due to its def and IMM
|
||||||
|
* sources. brw_insert_load_reg shouldn't do anything.
|
||||||
|
*/
|
||||||
|
bld.ADD(dst0, src0, brw_imm_f(1.0));
|
||||||
|
bld.ADD(dst1, dst0, brw_imm_f(1.0));
|
||||||
|
|
||||||
|
EXPECT_NO_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, stride_0)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst = vgrf(bld, BRW_TYPE_F);
|
||||||
|
brw_reg src = component(vgrf(bld, BRW_TYPE_F), 0);
|
||||||
|
|
||||||
|
ASSERT_EQ(src.stride, 0);
|
||||||
|
bld.ADD(dst, src, brw_imm_f(1.0));
|
||||||
|
|
||||||
|
EXPECT_NO_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, stride_2)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst = vgrf(bld, BRW_TYPE_D);
|
||||||
|
brw_reg src = subscript(vgrf(bld, BRW_TYPE_D), BRW_TYPE_W, 0);
|
||||||
|
|
||||||
|
ASSERT_EQ(src.stride, 2);
|
||||||
|
bld.ADD(dst, src, brw_imm_d(1));
|
||||||
|
|
||||||
|
EXPECT_NO_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, is_scalar)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
brw_builder xbld = bld.scalar_group();
|
||||||
|
|
||||||
|
brw_reg dst = vgrf(bld, BRW_TYPE_F);
|
||||||
|
brw_reg src = vgrf(xbld, BRW_TYPE_F);
|
||||||
|
|
||||||
|
/* Currently, is_scalar cases are treated the same as other stride=0
|
||||||
|
* cases. This does not need to be the case, and it may (should!) be
|
||||||
|
* changed in the future. Split this out as a separate test.
|
||||||
|
*/
|
||||||
|
src.is_scalar = true;
|
||||||
|
|
||||||
|
bld.ADD(dst, component(src, 0), brw_imm_f(1.0));
|
||||||
|
|
||||||
|
EXPECT_NO_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, emit_load_reg_once)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
brw_builder exp = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst0 = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
brw_reg dst1 = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
brw_reg src = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
|
||||||
|
/* Since both instructions use the same source, only one LOAD_REG should be
|
||||||
|
* generated.
|
||||||
|
*/
|
||||||
|
bld.ADD(dst0, src, brw_imm_f(1.0));
|
||||||
|
bld.ADD(dst1, src, brw_imm_f(2.0));
|
||||||
|
|
||||||
|
EXPECT_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
|
||||||
|
brw_reg dst2 = exp.LOAD_REG(src);
|
||||||
|
exp.ADD(dst0, dst2, brw_imm_f(1.0));
|
||||||
|
exp.ADD(dst1, dst2, brw_imm_f(2.0));
|
||||||
|
|
||||||
|
EXPECT_SHADERS_MATCH(bld, exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, no_mask)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
brw_builder exp = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst0 = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
brw_reg dst1 = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
|
||||||
|
|
||||||
|
bld.ADD(dst0, src0, brw_imm_f(1.0));
|
||||||
|
bld.exec_all().ADD(dst1, src0, brw_imm_f(2.0));
|
||||||
|
|
||||||
|
EXPECT_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
|
||||||
|
brw_reg src1 = exp.LOAD_REG(src0);
|
||||||
|
exp.ADD(dst0, src1, brw_imm_f(1.0));
|
||||||
|
brw_reg src2 = exp.exec_all().LOAD_REG(src0);
|
||||||
|
exp.exec_all().ADD(dst1, src2, brw_imm_f(2.0));
|
||||||
|
|
||||||
|
EXPECT_SHADERS_MATCH(bld, exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(insert_load_reg_test, odd_size)
|
||||||
|
{
|
||||||
|
brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 16);
|
||||||
|
|
||||||
|
brw_reg dst = vgrf(bld, BRW_TYPE_D);
|
||||||
|
brw_reg src = vgrf(bld, BRW_TYPE_D, 3, 8);
|
||||||
|
|
||||||
|
/* The register allocation size is 3 SIMD8 units. Since that is not an even
|
||||||
|
* multiple of the exec size, it would be very difficult to generate a
|
||||||
|
* correct LOAD_REG. This should be skipped.
|
||||||
|
*/
|
||||||
|
bld.ADD(dst, src, brw_imm_d(1));
|
||||||
|
|
||||||
|
EXPECT_NO_PROGRESS(brw_insert_load_reg, bld);
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue