mesa/src/amd/compiler/tests/test_d3d11_derivs.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

630 lines
24 KiB
C++
Raw Normal View History

/*
* Copyright © 2023 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "helpers.h"
#include "test_d3d11_derivs-spirv.h"
using namespace aco;
/* Baseline case: the fragment shader calls texture() on a sampler2D inside a branch on
 * gl_FragCoord.x. The pipeline is built for GFX10_3 and both the "ACO IR" and the
 * "Assembly" prints of the fragment stage are checked. */
BEGIN_TEST(d3d11_derivs.simple)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: both interpolated coordinates are gathered into %wqm (an lv2 created by
 * p_start_linear_vgpr) ahead of BB1, the image_sample in BB1 reads %wqm, and %wqm is
 * ended after the BB6 label. */
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: each interpolation result is moved into the v[rx:ry] range that
 * image_sample takes as its address. */
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
BEGIN_TEST(d3d11_derivs.constant)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in float in_coord;
layout(location = 0) out float out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in float in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, vec2(in_coord, -0.5));
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, -0.5
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_mov_b32_e32 v#ry, -0.5 ; $_
aco/sched_ilp: new latency heuristic The main train of thought is that we should consider latency after the write was scheduled. This means we rely a lot less on the input order of instructions for good results. Foz-DB GFX1150: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01% CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00% Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06% InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02% VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07% SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05% VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00% CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00% Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05% InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01% VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06% SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05% VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi21: Totals from 76224 (96.03% of 79377) affected shaders: Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03% CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02% Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05% InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01% VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04% SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06% Foz-DB Vega10: Totals from 60142 (95.42% of 63026) affected shaders: Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02% CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02% Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43% InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09% 
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09% SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* The fragment shader discards on gl_FragCoord.y and then samples unconditionally
 * (GFX10_3). Only the "ACO IR" print is checked. */
BEGIN_TEST(d3d11_derivs.discard)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
if (gl_FragCoord.y > 1.0)
discard;
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* The discard gets emitted as demote_if. */
/* Expected IR: an s_wqm_b64 writing exec, a p_exit_early_if_not on exec, and later an
 * image_sample printed with disable_wqm. */
//>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_
//! p_exit_early_if_not %_:exec
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_, %_, (kill)%_ 2d disable_wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST
/* texture() with an LOD bias taken from gl_FragCoord.x (GFX10_3). The IR check pins the
 * bias to v[2] at p_startpgm, and the assembly check expects image_sample_b to read the
 * bias from that same register. */
BEGIN_TEST(d3d11_derivs.bias)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord, gl_FragCoord.x);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: %wqm is an lv3 whose first dword is undef, followed by the two
 * coordinates; image_sample_b takes %wqm and the bias as separate operands. */
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: the bias register is written as literal v2 (matching v[2] in the
 * p_startpgm check above and the bias_array test) so that it is actually verified
 * instead of being captured into an unused variable. */
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> BB1:
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* textureOffset() with the constant offset ivec2(1, 2), built for GFX9. The checks
 * expect the packed offset 0x201 to be materialized in BB1 and passed to image_sample_o
 * in the register directly below the coordinates. */
BEGIN_TEST(d3d11_derivs.offset)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = textureOffset(tex, in_coord, ivec2(1, 2));
}
);
// clang-format on
/* Use GFX9 because we should have at least one test which doesn't use NSA. */
PipelineBuilder pbld(get_vk_device(GFX9));
pbld.add_vsfs(vs, fs);
/* Expected IR: %wqm is an lv3 with an undef first dword followed by the coordinates;
 * the offset is a p_parallelcopy of 0x201 inside BB1. */
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
//>> BB1
//>> v1: %offset = p_parallelcopy 0x201
//>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%offset 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: the offset constant is written to ro_tmp and then copied to ro.
 * The copy source must reuse the ro_tmp capture so the two moves are tied together. */
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> BB1:
//>> v_mov_b32_e32 v#ro_tmp, 0x201 ; $_ $_
//>> v_mov_b32_e32 v#ro, v#ro_tmp ; $_
/* ro, rx and ry have to be consecutive registers. */
//; success = ro+1 == rx and ro+2 == ry
//>> image_sample_o v[#_:#_], v[#ro:#rx], s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* texture() on a sampler2DArray (GFX10_3). The checks expect the layer coordinate to go
 * through v_rndne_f32 and to be the last dword of the linear VGPR. */
BEGIN_TEST(d3d11_derivs.array)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec3 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2DArray tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: %wqm is an lv3 of two coordinates followed by the rounded layer, and the
 * image_sample is printed as "2darray da". */
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> lv3: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: attr0.z is interpolated first and rounded in place, then x, y and
 * the layer are copied into rx, ry and rl. */
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> BB1:
/* rx, ry and rl have to be consecutive registers. */
//; success = rx+1 == ry and rx+2 == rl
//>> image_sample v[#_:#_], v[#rx:#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* Combination of the bias and array cases: texture() on a sampler2DArray with a bias
 * taken from gl_FragCoord.x (GFX10_3). */
BEGIN_TEST(d3d11_derivs.bias_array)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec3 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2DArray tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord, gl_FragCoord.x);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: the bias is pinned to v[2] at p_startpgm; %wqm is an lv4 with an undef
 * first dword followed by the two coordinates and the rounded layer. */
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: image_sample_b takes the address list [v2, rx, ry, rl], i.e. the
 * bias stays in v2 and is followed by the copied coordinates and layer. */
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> BB1:
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry, v#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
BEGIN_TEST(d3d11_derivs._1d_gfx9)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in float in_coord;
layout(location = 0) out float out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in float in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler1D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX9));
pbld.add_vsfs(vs, fs);
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, 0.5
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
aco/sched_ilp: new latency heuristic The main train of thought is that we should consider latency after the write was scheduled. This means we rely a lot less on the input order of instructions for good results. Foz-DB GFX1150: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01% CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00% Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06% InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02% VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07% SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05% VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00% CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00% Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05% InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01% VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06% SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05% VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi21: Totals from 76224 (96.03% of 79377) affected shaders: Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03% CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02% Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05% InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01% VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04% SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06% Foz-DB Vega10: Totals from 60142 (95.42% of 63026) affected shaders: Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02% CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02% Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43% InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09% 
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09% SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//; success = rx+1 == ry
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler1DArray tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX9));
pbld.add_vsfs(vs, fs);
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, 0.5, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
aco/sched_ilp: new latency heuristic The main train of thought is that we should consider latency after the write was scheduled. This means we rely a lot less on the input order of instructions for good results. Foz-DB GFX1150: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01% CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00% Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06% InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02% VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07% SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05% VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00% CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00% Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05% InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01% VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06% SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05% VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi21: Totals from 76224 (96.03% of 79377) affected shaders: Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03% CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02% Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05% InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01% VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04% SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06% Foz-DB Vega10: Totals from 60142 (95.42% of 63026) affected shaders: Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02% CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02% Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43% InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09% 
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09% SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> BB1:
//; success = rx+1 == ry and rx+2 == rl
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf da ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* texture() on a samplerCube (GFX10_3). The checks expect the face from v_cubeid_f32
 * and two v_fmaak_f32 results to form the linear VGPR read by image_sample. */
BEGIN_TEST(d3d11_derivs.cube)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec3 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec3 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform samplerCube tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: %wqm is an lv3 of x, y and face, and the image_sample is printed as
 * "cube da". */
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
/* Expected assembly: the face is copied with v_mov_b32, while x and y are copied
 * together by a v_lshrrev_b64 with shift 0. */
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
/* rx, ry and rf have to be consecutive registers. */
//; success = rx+1 == ry and rx+2 == rf
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
BEGIN_TEST(d3d11_derivs.cube_array)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec4 in_coord;
layout(location = 0) out vec4 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec4 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform samplerCubeArray tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
2024-01-12 11:49:30 +01:00
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
2024-01-12 11:49:30 +01:00
aco/sched_ilp: new latency heuristic The main train of thought is that we should consider latency after the write was scheduled. This means we rely a lot less on the input order of instructions for good results. Foz-DB GFX1150: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01% CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00% Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06% InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02% VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07% SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05% VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00% CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00% Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05% InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01% VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06% SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05% VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi21: Totals from 76224 (96.03% of 79377) affected shaders: Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03% CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02% Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05% InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01% VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04% SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06% Foz-DB Vega10: Totals from 60142 (95.42% of 63026) affected shaders: Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02% CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02% Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43% InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09% 
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09% SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
aco/sched_ilp: base latency and issue cycles on aco_statistics This matters for trans and scalar fpu instructions. Foz-DB GFX1150: Totals from 53894 (67.90% of 79377) affected shaders: Instrs: 38528421 -> 38481337 (-0.12%); split: -0.16%, +0.04% CodeSize: 200206016 -> 200023916 (-0.09%); split: -0.12%, +0.03% Latency: 265011734 -> 264303762 (-0.27%); split: -0.28%, +0.02% InvThroughput: 53804490 -> 53696097 (-0.20%); split: -0.21%, +0.01% VClause: 736996 -> 736988 (-0.00%); split: -0.00%, +0.00% SClause: 1118494 -> 1118474 (-0.00%); split: -0.01%, +0.01% VALU: 21982349 -> 21982358 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 50791 (63.99% of 79377) affected shaders: Instrs: 37511862 -> 37495712 (-0.04%); split: -0.11%, +0.07% CodeSize: 197990892 -> 197925104 (-0.03%); split: -0.09%, +0.06% Latency: 261929261 -> 261273534 (-0.25%); split: -0.27%, +0.01% InvThroughput: 43978329 -> 43921618 (-0.13%); split: -0.14%, +0.01% VClause: 727683 -> 727695 (+0.00%); split: -0.00%, +0.00% SClause: 1092527 -> 1092544 (+0.00%); split: -0.01%, +0.01% VALU: 22646553 -> 22646566 (+0.00%) Foz-DB Navi21: Totals from 43899 (55.30% of 79377) affected shaders: Instrs: 35649081 -> 35649110 (+0.00%); split: -0.00%, +0.00% CodeSize: 192336212 -> 192337276 (+0.00%); split: -0.00%, +0.00% Latency: 270621538 -> 270221431 (-0.15%); split: -0.16%, +0.02% InvThroughput: 66757841 -> 66715918 (-0.06%); split: -0.07%, +0.01% VClause: 734884 -> 734867 (-0.00%); split: -0.01%, +0.01% SClause: 1072956 -> 1072951 (-0.00%); split: -0.01%, +0.01% Foz-DB Vega10: Totals from 52687 (83.60% of 63026) affected shaders: Instrs: 24595280 -> 24595693 (+0.00%); split: -0.01%, +0.01% CodeSize: 127199836 -> 127200164 (+0.00%); split: -0.01%, +0.01% Latency: 252281578 -> 252497934 (+0.09%); split: -0.03%, +0.12% InvThroughput: 136551527 -> 136577609 (+0.02%); split: -0.01%, +0.03% VClause: 536798 -> 536718 (-0.01%); split: -0.04%, +0.03% SClause: 819978 -> 819693 (-0.03%); split: -0.04%, +0.01% 
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 16:31:23 +01:00
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
aco/sched_ilp: new latency heuristic The main train of thought is that we should consider latency after the write was scheduled. This means we rely a lot less on the input order of instructions for good results. Foz-DB GFX1150: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01% CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00% Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06% InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02% VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07% SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05% VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00% Foz-DB Navi31: Totals from 75606 (95.25% of 79377) affected shaders: Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00% CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00% Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05% InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01% VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06% SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05% VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00% Foz-DB Navi21: Totals from 76224 (96.03% of 79377) affected shaders: Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03% CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02% Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05% InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01% VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04% SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06% Foz-DB Vega10: Totals from 60142 (95.42% of 63026) affected shaders: Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02% CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02% Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43% InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09% 
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09% SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
2024-01-12 11:49:30 +01:00
//>> BB1:
//; success = rx+1 == ry and rx+2 == rlf
//>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
/* dFdxFine() and dFdyCoarse() used inside the branch, next to a textureLod() call
 * (GFX10_3). Only the "ACO IR" print is checked. */
BEGIN_TEST(d3d11_derivs.dfdxy)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = vec4(dFdxFine(in_coord.x), dFdyCoarse(in_coord.y), textureLod(tex, vec2(0.5), 0.0).xy);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Must be before BB1 */
/* Expected IR: two v_subrev_f32 with quad_perm [0,0,2,2] and [0,0,0,0], both appearing
 * ahead of the BB1 label. */
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,2,2] bound_ctrl:1 fi
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,0,0] bound_ctrl:1 fi
//>> BB1
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST
/* Ensure the BC optimize transform is done after ac_nir_lower_tex.
 * The y coordinate comes from interpolateAtCentroid(); the IR checks expect a
 * v_cndmask_b32 result to feed the attr0.y interpolation before both coordinates are
 * placed in the linear VGPR (GFX10_3). */
BEGIN_TEST(d3d11_derivs.bc_optimize)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec4 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec4(0.0);
if (gl_FragCoord.x > 1.0)
out_color = texture(tex, vec2(in_coord.x, interpolateAtCentroid(in_coord.y)));
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: %y_coord2 (the v_cndmask_b32 result) is the first operand of the attr0.y
 * interpolation, and %x / %y are then gathered into %wqm ahead of BB1. */
//>> v1: %y_coord2 = v_cndmask_b32 (kill)%_, %_, (kill)%_
//>> v1: %x = v_interp_p2_f32 (kill)%_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%y_coord2, (kill)%_:m0, (kill)%_ attr0.y
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST
/* textureQueryLod() inside the branch (GFX10_3). The IR checks expect the coordinates
 * to be placed in a linear VGPR for image_get_lod, and additionally to be used for
 * quad-permuted subtractions and moves ahead of BB1. */
BEGIN_TEST(d3d11_derivs.get_lod)
// clang-format off
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_coord;
void main() {
out_coord = in_coord;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
layout(location = 0) in vec2 in_coord;
layout(location = 0) out vec2 out_color;
layout(binding = 0) uniform sampler2D tex;
void main() {
out_color = vec2(0.0);
if (gl_FragCoord.x > 1.0)
out_color = textureQueryLod(tex, in_coord);
}
);
// clang-format on
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
/* Expected IR: for each of x and y, one v_subrev_f32 with quad_perm [0,0,0,0] followed
 * by two v_mov_b32 of that same result with quad_perm [1,1,1,1] and [2,2,2,2]. The y
 * moves must name %y12_m_y0 so they are tied to the y subtraction. */
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
//>> lv2: %wqm = p_start_linear_vgpr %x, %y
//>> v1: %x12_m_x0 = v_subrev_f32 (kill)%x, (kill)%x quad_perm:[0,0,0,0] bound_ctrl:1 fi
//>> v1: %x1_m_x0 = v_mov_b32 %x12_m_x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
//>> v1: %x2_m_x0 = v_mov_b32 (kill)%x12_m_x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
//>> v1: %y12_m_y0 = v_subrev_f32 (kill)%y, (kill)%y quad_perm:[0,0,0,0] bound_ctrl:1 fi
//>> v1: %y1_m_y0 = v_mov_b32 %y12_m_y0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
//>> v1: %y2_m_y0 = v_mov_b32 (kill)%y12_m_y0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
//>> BB1
//>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST
/* Builds one image_sample_c_b_o by hand for GFX10, GFX10_3 and GFX11 and checks the
 * instruction printed for each level. Levels for which setup_cs() fails are skipped. */
BEGIN_TEST(d3d11_derivs.nsa_max)
for (amd_gfx_level gfx_level : {GFX10, GFX10_3, GFX11}) {
   if (!setup_cs(nullptr, gfx_level))
      continue;

   /* Fixed registers: the SGPR operands start at index 0 and 8, and the VGPR operands
    * use index 256 + n (printed as v[n] in the patterns below). */
   const PhysReg sgpr_0{0};
   const PhysReg sgpr_8{8};
   const PhysReg vgpr_0{256};
   const PhysReg vgpr_6{256 + 6};
   const PhysReg vgpr_7{256 + 7};
   const PhysReg vgpr_8{256 + 8};

   //>> p_unit_test 0
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());

   /* The linear v[0-5] operand and the three single VGPRs v6, v7, v8 are expected to be
    * combined differently per level: copied into v[0-2] on gfx10, passed as separate
    * registers on gfx10_3, and passed separately with a trailing v[4-5] pair on gfx11. */
   //~gfx10! v2: %_:v[0-1] = v_lshrrev_b64 0, %_:v[6-7]
   //~gfx10! v1: %_:v[2] = v_mov_b32 %_:v[8]
   //~gfx10! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[0-5] 2darray da
   //~gfx10_3! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4], %_:v[5] 2darray da
   //~gfx11! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
   Instruction* sample_instr = bld.mimg(
      aco_opcode::image_sample_c_b_o, Definition(vgpr_0, v4), Operand(sgpr_0, s8),
      Operand(sgpr_8, s4), Operand(v1), Operand(vgpr_0, v6.as_linear()), Operand(vgpr_6, v1),
      Operand(vgpr_7, v1), Operand(vgpr_8, v1));

   /* Mark the sample as a 2D-array access that requires strict WQM. */
   auto& mimg_fields = sample_instr->mimg();
   mimg_fields.dim = ac_image_2darray;
   mimg_fields.da = true;
   mimg_fields.strict_wqm = true;

   finish_to_hw_instr_test();
}
END_TEST