mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 04:50:11 +01:00
intel/brw: Replace uses of fs_reg with brw_reg
And remove the fs_reg alias. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29791>
This commit is contained in:
parent
fe46efa647
commit
3670c24740
38 changed files with 1626 additions and 1629 deletions
|
|
@ -48,11 +48,11 @@
|
||||||
using namespace brw;
|
using namespace brw;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources);
|
initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources);
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst::init(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg *src, unsigned sources)
|
const brw_reg *src, unsigned sources)
|
||||||
{
|
{
|
||||||
memset((void*)this, 0, sizeof(*this));
|
memset((void*)this, 0, sizeof(*this));
|
||||||
|
|
||||||
|
|
@ -100,34 +100,34 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
|
||||||
init(opcode, exec_size, reg_undef, NULL, 0);
|
init(opcode, exec_size, reg_undef, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst)
|
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst)
|
||||||
{
|
{
|
||||||
init(opcode, exec_size, dst, NULL, 0);
|
init(opcode, exec_size, dst, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0)
|
const brw_reg &src0)
|
||||||
{
|
{
|
||||||
const fs_reg src[1] = { src0 };
|
const brw_reg src[1] = { src0 };
|
||||||
init(opcode, exec_size, dst, src, 1);
|
init(opcode, exec_size, dst, src, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0, const fs_reg &src1)
|
const brw_reg &src0, const brw_reg &src1)
|
||||||
{
|
{
|
||||||
const fs_reg src[2] = { src0, src1 };
|
const brw_reg src[2] = { src0, src1 };
|
||||||
init(opcode, exec_size, dst, src, 2);
|
init(opcode, exec_size, dst, src, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
|
const brw_reg &src0, const brw_reg &src1, const brw_reg &src2)
|
||||||
{
|
{
|
||||||
const fs_reg src[3] = { src0, src1, src2 };
|
const brw_reg src[3] = { src0, src1, src2 };
|
||||||
init(opcode, exec_size, dst, src, 3);
|
init(opcode, exec_size, dst, src, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
|
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const brw_reg &dst,
|
||||||
const fs_reg src[], unsigned sources)
|
const brw_reg src[], unsigned sources)
|
||||||
{
|
{
|
||||||
init(opcode, exec_width, dst, src, sources);
|
init(opcode, exec_width, dst, src, sources);
|
||||||
}
|
}
|
||||||
|
|
@ -145,10 +145,10 @@ fs_inst::~fs_inst()
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources)
|
initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources)
|
||||||
{
|
{
|
||||||
if (num_sources > ARRAY_SIZE(inst->builtin_src))
|
if (num_sources > ARRAY_SIZE(inst->builtin_src))
|
||||||
inst->src = new fs_reg[num_sources];
|
inst->src = new brw_reg[num_sources];
|
||||||
else
|
else
|
||||||
inst->src = inst->builtin_src;
|
inst->src = inst->builtin_src;
|
||||||
|
|
||||||
|
|
@ -164,14 +164,14 @@ fs_inst::resize_sources(uint8_t num_sources)
|
||||||
if (this->sources == num_sources)
|
if (this->sources == num_sources)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
fs_reg *old_src = this->src;
|
brw_reg *old_src = this->src;
|
||||||
fs_reg *new_src;
|
brw_reg *new_src;
|
||||||
|
|
||||||
const unsigned builtin_size = ARRAY_SIZE(this->builtin_src);
|
const unsigned builtin_size = ARRAY_SIZE(this->builtin_src);
|
||||||
|
|
||||||
if (old_src == this->builtin_src) {
|
if (old_src == this->builtin_src) {
|
||||||
if (num_sources > builtin_size) {
|
if (num_sources > builtin_size) {
|
||||||
new_src = new fs_reg[num_sources];
|
new_src = new brw_reg[num_sources];
|
||||||
for (unsigned i = 0; i < this->sources; i++)
|
for (unsigned i = 0; i < this->sources; i++)
|
||||||
new_src[i] = old_src[i];
|
new_src[i] = old_src[i];
|
||||||
|
|
||||||
|
|
@ -189,7 +189,7 @@ fs_inst::resize_sources(uint8_t num_sources)
|
||||||
new_src = old_src;
|
new_src = old_src;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
new_src = new fs_reg[num_sources];
|
new_src = new brw_reg[num_sources];
|
||||||
for (unsigned i = 0; i < num_sources; i++)
|
for (unsigned i = 0; i < num_sources; i++)
|
||||||
new_src[i] = old_src[i];
|
new_src[i] = old_src[i];
|
||||||
}
|
}
|
||||||
|
|
@ -204,10 +204,10 @@ fs_inst::resize_sources(uint8_t num_sources)
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
||||||
const fs_reg &dst,
|
const brw_reg &dst,
|
||||||
const fs_reg &surface,
|
const brw_reg &surface,
|
||||||
const fs_reg &surface_handle,
|
const brw_reg &surface_handle,
|
||||||
const fs_reg &varying_offset,
|
const brw_reg &varying_offset,
|
||||||
uint32_t const_offset,
|
uint32_t const_offset,
|
||||||
uint8_t alignment,
|
uint8_t alignment,
|
||||||
unsigned components)
|
unsigned components)
|
||||||
|
|
@ -218,7 +218,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
||||||
* be any component of a vector, and then we load 4 contiguous
|
* be any component of a vector, and then we load 4 contiguous
|
||||||
* components starting from that. TODO: Support loading fewer than 4.
|
* components starting from that. TODO: Support loading fewer than 4.
|
||||||
*/
|
*/
|
||||||
fs_reg total_offset = bld.ADD(varying_offset, brw_imm_ud(const_offset));
|
brw_reg total_offset = bld.ADD(varying_offset, brw_imm_ud(const_offset));
|
||||||
|
|
||||||
/* The pull load message will load a vec4 (16 bytes). If we are loading
|
/* The pull load message will load a vec4 (16 bytes). If we are loading
|
||||||
* a double this means we are only loading 2 elements worth of data.
|
* a double this means we are only loading 2 elements worth of data.
|
||||||
|
|
@ -226,9 +226,9 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
||||||
* so other parts of the driver don't get confused about the size of the
|
* so other parts of the driver don't get confused about the size of the
|
||||||
* result.
|
* result.
|
||||||
*/
|
*/
|
||||||
fs_reg vec4_result = bld.vgrf(BRW_TYPE_F, 4);
|
brw_reg vec4_result = bld.vgrf(BRW_TYPE_F, 4);
|
||||||
|
|
||||||
fs_reg srcs[PULL_VARYING_CONSTANT_SRCS];
|
brw_reg srcs[PULL_VARYING_CONSTANT_SRCS];
|
||||||
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE] = surface;
|
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE] = surface;
|
||||||
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle;
|
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle;
|
||||||
srcs[PULL_VARYING_CONSTANT_SRC_OFFSET] = total_offset;
|
srcs[PULL_VARYING_CONSTANT_SRC_OFFSET] = total_offset;
|
||||||
|
|
@ -668,7 +668,7 @@ fs_inst::is_partial_write() const
|
||||||
/* Special case UNDEF since a lot of places in the backend do things like this :
|
/* Special case UNDEF since a lot of places in the backend do things like this :
|
||||||
*
|
*
|
||||||
* fs_builder ubld = bld.exec_all().group(1, 0);
|
* fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
* fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
* brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||||
* ubld.UNDEF(tmp); <- partial write, even if the whole register is concerned
|
* ubld.UNDEF(tmp); <- partial write, even if the whole register is concerned
|
||||||
*/
|
*/
|
||||||
if (this->opcode == SHADER_OPCODE_UNDEF) {
|
if (this->opcode == SHADER_OPCODE_UNDEF) {
|
||||||
|
|
@ -1151,13 +1151,13 @@ fs_visitor::emit_gs_thread_end()
|
||||||
if (mark_last_urb_write_with_eot())
|
if (mark_last_urb_write_with_eot())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(0);
|
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(0);
|
||||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||||
srcs, ARRAY_SIZE(srcs));
|
srcs, ARRAY_SIZE(srcs));
|
||||||
} else {
|
} else {
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
||||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
|
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
|
||||||
|
|
@ -1212,7 +1212,7 @@ fs_visitor::assign_curb_setup()
|
||||||
/* The base offset for our push data is passed in as R0.0[31:6]. We have
|
/* The base offset for our push data is passed in as R0.0[31:6]. We have
|
||||||
* to mask off the bottom 6 bits.
|
* to mask off the bottom 6 bits.
|
||||||
*/
|
*/
|
||||||
fs_reg base_addr =
|
brw_reg base_addr =
|
||||||
ubld.AND(retype(brw_vec1_grf(0, 0), BRW_TYPE_UD),
|
ubld.AND(retype(brw_vec1_grf(0, 0), BRW_TYPE_UD),
|
||||||
brw_imm_ud(INTEL_MASK(31, 6)));
|
brw_imm_ud(INTEL_MASK(31, 6)));
|
||||||
|
|
||||||
|
|
@ -1228,17 +1228,17 @@ fs_visitor::assign_curb_setup()
|
||||||
/* This pass occurs after all of the optimization passes, so don't
|
/* This pass occurs after all of the optimization passes, so don't
|
||||||
* emit an 'ADD addr, base_addr, 0' instruction.
|
* emit an 'ADD addr, base_addr, 0' instruction.
|
||||||
*/
|
*/
|
||||||
fs_reg addr = i == 0 ? base_addr :
|
brw_reg addr = i == 0 ? base_addr :
|
||||||
ubld.ADD(base_addr, brw_imm_ud(i * REG_SIZE));
|
ubld.ADD(base_addr, brw_imm_ud(i * REG_SIZE));
|
||||||
|
|
||||||
fs_reg srcs[4] = {
|
brw_reg srcs[4] = {
|
||||||
brw_imm_ud(0), /* desc */
|
brw_imm_ud(0), /* desc */
|
||||||
brw_imm_ud(0), /* ex_desc */
|
brw_imm_ud(0), /* ex_desc */
|
||||||
addr, /* payload */
|
addr, /* payload */
|
||||||
fs_reg(), /* payload2 */
|
brw_reg(), /* payload2 */
|
||||||
};
|
};
|
||||||
|
|
||||||
fs_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
brw_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4);
|
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4);
|
||||||
|
|
||||||
|
|
@ -1310,10 +1310,10 @@ fs_visitor::assign_curb_setup()
|
||||||
struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8,
|
struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8,
|
||||||
mask_param % 8);
|
mask_param % 8);
|
||||||
|
|
||||||
fs_reg b32;
|
brw_reg b32;
|
||||||
for (unsigned i = 0; i < 64; i++) {
|
for (unsigned i = 0; i < 64; i++) {
|
||||||
if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) {
|
if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) {
|
||||||
fs_reg shifted = ubld.vgrf(BRW_TYPE_W, 2);
|
brw_reg shifted = ubld.vgrf(BRW_TYPE_W, 2);
|
||||||
ubld.SHL(horiz_offset(shifted, 8),
|
ubld.SHL(horiz_offset(shifted, 8),
|
||||||
byte_offset(retype(mask, BRW_TYPE_W), i / 8),
|
byte_offset(retype(mask, BRW_TYPE_W), i / 8),
|
||||||
brw_imm_v(0x01234567));
|
brw_imm_v(0x01234567));
|
||||||
|
|
@ -1625,13 +1625,13 @@ fs_visitor::assign_urb_setup()
|
||||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||||
for (int i = 0; i < inst->sources; i++) {
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
if (inst->src[i].file == ATTR) {
|
if (inst->src[i].file == ATTR) {
|
||||||
/* ATTR fs_reg::nr in the FS is in units of logical scalar
|
/* ATTR brw_reg::nr in the FS is in units of logical scalar
|
||||||
* inputs each of which consumes 16B on Gfx4-Gfx12. In
|
* inputs each of which consumes 16B on Gfx4-Gfx12. In
|
||||||
* single polygon mode this leads to the following layout
|
* single polygon mode this leads to the following layout
|
||||||
* of the vertex setup plane parameters in the ATTR
|
* of the vertex setup plane parameters in the ATTR
|
||||||
* register file:
|
* register file:
|
||||||
*
|
*
|
||||||
* fs_reg::nr Input Comp0 Comp1 Comp2 Comp3
|
* brw_reg::nr Input Comp0 Comp1 Comp2 Comp3
|
||||||
* 0 Attr0.x a1-a0 a2-a0 N/A a0
|
* 0 Attr0.x a1-a0 a2-a0 N/A a0
|
||||||
* 1 Attr0.y a1-a0 a2-a0 N/A a0
|
* 1 Attr0.y a1-a0 a2-a0 N/A a0
|
||||||
* 2 Attr0.z a1-a0 a2-a0 N/A a0
|
* 2 Attr0.z a1-a0 a2-a0 N/A a0
|
||||||
|
|
@ -1644,7 +1644,7 @@ fs_visitor::assign_urb_setup()
|
||||||
* different plane parameters, so each parameter above is
|
* different plane parameters, so each parameter above is
|
||||||
* represented as a dispatch_width-wide vector:
|
* represented as a dispatch_width-wide vector:
|
||||||
*
|
*
|
||||||
* fs_reg::nr fs_reg::offset Input Comp0 ... CompN
|
* brw_reg::nr brw_reg::offset Input Comp0 ... CompN
|
||||||
* 0 0 Attr0.x a1[0]-a0[0] ... a1[N]-a0[N]
|
* 0 0 Attr0.x a1[0]-a0[0] ... a1[N]-a0[N]
|
||||||
* 0 4 * dispatch_width Attr0.x a2[0]-a0[0] ... a2[N]-a0[N]
|
* 0 4 * dispatch_width Attr0.x a2[0]-a0[0] ... a2[N]-a0[N]
|
||||||
* 0 8 * dispatch_width Attr0.x N/A ... N/A
|
* 0 8 * dispatch_width Attr0.x N/A ... N/A
|
||||||
|
|
@ -1955,7 +1955,7 @@ fs_visitor::assign_constant_locations()
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::get_pull_locs(const fs_reg &src,
|
fs_visitor::get_pull_locs(const brw_reg &src,
|
||||||
unsigned *out_surf_index,
|
unsigned *out_surf_index,
|
||||||
unsigned *out_pull_index)
|
unsigned *out_pull_index)
|
||||||
{
|
{
|
||||||
|
|
@ -1993,11 +1993,11 @@ fs_visitor::emit_repclear_shader()
|
||||||
assert(uniforms == 0);
|
assert(uniforms == 0);
|
||||||
assume(key->nr_color_regions > 0);
|
assume(key->nr_color_regions > 0);
|
||||||
|
|
||||||
fs_reg color_output = retype(brw_vec4_grf(127, 0), BRW_TYPE_UD);
|
brw_reg color_output = retype(brw_vec4_grf(127, 0), BRW_TYPE_UD);
|
||||||
fs_reg header = retype(brw_vec8_grf(125, 0), BRW_TYPE_UD);
|
brw_reg header = retype(brw_vec8_grf(125, 0), BRW_TYPE_UD);
|
||||||
|
|
||||||
/* We pass the clear color as a flat input. Copy it to the output. */
|
/* We pass the clear color as a flat input. Copy it to the output. */
|
||||||
fs_reg color_input =
|
brw_reg color_input =
|
||||||
brw_make_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_TYPE_UD,
|
brw_make_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_TYPE_UD,
|
||||||
BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
|
BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
|
||||||
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
||||||
|
|
@ -2047,7 +2047,7 @@ fs_visitor::emit_repclear_shader()
|
||||||
* thread payload, \p bld is required to have a dispatch_width() not greater
|
* thread payload, \p bld is required to have a dispatch_width() not greater
|
||||||
* than 16 for fragment shaders.
|
* than 16 for fragment shaders.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
brw_sample_mask_reg(const fs_builder &bld)
|
brw_sample_mask_reg(const fs_builder &bld)
|
||||||
{
|
{
|
||||||
const fs_visitor &s = *bld.shader;
|
const fs_visitor &s = *bld.shader;
|
||||||
|
|
@ -2107,7 +2107,7 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
|
||||||
bld.dispatch_width() == inst->exec_size);
|
bld.dispatch_width() == inst->exec_size);
|
||||||
|
|
||||||
const fs_visitor &s = *bld.shader;
|
const fs_visitor &s = *bld.shader;
|
||||||
const fs_reg sample_mask = brw_sample_mask_reg(bld);
|
const brw_reg sample_mask = brw_sample_mask_reg(bld);
|
||||||
const unsigned subreg = sample_mask_flag_subreg(s);
|
const unsigned subreg = sample_mask_flag_subreg(s);
|
||||||
|
|
||||||
if (s.devinfo->ver >= 20 || brw_wm_prog_data(s.prog_data)->uses_kill) {
|
if (s.devinfo->ver >= 20 || brw_wm_prog_data(s.prog_data)->uses_kill) {
|
||||||
|
|
@ -3011,8 +3011,8 @@ fs_visitor::set_tcs_invocation_id()
|
||||||
* * 22:16 on gfx11+
|
* * 22:16 on gfx11+
|
||||||
* * 23:17 otherwise
|
* * 23:17 otherwise
|
||||||
*/
|
*/
|
||||||
fs_reg t =
|
brw_reg t =
|
||||||
bld.AND(fs_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
|
bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
|
||||||
brw_imm_ud(instance_id_mask));
|
brw_imm_ud(instance_id_mask));
|
||||||
|
|
||||||
if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
|
if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
|
||||||
|
|
@ -3023,9 +3023,9 @@ fs_visitor::set_tcs_invocation_id()
|
||||||
|
|
||||||
assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);
|
assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);
|
||||||
|
|
||||||
fs_reg channels_uw = bld.vgrf(BRW_TYPE_UW);
|
brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW);
|
||||||
fs_reg channels_ud = bld.vgrf(BRW_TYPE_UD);
|
brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210)));
|
bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210)));
|
||||||
bld.MOV(channels_ud, channels_uw);
|
bld.MOV(channels_ud, channels_uw);
|
||||||
|
|
||||||
if (tcs_prog_data->instances == 1) {
|
if (tcs_prog_data->instances == 1) {
|
||||||
|
|
@ -3054,7 +3054,7 @@ fs_visitor::emit_tcs_thread_end()
|
||||||
* algorithm to set it optimally). On other platforms, we simply write
|
* algorithm to set it optimally). On other platforms, we simply write
|
||||||
* zero to a reserved/MBZ patch header DWord which has no consequence.
|
* zero to a reserved/MBZ patch header DWord which has no consequence.
|
||||||
*/
|
*/
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
|
srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
|
||||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
||||||
|
|
@ -3258,7 +3258,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||||
* stored in R0.15/R1.15 on gfx20+ and in R1.7/R2.7 on
|
* stored in R0.15/R1.15 on gfx20+ and in R1.7/R2.7 on
|
||||||
* gfx6+.
|
* gfx6+.
|
||||||
*/
|
*/
|
||||||
const fs_reg dispatch_mask =
|
const brw_reg dispatch_mask =
|
||||||
devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) :
|
devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) :
|
||||||
brw_vec1_grf(i + 1, 7);
|
brw_vec1_grf(i + 1, 7);
|
||||||
bld.exec_all().group(1, 0)
|
bld.exec_all().group(1, 0)
|
||||||
|
|
@ -4533,18 +4533,18 @@ bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag)
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace brw {
|
namespace brw {
|
||||||
fs_reg
|
brw_reg
|
||||||
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
||||||
brw_reg_type type, unsigned n)
|
brw_reg_type type, unsigned n)
|
||||||
{
|
{
|
||||||
if (!regs[0])
|
if (!regs[0])
|
||||||
return fs_reg();
|
return brw_reg();
|
||||||
|
|
||||||
if (bld.dispatch_width() > 16) {
|
if (bld.dispatch_width() > 16) {
|
||||||
const fs_reg tmp = bld.vgrf(type, n);
|
const brw_reg tmp = bld.vgrf(type, n);
|
||||||
const brw::fs_builder hbld = bld.exec_all().group(16, 0);
|
const brw::fs_builder hbld = bld.exec_all().group(16, 0);
|
||||||
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
||||||
fs_reg *const components = new fs_reg[m * n];
|
brw_reg *const components = new brw_reg[m * n];
|
||||||
|
|
||||||
for (unsigned c = 0; c < n; c++) {
|
for (unsigned c = 0; c < n; c++) {
|
||||||
for (unsigned g = 0; g < m; g++)
|
for (unsigned g = 0; g < m; g++)
|
||||||
|
|
@ -4558,22 +4558,22 @@ namespace brw {
|
||||||
return tmp;
|
return tmp;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
return brw_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
|
fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
|
||||||
{
|
{
|
||||||
if (!regs[0])
|
if (!regs[0])
|
||||||
return fs_reg();
|
return brw_reg();
|
||||||
else if (bld.shader->devinfo->ver >= 20)
|
else if (bld.shader->devinfo->ver >= 20)
|
||||||
return fetch_payload_reg(bld, regs, BRW_TYPE_F, 2);
|
return fetch_payload_reg(bld, regs, BRW_TYPE_F, 2);
|
||||||
|
|
||||||
const fs_reg tmp = bld.vgrf(BRW_TYPE_F, 2);
|
const brw_reg tmp = bld.vgrf(BRW_TYPE_F, 2);
|
||||||
const brw::fs_builder hbld = bld.exec_all().group(8, 0);
|
const brw::fs_builder hbld = bld.exec_all().group(8, 0);
|
||||||
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
||||||
fs_reg *const components = new fs_reg[2 * m];
|
brw_reg *const components = new brw_reg[2 * m];
|
||||||
|
|
||||||
for (unsigned c = 0; c < 2; c++) {
|
for (unsigned c = 0; c < 2; c++) {
|
||||||
for (unsigned g = 0; g < m; g++)
|
for (unsigned g = 0; g < m; g++)
|
||||||
|
|
|
||||||
|
|
@ -76,21 +76,21 @@ namespace brw {
|
||||||
~def_analysis();
|
~def_analysis();
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
get(const fs_reg &reg) const
|
get(const brw_reg &reg) const
|
||||||
{
|
{
|
||||||
return reg.file == VGRF && reg.nr < def_count ?
|
return reg.file == VGRF && reg.nr < def_count ?
|
||||||
def_insts[reg.nr] : NULL;
|
def_insts[reg.nr] : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bblock_t *
|
bblock_t *
|
||||||
get_block(const fs_reg &reg) const
|
get_block(const brw_reg &reg) const
|
||||||
{
|
{
|
||||||
return reg.file == VGRF && reg.nr < def_count ?
|
return reg.file == VGRF && reg.nr < def_count ?
|
||||||
def_blocks[reg.nr] : NULL;
|
def_blocks[reg.nr] : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t
|
uint32_t
|
||||||
get_use_count(const fs_reg &reg) const
|
get_use_count(const brw_reg &reg) const
|
||||||
{
|
{
|
||||||
return reg.file == VGRF && reg.nr < def_count ?
|
return reg.file == VGRF && reg.nr < def_count ?
|
||||||
def_use_counts[reg.nr] : 0;
|
def_use_counts[reg.nr] : 0;
|
||||||
|
|
@ -164,33 +164,33 @@ protected:
|
||||||
struct vs_thread_payload : public thread_payload {
|
struct vs_thread_payload : public thread_payload {
|
||||||
vs_thread_payload(const fs_visitor &v);
|
vs_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
fs_reg urb_handles;
|
brw_reg urb_handles;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tcs_thread_payload : public thread_payload {
|
struct tcs_thread_payload : public thread_payload {
|
||||||
tcs_thread_payload(const fs_visitor &v);
|
tcs_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
fs_reg patch_urb_output;
|
brw_reg patch_urb_output;
|
||||||
fs_reg primitive_id;
|
brw_reg primitive_id;
|
||||||
fs_reg icp_handle_start;
|
brw_reg icp_handle_start;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tes_thread_payload : public thread_payload {
|
struct tes_thread_payload : public thread_payload {
|
||||||
tes_thread_payload(const fs_visitor &v);
|
tes_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
fs_reg patch_urb_input;
|
brw_reg patch_urb_input;
|
||||||
fs_reg primitive_id;
|
brw_reg primitive_id;
|
||||||
fs_reg coords[3];
|
brw_reg coords[3];
|
||||||
fs_reg urb_output;
|
brw_reg urb_output;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gs_thread_payload : public thread_payload {
|
struct gs_thread_payload : public thread_payload {
|
||||||
gs_thread_payload(fs_visitor &v);
|
gs_thread_payload(fs_visitor &v);
|
||||||
|
|
||||||
fs_reg urb_handles;
|
brw_reg urb_handles;
|
||||||
fs_reg primitive_id;
|
brw_reg primitive_id;
|
||||||
fs_reg instance_id;
|
brw_reg instance_id;
|
||||||
fs_reg icp_handle_start;
|
brw_reg icp_handle_start;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct fs_thread_payload : public thread_payload {
|
struct fs_thread_payload : public thread_payload {
|
||||||
|
|
@ -215,34 +215,34 @@ struct fs_thread_payload : public thread_payload {
|
||||||
struct cs_thread_payload : public thread_payload {
|
struct cs_thread_payload : public thread_payload {
|
||||||
cs_thread_payload(const fs_visitor &v);
|
cs_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
void load_subgroup_id(const brw::fs_builder &bld, fs_reg &dest) const;
|
void load_subgroup_id(const brw::fs_builder &bld, brw_reg &dest) const;
|
||||||
|
|
||||||
fs_reg local_invocation_id[3];
|
brw_reg local_invocation_id[3];
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
fs_reg subgroup_id_;
|
brw_reg subgroup_id_;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct task_mesh_thread_payload : public cs_thread_payload {
|
struct task_mesh_thread_payload : public cs_thread_payload {
|
||||||
task_mesh_thread_payload(fs_visitor &v);
|
task_mesh_thread_payload(fs_visitor &v);
|
||||||
|
|
||||||
fs_reg extended_parameter_0;
|
brw_reg extended_parameter_0;
|
||||||
fs_reg local_index;
|
brw_reg local_index;
|
||||||
fs_reg inline_parameter;
|
brw_reg inline_parameter;
|
||||||
|
|
||||||
fs_reg urb_output;
|
brw_reg urb_output;
|
||||||
|
|
||||||
/* URB to read Task memory inputs. Only valid for MESH stage. */
|
/* URB to read Task memory inputs. Only valid for MESH stage. */
|
||||||
fs_reg task_urb_input;
|
brw_reg task_urb_input;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bs_thread_payload : public thread_payload {
|
struct bs_thread_payload : public thread_payload {
|
||||||
bs_thread_payload(const fs_visitor &v);
|
bs_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
fs_reg global_arg_ptr;
|
brw_reg global_arg_ptr;
|
||||||
fs_reg local_arg_ptr;
|
brw_reg local_arg_ptr;
|
||||||
|
|
||||||
void load_shader_type(const brw::fs_builder &bld, fs_reg &dest) const;
|
void load_shader_type(const brw::fs_builder &bld, brw_reg &dest) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum instruction_scheduler_mode {
|
enum instruction_scheduler_mode {
|
||||||
|
|
@ -293,10 +293,10 @@ public:
|
||||||
void import_uniforms(fs_visitor *v);
|
void import_uniforms(fs_visitor *v);
|
||||||
|
|
||||||
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
|
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
|
||||||
const fs_reg &dst,
|
const brw_reg &dst,
|
||||||
const fs_reg &surface,
|
const brw_reg &surface,
|
||||||
const fs_reg &surface_handle,
|
const brw_reg &surface_handle,
|
||||||
const fs_reg &varying_offset,
|
const brw_reg &varying_offset,
|
||||||
uint32_t const_offset,
|
uint32_t const_offset,
|
||||||
uint8_t alignment,
|
uint8_t alignment,
|
||||||
unsigned components);
|
unsigned components);
|
||||||
|
|
@ -324,7 +324,7 @@ public:
|
||||||
void calculate_payload_ranges(unsigned payload_node_count,
|
void calculate_payload_ranges(unsigned payload_node_count,
|
||||||
int *payload_last_use_ip) const;
|
int *payload_last_use_ip) const;
|
||||||
void assign_constant_locations();
|
void assign_constant_locations();
|
||||||
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
|
bool get_pull_locs(const brw_reg &src, unsigned *out_surf_index,
|
||||||
unsigned *out_pull_index);
|
unsigned *out_pull_index);
|
||||||
void invalidate_analysis(brw::analysis_dependency_class c);
|
void invalidate_analysis(brw::analysis_dependency_class c);
|
||||||
|
|
||||||
|
|
@ -343,23 +343,23 @@ public:
|
||||||
void set_tcs_invocation_id();
|
void set_tcs_invocation_id();
|
||||||
|
|
||||||
fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
|
fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
|
||||||
fs_reg color1, fs_reg color2,
|
brw_reg color1, brw_reg color2,
|
||||||
fs_reg src0_alpha, unsigned components);
|
brw_reg src0_alpha, unsigned components);
|
||||||
void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
|
void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
|
||||||
void emit_fb_writes();
|
void emit_fb_writes();
|
||||||
void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
|
void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
|
||||||
void emit_gs_control_data_bits(const fs_reg &vertex_count);
|
void emit_gs_control_data_bits(const brw_reg &vertex_count);
|
||||||
fs_reg gs_urb_channel_mask(const fs_reg &dword_index);
|
brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
|
||||||
fs_reg gs_urb_per_slot_dword_index(const fs_reg &vertex_count);
|
brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
|
||||||
void emit_gs_thread_end();
|
void emit_gs_thread_end();
|
||||||
bool mark_last_urb_write_with_eot();
|
bool mark_last_urb_write_with_eot();
|
||||||
void emit_tcs_thread_end();
|
void emit_tcs_thread_end();
|
||||||
void emit_urb_fence();
|
void emit_urb_fence();
|
||||||
void emit_cs_terminate();
|
void emit_cs_terminate();
|
||||||
|
|
||||||
fs_reg interp_reg(const brw::fs_builder &bld, unsigned location,
|
brw_reg interp_reg(const brw::fs_builder &bld, unsigned location,
|
||||||
unsigned channel, unsigned comp);
|
unsigned channel, unsigned comp);
|
||||||
fs_reg per_primitive_reg(const brw::fs_builder &bld,
|
brw_reg per_primitive_reg(const brw::fs_builder &bld,
|
||||||
int location, unsigned comp);
|
int location, unsigned comp);
|
||||||
|
|
||||||
void dump_instruction_to_file(const fs_inst *inst, FILE *file, const brw::def_analysis *defs) const;
|
void dump_instruction_to_file(const fs_inst *inst, FILE *file, const brw::def_analysis *defs) const;
|
||||||
|
|
@ -416,11 +416,11 @@ public:
|
||||||
*/
|
*/
|
||||||
int *push_constant_loc;
|
int *push_constant_loc;
|
||||||
|
|
||||||
fs_reg frag_depth;
|
brw_reg frag_depth;
|
||||||
fs_reg frag_stencil;
|
brw_reg frag_stencil;
|
||||||
fs_reg sample_mask;
|
brw_reg sample_mask;
|
||||||
fs_reg outputs[VARYING_SLOT_MAX];
|
brw_reg outputs[VARYING_SLOT_MAX];
|
||||||
fs_reg dual_src_output;
|
brw_reg dual_src_output;
|
||||||
int first_non_payload_grf;
|
int first_non_payload_grf;
|
||||||
|
|
||||||
bool failed;
|
bool failed;
|
||||||
|
|
@ -479,15 +479,15 @@ public:
|
||||||
|
|
||||||
bool source_depth_to_render_target;
|
bool source_depth_to_render_target;
|
||||||
|
|
||||||
fs_reg pixel_x;
|
brw_reg pixel_x;
|
||||||
fs_reg pixel_y;
|
brw_reg pixel_y;
|
||||||
fs_reg pixel_z;
|
brw_reg pixel_z;
|
||||||
fs_reg wpos_w;
|
brw_reg wpos_w;
|
||||||
fs_reg pixel_w;
|
brw_reg pixel_w;
|
||||||
fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
|
brw_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
|
||||||
fs_reg final_gs_vertex_count;
|
brw_reg final_gs_vertex_count;
|
||||||
fs_reg control_data_bits;
|
brw_reg control_data_bits;
|
||||||
fs_reg invocation_id;
|
brw_reg invocation_id;
|
||||||
|
|
||||||
unsigned grf_used;
|
unsigned grf_used;
|
||||||
bool spilled_any_registers;
|
bool spilled_any_registers;
|
||||||
|
|
@ -597,15 +597,15 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace brw {
|
namespace brw {
|
||||||
fs_reg
|
brw_reg
|
||||||
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
||||||
brw_reg_type type = BRW_TYPE_F,
|
brw_reg_type type = BRW_TYPE_F,
|
||||||
unsigned n = 1);
|
unsigned n = 1);
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);
|
fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);
|
||||||
|
|
||||||
inline fs_reg
|
inline brw_reg
|
||||||
dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
|
dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
|
||||||
{
|
{
|
||||||
return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
|
return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
|
||||||
|
|
@ -621,8 +621,8 @@ namespace brw {
|
||||||
}
|
}
|
||||||
|
|
||||||
void shuffle_from_32bit_read(const brw::fs_builder &bld,
|
void shuffle_from_32bit_read(const brw::fs_builder &bld,
|
||||||
const fs_reg &dst,
|
const brw_reg &dst,
|
||||||
const fs_reg &src,
|
const brw_reg &src,
|
||||||
uint32_t first_component,
|
uint32_t first_component,
|
||||||
uint32_t components);
|
uint32_t components);
|
||||||
|
|
||||||
|
|
@ -636,7 +636,7 @@ void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
|
||||||
|
|
||||||
bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
|
bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
|
||||||
|
|
||||||
fs_reg brw_sample_mask_reg(const brw::fs_builder &bld);
|
brw_reg brw_sample_mask_reg(const brw::fs_builder &bld);
|
||||||
void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);
|
void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);
|
||||||
|
|
||||||
int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
|
int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
|
||||||
|
|
|
||||||
|
|
@ -480,7 +480,7 @@ namespace {
|
||||||
* possibly incur bank conflicts.
|
* possibly incur bank conflicts.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
is_grf(const fs_reg &r)
|
is_grf(const brw_reg &r)
|
||||||
{
|
{
|
||||||
return r.file == VGRF || r.file == FIXED_GRF;
|
return r.file == VGRF || r.file == FIXED_GRF;
|
||||||
}
|
}
|
||||||
|
|
@ -492,7 +492,7 @@ namespace {
|
||||||
* allocation or whether it was part of a VGRF allocation.
|
* allocation or whether it was part of a VGRF allocation.
|
||||||
*/
|
*/
|
||||||
unsigned
|
unsigned
|
||||||
reg_of(const fs_reg &r)
|
reg_of(const brw_reg &r)
|
||||||
{
|
{
|
||||||
assert(is_grf(r));
|
assert(is_grf(r));
|
||||||
if (r.file == VGRF)
|
if (r.file == VGRF)
|
||||||
|
|
@ -871,8 +871,8 @@ namespace {
|
||||||
* Apply the GRF atom permutation given by \p map to register \p r and
|
* Apply the GRF atom permutation given by \p map to register \p r and
|
||||||
* return the result.
|
* return the result.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
transform(const partitioning &p, const permutation &map, fs_reg r)
|
transform(const partitioning &p, const permutation &map, brw_reg r)
|
||||||
{
|
{
|
||||||
if (r.file == VGRF) {
|
if (r.file == VGRF) {
|
||||||
const unsigned reg = reg_of(r);
|
const unsigned reg = reg_of(r);
|
||||||
|
|
|
||||||
|
|
@ -184,7 +184,7 @@ namespace brw {
|
||||||
* dispatch_width units (which is just enough space for one logical
|
* dispatch_width units (which is just enough space for one logical
|
||||||
* component in this IR).
|
* component in this IR).
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
vgrf(enum brw_reg_type type, unsigned n = 1) const
|
vgrf(enum brw_reg_type type, unsigned n = 1) const
|
||||||
{
|
{
|
||||||
const unsigned unit = reg_unit(shader->devinfo);
|
const unsigned unit = reg_unit(shader->devinfo);
|
||||||
|
|
@ -202,34 +202,34 @@ namespace brw {
|
||||||
/**
|
/**
|
||||||
* Create a null register of floating type.
|
* Create a null register of floating type.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
null_reg_f() const
|
null_reg_f() const
|
||||||
{
|
{
|
||||||
return fs_reg(retype(brw_null_reg(), BRW_TYPE_F));
|
return brw_reg(retype(brw_null_reg(), BRW_TYPE_F));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
null_reg_df() const
|
null_reg_df() const
|
||||||
{
|
{
|
||||||
return fs_reg(retype(brw_null_reg(), BRW_TYPE_DF));
|
return brw_reg(retype(brw_null_reg(), BRW_TYPE_DF));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a null register of signed integer type.
|
* Create a null register of signed integer type.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
null_reg_d() const
|
null_reg_d() const
|
||||||
{
|
{
|
||||||
return fs_reg(retype(brw_null_reg(), BRW_TYPE_D));
|
return brw_reg(retype(brw_null_reg(), BRW_TYPE_D));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a null register of unsigned integer type.
|
* Create a null register of unsigned integer type.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
null_reg_ud() const
|
null_reg_ud() const
|
||||||
{
|
{
|
||||||
return fs_reg(retype(brw_null_reg(), BRW_TYPE_UD));
|
return brw_reg(retype(brw_null_reg(), BRW_TYPE_UD));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -254,7 +254,7 @@ namespace brw {
|
||||||
* Create and insert a nullary instruction into the program.
|
* Create and insert a nullary instruction into the program.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit(enum opcode opcode, const fs_reg &dst) const
|
emit(enum opcode opcode, const brw_reg &dst) const
|
||||||
{
|
{
|
||||||
return emit(fs_inst(opcode, dispatch_width(), dst));
|
return emit(fs_inst(opcode, dispatch_width(), dst));
|
||||||
}
|
}
|
||||||
|
|
@ -263,7 +263,7 @@ namespace brw {
|
||||||
* Create and insert a unary instruction into the program.
|
* Create and insert a unary instruction into the program.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) const
|
emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0) const
|
||||||
{
|
{
|
||||||
return emit(fs_inst(opcode, dispatch_width(), dst, src0));
|
return emit(fs_inst(opcode, dispatch_width(), dst, src0));
|
||||||
}
|
}
|
||||||
|
|
@ -272,8 +272,8 @@ namespace brw {
|
||||||
* Create and insert a binary instruction into the program.
|
* Create and insert a binary instruction into the program.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0,
|
||||||
const fs_reg &src1) const
|
const brw_reg &src1) const
|
||||||
{
|
{
|
||||||
return emit(fs_inst(opcode, dispatch_width(), dst,
|
return emit(fs_inst(opcode, dispatch_width(), dst,
|
||||||
src0, src1));
|
src0, src1));
|
||||||
|
|
@ -283,8 +283,8 @@ namespace brw {
|
||||||
* Create and insert a ternary instruction into the program.
|
* Create and insert a ternary instruction into the program.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0,
|
||||||
const fs_reg &src1, const fs_reg &src2) const
|
const brw_reg &src1, const brw_reg &src2) const
|
||||||
{
|
{
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case BRW_OPCODE_BFE:
|
case BRW_OPCODE_BFE:
|
||||||
|
|
@ -307,7 +307,7 @@ namespace brw {
|
||||||
* into the program.
|
* into the program.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit(enum opcode opcode, const fs_reg &dst, const fs_reg srcs[],
|
emit(enum opcode opcode, const brw_reg &dst, const brw_reg srcs[],
|
||||||
unsigned n) const
|
unsigned n) const
|
||||||
{
|
{
|
||||||
/* Use the emit() methods for specific operand counts to ensure that
|
/* Use the emit() methods for specific operand counts to ensure that
|
||||||
|
|
@ -352,8 +352,8 @@ namespace brw {
|
||||||
* Generally useful to get the minimum or maximum of two values.
|
* Generally useful to get the minimum or maximum of two values.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit_minmax(const fs_reg &dst, const fs_reg &src0,
|
emit_minmax(const brw_reg &dst, const brw_reg &src0,
|
||||||
const fs_reg &src1, brw_conditional_mod mod) const
|
const brw_reg &src1, brw_conditional_mod mod) const
|
||||||
{
|
{
|
||||||
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
|
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
|
|
@ -367,8 +367,8 @@ namespace brw {
|
||||||
/**
|
/**
|
||||||
* Copy any live channel from \p src to the first channel of the result.
|
* Copy any live channel from \p src to the first channel of the result.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
emit_uniformize(const fs_reg &src) const
|
emit_uniformize(const brw_reg &src) const
|
||||||
{
|
{
|
||||||
/* FIXME: We use a vector chan_index and dst to allow constant and
|
/* FIXME: We use a vector chan_index and dst to allow constant and
|
||||||
* copy propagration to move result all the way into the consuming
|
* copy propagration to move result all the way into the consuming
|
||||||
|
|
@ -378,37 +378,37 @@ namespace brw {
|
||||||
* should go back to scalar destinations here.
|
* should go back to scalar destinations here.
|
||||||
*/
|
*/
|
||||||
const fs_builder ubld = exec_all();
|
const fs_builder ubld = exec_all();
|
||||||
const fs_reg chan_index = vgrf(BRW_TYPE_UD);
|
const brw_reg chan_index = vgrf(BRW_TYPE_UD);
|
||||||
const fs_reg dst = vgrf(src.type);
|
const brw_reg dst = vgrf(src.type);
|
||||||
|
|
||||||
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
|
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
|
||||||
ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
|
ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
|
||||||
|
|
||||||
return fs_reg(component(dst, 0));
|
return brw_reg(component(dst, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
move_to_vgrf(const fs_reg &src, unsigned num_components) const
|
move_to_vgrf(const brw_reg &src, unsigned num_components) const
|
||||||
{
|
{
|
||||||
fs_reg *const src_comps = new fs_reg[num_components];
|
brw_reg *const src_comps = new brw_reg[num_components];
|
||||||
for (unsigned i = 0; i < num_components; i++)
|
for (unsigned i = 0; i < num_components; i++)
|
||||||
src_comps[i] = offset(src, dispatch_width(), i);
|
src_comps[i] = offset(src, dispatch_width(), i);
|
||||||
|
|
||||||
const fs_reg dst = vgrf(src.type, num_components);
|
const brw_reg dst = vgrf(src.type, num_components);
|
||||||
LOAD_PAYLOAD(dst, src_comps, num_components, 0);
|
LOAD_PAYLOAD(dst, src_comps, num_components, 0);
|
||||||
|
|
||||||
delete[] src_comps;
|
delete[] src_comps;
|
||||||
|
|
||||||
return fs_reg(dst);
|
return brw_reg(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_scan_step(enum opcode opcode, brw_conditional_mod mod,
|
emit_scan_step(enum opcode opcode, brw_conditional_mod mod,
|
||||||
const fs_reg &tmp,
|
const brw_reg &tmp,
|
||||||
unsigned left_offset, unsigned left_stride,
|
unsigned left_offset, unsigned left_stride,
|
||||||
unsigned right_offset, unsigned right_stride) const
|
unsigned right_offset, unsigned right_stride) const
|
||||||
{
|
{
|
||||||
fs_reg left, right;
|
brw_reg left, right;
|
||||||
left = horiz_stride(horiz_offset(tmp, left_offset), left_stride);
|
left = horiz_stride(horiz_offset(tmp, left_offset), left_stride);
|
||||||
right = horiz_stride(horiz_offset(tmp, right_offset), right_stride);
|
right = horiz_stride(horiz_offset(tmp, right_offset), right_stride);
|
||||||
if ((tmp.type == BRW_TYPE_Q || tmp.type == BRW_TYPE_UQ) &&
|
if ((tmp.type == BRW_TYPE_Q || tmp.type == BRW_TYPE_UQ) &&
|
||||||
|
|
@ -430,13 +430,13 @@ namespace brw {
|
||||||
/* We treat the bottom 32 bits as unsigned regardless of
|
/* We treat the bottom 32 bits as unsigned regardless of
|
||||||
* whether or not the integer as a whole is signed.
|
* whether or not the integer as a whole is signed.
|
||||||
*/
|
*/
|
||||||
fs_reg right_low = subscript(right, BRW_TYPE_UD, 0);
|
brw_reg right_low = subscript(right, BRW_TYPE_UD, 0);
|
||||||
fs_reg left_low = subscript(left, BRW_TYPE_UD, 0);
|
brw_reg left_low = subscript(left, BRW_TYPE_UD, 0);
|
||||||
|
|
||||||
/* The upper bits get the same sign as the 64-bit type */
|
/* The upper bits get the same sign as the 64-bit type */
|
||||||
brw_reg_type type32 = brw_type_with_size(tmp.type, 32);
|
brw_reg_type type32 = brw_type_with_size(tmp.type, 32);
|
||||||
fs_reg right_high = subscript(right, type32, 1);
|
brw_reg right_high = subscript(right, type32, 1);
|
||||||
fs_reg left_high = subscript(left, type32, 1);
|
brw_reg left_high = subscript(left, type32, 1);
|
||||||
|
|
||||||
/* Build up our comparison:
|
/* Build up our comparison:
|
||||||
*
|
*
|
||||||
|
|
@ -468,7 +468,7 @@ namespace brw {
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_scan(enum opcode opcode, const fs_reg &tmp,
|
emit_scan(enum opcode opcode, const brw_reg &tmp,
|
||||||
unsigned cluster_size, brw_conditional_mod mod) const
|
unsigned cluster_size, brw_conditional_mod mod) const
|
||||||
{
|
{
|
||||||
assert(dispatch_width() >= 8);
|
assert(dispatch_width() >= 8);
|
||||||
|
|
@ -479,8 +479,8 @@ namespace brw {
|
||||||
if (dispatch_width() * brw_type_size_bytes(tmp.type) > 2 * REG_SIZE) {
|
if (dispatch_width() * brw_type_size_bytes(tmp.type) > 2 * REG_SIZE) {
|
||||||
const unsigned half_width = dispatch_width() / 2;
|
const unsigned half_width = dispatch_width() / 2;
|
||||||
const fs_builder ubld = exec_all().group(half_width, 0);
|
const fs_builder ubld = exec_all().group(half_width, 0);
|
||||||
fs_reg left = tmp;
|
brw_reg left = tmp;
|
||||||
fs_reg right = horiz_offset(tmp, half_width);
|
brw_reg right = horiz_offset(tmp, half_width);
|
||||||
ubld.emit_scan(opcode, left, cluster_size, mod);
|
ubld.emit_scan(opcode, left, cluster_size, mod);
|
||||||
ubld.emit_scan(opcode, right, cluster_size, mod);
|
ubld.emit_scan(opcode, right, cluster_size, mod);
|
||||||
if (cluster_size > half_width) {
|
if (cluster_size > half_width) {
|
||||||
|
|
@ -547,15 +547,15 @@ namespace brw {
|
||||||
*/
|
*/
|
||||||
#define _ALU1(prefix, op) \
|
#define _ALU1(prefix, op) \
|
||||||
fs_inst * \
|
fs_inst * \
|
||||||
op(const fs_reg &dst, const fs_reg &src0) const \
|
op(const brw_reg &dst, const brw_reg &src0) const \
|
||||||
{ \
|
{ \
|
||||||
assert(_dispatch_width == 1 || \
|
assert(_dispatch_width == 1 || \
|
||||||
(dst.file >= VGRF && dst.stride != 0) || \
|
(dst.file >= VGRF && dst.stride != 0) || \
|
||||||
(dst.file < VGRF && dst.hstride != 0)); \
|
(dst.file < VGRF && dst.hstride != 0)); \
|
||||||
return emit(prefix##op, dst, src0); \
|
return emit(prefix##op, dst, src0); \
|
||||||
} \
|
} \
|
||||||
fs_reg \
|
brw_reg \
|
||||||
op(const fs_reg &src0, fs_inst **out = NULL) const \
|
op(const brw_reg &src0, fs_inst **out = NULL) const \
|
||||||
{ \
|
{ \
|
||||||
fs_inst *inst = op(vgrf(src0.type), src0); \
|
fs_inst *inst = op(vgrf(src0.type), src0); \
|
||||||
if (out) *out = inst; \
|
if (out) *out = inst; \
|
||||||
|
|
@ -565,12 +565,12 @@ namespace brw {
|
||||||
#define VIRT1(op) _ALU1(SHADER_OPCODE_, op)
|
#define VIRT1(op) _ALU1(SHADER_OPCODE_, op)
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
alu2(opcode op, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const
|
alu2(opcode op, const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const
|
||||||
{
|
{
|
||||||
return emit(op, dst, src0, src1);
|
return emit(op, dst, src0, src1);
|
||||||
}
|
}
|
||||||
fs_reg
|
brw_reg
|
||||||
alu2(opcode op, const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const
|
alu2(opcode op, const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const
|
||||||
{
|
{
|
||||||
enum brw_reg_type inferred_dst_type =
|
enum brw_reg_type inferred_dst_type =
|
||||||
brw_type_larger_of(src0.type, src1.type);
|
brw_type_larger_of(src0.type, src1.type);
|
||||||
|
|
@ -581,12 +581,12 @@ namespace brw {
|
||||||
|
|
||||||
#define _ALU2(prefix, op) \
|
#define _ALU2(prefix, op) \
|
||||||
fs_inst * \
|
fs_inst * \
|
||||||
op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const \
|
op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const \
|
||||||
{ \
|
{ \
|
||||||
return alu2(prefix##op, dst, src0, src1); \
|
return alu2(prefix##op, dst, src0, src1); \
|
||||||
} \
|
} \
|
||||||
fs_reg \
|
brw_reg \
|
||||||
op(const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const \
|
op(const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const \
|
||||||
{ \
|
{ \
|
||||||
return alu2(prefix##op, src0, src1, out); \
|
return alu2(prefix##op, src0, src1, out); \
|
||||||
}
|
}
|
||||||
|
|
@ -595,7 +595,7 @@ namespace brw {
|
||||||
|
|
||||||
#define ALU2_ACC(op) \
|
#define ALU2_ACC(op) \
|
||||||
fs_inst * \
|
fs_inst * \
|
||||||
op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const \
|
op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const \
|
||||||
{ \
|
{ \
|
||||||
fs_inst *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
|
fs_inst *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
|
||||||
inst->writes_accumulator = true; \
|
inst->writes_accumulator = true; \
|
||||||
|
|
@ -604,8 +604,8 @@ namespace brw {
|
||||||
|
|
||||||
#define ALU3(op) \
|
#define ALU3(op) \
|
||||||
fs_inst * \
|
fs_inst * \
|
||||||
op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, \
|
op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, \
|
||||||
const fs_reg &src2) const \
|
const brw_reg &src2) const \
|
||||||
{ \
|
{ \
|
||||||
return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
|
return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
|
||||||
}
|
}
|
||||||
|
|
@ -672,13 +672,13 @@ namespace brw {
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const
|
ADD(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const
|
||||||
{
|
{
|
||||||
return alu2(BRW_OPCODE_ADD, dst, src0, src1);
|
return alu2(BRW_OPCODE_ADD, dst, src0, src1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
ADD(const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const
|
ADD(const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const
|
||||||
{
|
{
|
||||||
if (src1.file == IMM && src1.ud == 0 && !out)
|
if (src1.file == IMM && src1.ud == 0 && !out)
|
||||||
return src0;
|
return src0;
|
||||||
|
|
@ -692,7 +692,7 @@ namespace brw {
|
||||||
* the flag register with the packed 16 bits of the result.
|
* the flag register with the packed 16 bits of the result.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
CMP(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
|
CMP(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1,
|
||||||
brw_conditional_mod condition) const
|
brw_conditional_mod condition) const
|
||||||
{
|
{
|
||||||
/* Take the instruction:
|
/* Take the instruction:
|
||||||
|
|
@ -718,7 +718,7 @@ namespace brw {
|
||||||
* CMPN: Behaves like CMP, but produces true if src1 is NaN.
|
* CMPN: Behaves like CMP, but produces true if src1 is NaN.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
CMPN(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
|
CMPN(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1,
|
||||||
brw_conditional_mod condition) const
|
brw_conditional_mod condition) const
|
||||||
{
|
{
|
||||||
/* Take the instruction:
|
/* Take the instruction:
|
||||||
|
|
@ -753,8 +753,8 @@ namespace brw {
|
||||||
* CSEL: dst = src2 <op> 0.0f ? src0 : src1
|
* CSEL: dst = src2 <op> 0.0f ? src0 : src1
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
CSEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
|
CSEL(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1,
|
||||||
const fs_reg &src2, brw_conditional_mod condition) const
|
const brw_reg &src2, brw_conditional_mod condition) const
|
||||||
{
|
{
|
||||||
return set_condmod(condition,
|
return set_condmod(condition,
|
||||||
emit(BRW_OPCODE_CSEL,
|
emit(BRW_OPCODE_CSEL,
|
||||||
|
|
@ -768,8 +768,8 @@ namespace brw {
|
||||||
* Emit a linear interpolation instruction.
|
* Emit a linear interpolation instruction.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
LRP(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
|
LRP(const brw_reg &dst, const brw_reg &x, const brw_reg &y,
|
||||||
const fs_reg &a) const
|
const brw_reg &a) const
|
||||||
{
|
{
|
||||||
if (shader->devinfo->ver <= 10) {
|
if (shader->devinfo->ver <= 10) {
|
||||||
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
|
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
|
||||||
|
|
@ -779,14 +779,14 @@ namespace brw {
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
|
/* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
|
||||||
const fs_reg y_times_a = vgrf(dst.type);
|
const brw_reg y_times_a = vgrf(dst.type);
|
||||||
const fs_reg one_minus_a = vgrf(dst.type);
|
const brw_reg one_minus_a = vgrf(dst.type);
|
||||||
const fs_reg x_times_one_minus_a = vgrf(dst.type);
|
const brw_reg x_times_one_minus_a = vgrf(dst.type);
|
||||||
|
|
||||||
MUL(y_times_a, y, a);
|
MUL(y_times_a, y, a);
|
||||||
ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
|
ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
|
||||||
MUL(x_times_one_minus_a, x, fs_reg(one_minus_a));
|
MUL(x_times_one_minus_a, x, brw_reg(one_minus_a));
|
||||||
return ADD(dst, fs_reg(x_times_one_minus_a), fs_reg(y_times_a));
|
return ADD(dst, brw_reg(x_times_one_minus_a), brw_reg(y_times_a));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -794,7 +794,7 @@ namespace brw {
|
||||||
* Collect a number of registers in a contiguous range of registers.
|
* Collect a number of registers in a contiguous range of registers.
|
||||||
*/
|
*/
|
||||||
fs_inst *
|
fs_inst *
|
||||||
LOAD_PAYLOAD(const fs_reg &dst, const fs_reg *src,
|
LOAD_PAYLOAD(const brw_reg &dst, const brw_reg *src,
|
||||||
unsigned sources, unsigned header_size) const
|
unsigned sources, unsigned header_size) const
|
||||||
{
|
{
|
||||||
fs_inst *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
|
fs_inst *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
|
||||||
|
|
@ -809,7 +809,7 @@ namespace brw {
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
VEC(const fs_reg &dst, const fs_reg *src, unsigned sources) const
|
VEC(const brw_reg &dst, const brw_reg *src, unsigned sources) const
|
||||||
{
|
{
|
||||||
return sources == 1 ? MOV(dst, src[0])
|
return sources == 1 ? MOV(dst, src[0])
|
||||||
: LOAD_PAYLOAD(dst, src, sources, 0);
|
: LOAD_PAYLOAD(dst, src, sources, 0);
|
||||||
|
|
@ -822,7 +822,7 @@ namespace brw {
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
UNDEF(const fs_reg &dst) const
|
UNDEF(const brw_reg &dst) const
|
||||||
{
|
{
|
||||||
assert(dst.file == VGRF);
|
assert(dst.file == VGRF);
|
||||||
assert(dst.offset % REG_SIZE == 0);
|
assert(dst.offset % REG_SIZE == 0);
|
||||||
|
|
@ -834,7 +834,7 @@ namespace brw {
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
DPAS(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, const fs_reg &src2,
|
DPAS(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, const brw_reg &src2,
|
||||||
unsigned sdepth, unsigned rcount) const
|
unsigned sdepth, unsigned rcount) const
|
||||||
{
|
{
|
||||||
assert(_dispatch_width == 8 * reg_unit(shader->devinfo));
|
assert(_dispatch_width == 8 * reg_unit(shader->devinfo));
|
||||||
|
|
@ -868,14 +868,14 @@ namespace brw {
|
||||||
* Workaround for negation of UD registers. See comment in
|
* Workaround for negation of UD registers. See comment in
|
||||||
* fs_generator::generate_code() for more details.
|
* fs_generator::generate_code() for more details.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
fix_unsigned_negate(const fs_reg &src) const
|
fix_unsigned_negate(const brw_reg &src) const
|
||||||
{
|
{
|
||||||
if (src.type == BRW_TYPE_UD &&
|
if (src.type == BRW_TYPE_UD &&
|
||||||
src.negate) {
|
src.negate) {
|
||||||
fs_reg temp = vgrf(BRW_TYPE_UD);
|
brw_reg temp = vgrf(BRW_TYPE_UD);
|
||||||
MOV(temp, src);
|
MOV(temp, src);
|
||||||
return fs_reg(temp);
|
return brw_reg(temp);
|
||||||
} else {
|
} else {
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
@ -885,8 +885,8 @@ namespace brw {
|
||||||
* Workaround for source register modes not supported by the ternary
|
* Workaround for source register modes not supported by the ternary
|
||||||
* instruction encoding.
|
* instruction encoding.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
fix_3src_operand(const fs_reg &src) const
|
fix_3src_operand(const brw_reg &src) const
|
||||||
{
|
{
|
||||||
switch (src.file) {
|
switch (src.file) {
|
||||||
case FIXED_GRF:
|
case FIXED_GRF:
|
||||||
|
|
@ -905,7 +905,7 @@ namespace brw {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg expanded = vgrf(src.type);
|
brw_reg expanded = vgrf(src.type);
|
||||||
MOV(expanded, src);
|
MOV(expanded, src);
|
||||||
return expanded;
|
return expanded;
|
||||||
}
|
}
|
||||||
|
|
@ -925,8 +925,8 @@ namespace brw {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline fs_reg
|
static inline brw_reg
|
||||||
offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
|
offset(const brw_reg &reg, const brw::fs_builder &bld, unsigned delta)
|
||||||
{
|
{
|
||||||
return offset(reg, bld.dispatch_width(), delta);
|
return offset(reg, bld.dispatch_width(), delta);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1150,7 +1150,7 @@ struct register_allocation {
|
||||||
uint16_t avail;
|
uint16_t avail;
|
||||||
};
|
};
|
||||||
|
|
||||||
static fs_reg
|
static brw_reg
|
||||||
allocate_slots(struct register_allocation *regs, unsigned num_regs,
|
allocate_slots(struct register_allocation *regs, unsigned num_regs,
|
||||||
unsigned bytes, unsigned align_bytes,
|
unsigned bytes, unsigned align_bytes,
|
||||||
brw::simple_allocator &alloc)
|
brw::simple_allocator &alloc)
|
||||||
|
|
@ -1172,7 +1172,7 @@ allocate_slots(struct register_allocation *regs, unsigned num_regs,
|
||||||
|
|
||||||
regs[i].avail &= ~(mask << j);
|
regs[i].avail &= ~(mask << j);
|
||||||
|
|
||||||
fs_reg reg = brw_vgrf(regs[i].nr, BRW_TYPE_F);
|
brw_reg reg = brw_vgrf(regs[i].nr, BRW_TYPE_F);
|
||||||
reg.offset = j * 2;
|
reg.offset = j * 2;
|
||||||
|
|
||||||
return reg;
|
return reg;
|
||||||
|
|
@ -1243,7 +1243,7 @@ parcel_out_registers(struct imm *imm, unsigned len, const bblock_t *cur_block,
|
||||||
*/
|
*/
|
||||||
const unsigned width = ver == 8 && imm[i].is_half_float ? 2 : 1;
|
const unsigned width = ver == 8 && imm[i].is_half_float ? 2 : 1;
|
||||||
|
|
||||||
const fs_reg reg = allocate_slots(regs, num_regs,
|
const brw_reg reg = allocate_slots(regs, num_regs,
|
||||||
imm[i].size * width,
|
imm[i].size * width,
|
||||||
get_alignment_for_imm(&imm[i]),
|
get_alignment_for_imm(&imm[i]),
|
||||||
alloc);
|
alloc);
|
||||||
|
|
@ -1569,7 +1569,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
const uint32_t width = 1;
|
const uint32_t width = 1;
|
||||||
const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all();
|
const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all();
|
||||||
|
|
||||||
fs_reg reg = brw_vgrf(imm->nr, BRW_TYPE_F);
|
brw_reg reg = brw_vgrf(imm->nr, BRW_TYPE_F);
|
||||||
reg.offset = imm->subreg_offset;
|
reg.offset = imm->subreg_offset;
|
||||||
reg.stride = 0;
|
reg.stride = 0;
|
||||||
|
|
||||||
|
|
@ -1591,7 +1591,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
/* Rewrite the immediate sources to refer to the new GRFs. */
|
/* Rewrite the immediate sources to refer to the new GRFs. */
|
||||||
for (int i = 0; i < table.len; i++) {
|
for (int i = 0; i < table.len; i++) {
|
||||||
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
|
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
|
||||||
fs_reg *reg = &link->inst->src[link->src];
|
brw_reg *reg = &link->inst->src[link->src];
|
||||||
|
|
||||||
if (link->inst->opcode == BRW_OPCODE_SEL) {
|
if (link->inst->opcode == BRW_OPCODE_SEL) {
|
||||||
if (link->type == either_type) {
|
if (link->type == either_type) {
|
||||||
|
|
@ -1709,7 +1709,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
inst->conditional_mod == BRW_CONDITIONAL_GE ||
|
inst->conditional_mod == BRW_CONDITIONAL_GE ||
|
||||||
inst->conditional_mod == BRW_CONDITIONAL_L);
|
inst->conditional_mod == BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
fs_reg temp = inst->src[0];
|
brw_reg temp = inst->src[0];
|
||||||
inst->src[0] = inst->src[1];
|
inst->src[0] = inst->src[1];
|
||||||
inst->src[1] = temp;
|
inst->src[1] = temp;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,8 +46,8 @@ namespace { /* avoid conflict with opt_copy_propagation_elements */
|
||||||
struct acp_entry {
|
struct acp_entry {
|
||||||
struct rb_node by_dst;
|
struct rb_node by_dst;
|
||||||
struct rb_node by_src;
|
struct rb_node by_src;
|
||||||
fs_reg dst;
|
brw_reg dst;
|
||||||
fs_reg src;
|
brw_reg src;
|
||||||
unsigned global_idx;
|
unsigned global_idx;
|
||||||
unsigned size_written;
|
unsigned size_written;
|
||||||
unsigned size_read;
|
unsigned size_read;
|
||||||
|
|
@ -338,7 +338,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(linear_ctx *lin_ctx, cfg_t *cfg,
|
||||||
* Like reg_offset, but register must be VGRF or FIXED_GRF.
|
* Like reg_offset, but register must be VGRF or FIXED_GRF.
|
||||||
*/
|
*/
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
grf_reg_offset(const fs_reg &r)
|
grf_reg_offset(const brw_reg &r)
|
||||||
{
|
{
|
||||||
return (r.file == VGRF ? 0 : r.nr) * REG_SIZE +
|
return (r.file == VGRF ? 0 : r.nr) * REG_SIZE +
|
||||||
r.offset +
|
r.offset +
|
||||||
|
|
@ -349,7 +349,7 @@ grf_reg_offset(const fs_reg &r)
|
||||||
* Like regions_overlap, but register must be VGRF or FIXED_GRF.
|
* Like regions_overlap, but register must be VGRF or FIXED_GRF.
|
||||||
*/
|
*/
|
||||||
static inline bool
|
static inline bool
|
||||||
grf_regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
grf_regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
|
||||||
{
|
{
|
||||||
return reg_space(r) == reg_space(s) &&
|
return reg_space(r) == reg_space(s) &&
|
||||||
!(grf_reg_offset(r) + dr <= grf_reg_offset(s) ||
|
!(grf_reg_offset(r) + dr <= grf_reg_offset(s) ||
|
||||||
|
|
@ -793,7 +793,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
* regioning restrictions that apply to integer types smaller than a dword.
|
* regioning restrictions that apply to integer types smaller than a dword.
|
||||||
* See BSpec #56640 for details.
|
* See BSpec #56640 for details.
|
||||||
*/
|
*/
|
||||||
const fs_reg tmp = horiz_stride(entry->src, inst->src[arg].stride);
|
const brw_reg tmp = horiz_stride(entry->src, inst->src[arg].stride);
|
||||||
if (has_subdword_integer_region_restriction(devinfo, inst, &tmp, 1))
|
if (has_subdword_integer_region_restriction(devinfo, inst, &tmp, 1))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
@ -936,7 +936,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
try_constant_propagate_value(fs_reg val, brw_reg_type dst_type,
|
try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
|
||||||
fs_inst *inst, int arg)
|
fs_inst *inst, int arg)
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
@ -1394,7 +1394,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
||||||
inst->src[i].is_contiguous())) {
|
inst->src[i].is_contiguous())) {
|
||||||
const brw_reg_type t = i < inst->header_size ?
|
const brw_reg_type t = i < inst->header_size ?
|
||||||
BRW_TYPE_UD : inst->src[i].type;
|
BRW_TYPE_UD : inst->src[i].type;
|
||||||
fs_reg dst = byte_offset(retype(inst->dst, t), offset);
|
brw_reg dst = byte_offset(retype(inst->dst, t), offset);
|
||||||
if (!dst.equals(inst->src[i])) {
|
if (!dst.equals(inst->src[i])) {
|
||||||
acp_entry *entry = linear_zalloc(lin_ctx, acp_entry);
|
acp_entry *entry = linear_zalloc(lin_ctx, acp_entry);
|
||||||
entry->dst = dst;
|
entry->dst = dst;
|
||||||
|
|
@ -1486,7 +1486,7 @@ brw_fs_opt_copy_propagation(fs_visitor &s)
|
||||||
static bool
|
static bool
|
||||||
try_copy_propagate_def(const brw_compiler *compiler,
|
try_copy_propagate_def(const brw_compiler *compiler,
|
||||||
const brw::simple_allocator &alloc,
|
const brw::simple_allocator &alloc,
|
||||||
fs_inst *def, const fs_reg &val,
|
fs_inst *def, const brw_reg &val,
|
||||||
fs_inst *inst, int arg,
|
fs_inst *inst, int arg,
|
||||||
uint8_t max_polygons)
|
uint8_t max_polygons)
|
||||||
{
|
{
|
||||||
|
|
@ -1716,7 +1716,7 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
try_constant_propagate_def(fs_inst *def, fs_reg val, fs_inst *inst, int arg)
|
try_constant_propagate_def(fs_inst *def, brw_reg val, fs_inst *inst, int arg)
|
||||||
{
|
{
|
||||||
/* Bail if inst is reading more than a single vector component of entry */
|
/* Bail if inst is reading more than a single vector component of entry */
|
||||||
if (inst->size_read(arg) > def->dst.component_size(inst->exec_size))
|
if (inst->size_read(arg) > def->dst.component_size(inst->exec_size))
|
||||||
|
|
@ -1728,8 +1728,8 @@ try_constant_propagate_def(fs_inst *def, fs_reg val, fs_inst *inst, int arg)
|
||||||
/**
|
/**
|
||||||
* Handle cases like UW subreads of a UD immediate, with an offset.
|
* Handle cases like UW subreads of a UD immediate, with an offset.
|
||||||
*/
|
*/
|
||||||
static fs_reg
|
static brw_reg
|
||||||
extract_imm(fs_reg val, brw_reg_type type, unsigned offset)
|
extract_imm(brw_reg val, brw_reg_type type, unsigned offset)
|
||||||
{
|
{
|
||||||
assert(val.file == IMM);
|
assert(val.file == IMM);
|
||||||
|
|
||||||
|
|
@ -1748,16 +1748,16 @@ extract_imm(fs_reg val, brw_reg_type type, unsigned offset)
|
||||||
val.d = (val.d << (bitsize * (32/bitsize - 1 - offset))) >> ((32/bitsize - 1) * bitsize);
|
val.d = (val.d << (bitsize * (32/bitsize - 1 - offset))) >> ((32/bitsize - 1) * bitsize);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return fs_reg();
|
return brw_reg();
|
||||||
}
|
}
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static brw_reg
|
||||||
find_value_for_offset(fs_inst *def, const fs_reg &src, unsigned src_size)
|
find_value_for_offset(fs_inst *def, const brw_reg &src, unsigned src_size)
|
||||||
{
|
{
|
||||||
fs_reg val;
|
brw_reg val;
|
||||||
|
|
||||||
switch (def->opcode) {
|
switch (def->opcode) {
|
||||||
case BRW_OPCODE_MOV:
|
case BRW_OPCODE_MOV:
|
||||||
|
|
@ -1837,7 +1837,7 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg val =
|
brw_reg val =
|
||||||
find_value_for_offset(def, inst->src[i], inst->size_read(i));
|
find_value_for_offset(def, inst->src[i], inst->size_read(i));
|
||||||
|
|
||||||
if (val.file == IMM) {
|
if (val.file == IMM) {
|
||||||
|
|
|
||||||
|
|
@ -162,8 +162,8 @@ local_only(const fs_inst *inst)
|
||||||
static bool
|
static bool
|
||||||
operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
|
operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
|
||||||
{
|
{
|
||||||
fs_reg *xs = a->src;
|
brw_reg *xs = a->src;
|
||||||
fs_reg *ys = b->src;
|
brw_reg *ys = b->src;
|
||||||
|
|
||||||
if (a->opcode == BRW_OPCODE_MAD) {
|
if (a->opcode == BRW_OPCODE_MAD) {
|
||||||
return xs[0].equals(ys[0]) &&
|
return xs[0].equals(ys[0]) &&
|
||||||
|
|
@ -251,7 +251,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
|
||||||
#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)
|
#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)
|
||||||
|
|
||||||
uint32_t
|
uint32_t
|
||||||
hash_reg(uint32_t hash, const fs_reg &r)
|
hash_reg(uint32_t hash, const brw_reg &r)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
uint64_t u64;
|
uint64_t u64;
|
||||||
|
|
@ -314,7 +314,7 @@ hash_inst(const void *v)
|
||||||
/* Canonicalize negations on either source (or both) and commutatively
|
/* Canonicalize negations on either source (or both) and commutatively
|
||||||
* combine the hashes for both sources.
|
* combine the hashes for both sources.
|
||||||
*/
|
*/
|
||||||
fs_reg src[2] = { inst->src[0], inst->src[1] };
|
brw_reg src[2] = { inst->src[0], inst->src[1] };
|
||||||
uint32_t src_hash[2];
|
uint32_t src_hash[2];
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
|
|
@ -383,8 +383,8 @@ remap_sources(fs_visitor &s, const brw::def_analysis &defs,
|
||||||
if (def_block->end_ip_delta)
|
if (def_block->end_ip_delta)
|
||||||
s.cfg->adjust_block_ips();
|
s.cfg->adjust_block_ips();
|
||||||
|
|
||||||
fs_reg neg = brw_vgrf(new_nr, BRW_TYPE_F);
|
brw_reg neg = brw_vgrf(new_nr, BRW_TYPE_F);
|
||||||
fs_reg tmp = dbld.MOV(negate(neg));
|
brw_reg tmp = dbld.MOV(negate(neg));
|
||||||
inst->src[i].nr = tmp.nr;
|
inst->src[i].nr = tmp.nr;
|
||||||
remap_table[old_nr] = tmp.nr;
|
remap_table[old_nr] = tmp.nr;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -100,8 +100,8 @@ brw_fs_opt_dead_code_eliminate(fs_visitor &s)
|
||||||
|
|
||||||
if (!result_live &&
|
if (!result_live &&
|
||||||
(can_omit_write(inst) || can_eliminate(devinfo, inst, flag_live))) {
|
(can_omit_write(inst) || can_eliminate(devinfo, inst, flag_live))) {
|
||||||
inst->dst = fs_reg(spread(retype(brw_null_reg(), inst->dst.type),
|
inst->dst = brw_reg(spread(retype(brw_null_reg(), inst->dst.type),
|
||||||
inst->dst.stride));
|
inst->dst.stride));
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,6 @@ brw_math_function(enum opcode op)
|
||||||
|
|
||||||
static struct brw_reg
|
static struct brw_reg
|
||||||
normalize_brw_reg_for_encoding(brw_reg *reg)
|
normalize_brw_reg_for_encoding(brw_reg *reg)
|
||||||
|
|
||||||
{
|
{
|
||||||
struct brw_reg brw_reg;
|
struct brw_reg brw_reg;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ using namespace brw;
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_live_variables::setup_one_read(struct block_data *bd,
|
fs_live_variables::setup_one_read(struct block_data *bd,
|
||||||
int ip, const fs_reg &reg)
|
int ip, const brw_reg &reg)
|
||||||
{
|
{
|
||||||
int var = var_from_reg(reg);
|
int var = var_from_reg(reg);
|
||||||
assert(var < num_vars);
|
assert(var < num_vars);
|
||||||
|
|
@ -72,7 +72,7 @@ fs_live_variables::setup_one_read(struct block_data *bd,
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
|
fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
|
||||||
int ip, const fs_reg &reg)
|
int ip, const brw_reg &reg)
|
||||||
{
|
{
|
||||||
int var = var_from_reg(reg);
|
int var = var_from_reg(reg);
|
||||||
assert(var < num_vars);
|
assert(var < num_vars);
|
||||||
|
|
@ -115,7 +115,7 @@ fs_live_variables::setup_def_use()
|
||||||
foreach_inst_in_block(fs_inst, inst, block) {
|
foreach_inst_in_block(fs_inst, inst, block) {
|
||||||
/* Set use[] for this instruction */
|
/* Set use[] for this instruction */
|
||||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||||
fs_reg reg = inst->src[i];
|
brw_reg reg = inst->src[i];
|
||||||
|
|
||||||
if (reg.file != VGRF)
|
if (reg.file != VGRF)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -130,7 +130,7 @@ fs_live_variables::setup_def_use()
|
||||||
|
|
||||||
/* Set def[] for this instruction */
|
/* Set def[] for this instruction */
|
||||||
if (inst->dst.file == VGRF) {
|
if (inst->dst.file == VGRF) {
|
||||||
fs_reg reg = inst->dst;
|
brw_reg reg = inst->dst;
|
||||||
for (unsigned j = 0; j < regs_written(inst); j++) {
|
for (unsigned j = 0; j < regs_written(inst); j++) {
|
||||||
setup_one_write(bd, inst, ip, reg);
|
setup_one_write(bd, inst, ip, reg);
|
||||||
reg.offset += REG_SIZE;
|
reg.offset += REG_SIZE;
|
||||||
|
|
@ -317,7 +317,7 @@ fs_live_variables::~fs_live_variables()
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
check_register_live_range(const fs_live_variables *live, int ip,
|
check_register_live_range(const fs_live_variables *live, int ip,
|
||||||
const fs_reg &reg, unsigned n)
|
const brw_reg &reg, unsigned n)
|
||||||
{
|
{
|
||||||
const unsigned var = live->var_from_reg(reg);
|
const unsigned var = live->var_from_reg(reg);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,7 @@ public:
|
||||||
|
|
||||||
bool vars_interfere(int a, int b) const;
|
bool vars_interfere(int a, int b) const;
|
||||||
bool vgrfs_interfere(int a, int b) const;
|
bool vgrfs_interfere(int a, int b) const;
|
||||||
int var_from_reg(const fs_reg &reg) const
|
int var_from_reg(const brw_reg &reg) const
|
||||||
{
|
{
|
||||||
return var_from_vgrf[reg.nr] + reg.offset / REG_SIZE;
|
return var_from_vgrf[reg.nr] + reg.offset / REG_SIZE;
|
||||||
}
|
}
|
||||||
|
|
@ -132,9 +132,9 @@ public:
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void setup_def_use();
|
void setup_def_use();
|
||||||
void setup_one_read(struct block_data *bd, int ip, const fs_reg &reg);
|
void setup_one_read(struct block_data *bd, int ip, const brw_reg &reg);
|
||||||
void setup_one_write(struct block_data *bd, fs_inst *inst, int ip,
|
void setup_one_write(struct block_data *bd, fs_inst *inst, int ip,
|
||||||
const fs_reg &reg);
|
const brw_reg &reg);
|
||||||
void compute_live_variables();
|
void compute_live_variables();
|
||||||
void compute_start_end();
|
void compute_start_end();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,10 +37,10 @@ brw_fs_lower_constant_loads(fs_visitor &s)
|
||||||
|
|
||||||
const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
|
const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
|
||||||
const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
|
const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
|
||||||
const fs_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
const brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
||||||
const unsigned base = pull_index * 4;
|
const unsigned base = pull_index * 4;
|
||||||
|
|
||||||
fs_reg srcs[PULL_UNIFORM_CONSTANT_SRCS];
|
brw_reg srcs[PULL_UNIFORM_CONSTANT_SRCS];
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = brw_imm_ud(index);
|
srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = brw_imm_ud(index);
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1));
|
srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1));
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz);
|
srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz);
|
||||||
|
|
@ -66,7 +66,7 @@ brw_fs_lower_constant_loads(fs_visitor &s)
|
||||||
|
|
||||||
s.VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
|
s.VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
|
||||||
brw_imm_ud(index),
|
brw_imm_ud(index),
|
||||||
fs_reg() /* surface_handle */,
|
brw_reg() /* surface_handle */,
|
||||||
inst->src[1],
|
inst->src[1],
|
||||||
pull_index * 4, 4, 1);
|
pull_index * 4, 4, 1);
|
||||||
inst->remove(block);
|
inst->remove(block);
|
||||||
|
|
@ -90,7 +90,7 @@ brw_fs_lower_load_payload(fs_visitor &s)
|
||||||
|
|
||||||
assert(inst->dst.file == VGRF);
|
assert(inst->dst.file == VGRF);
|
||||||
assert(inst->saturate == false);
|
assert(inst->saturate == false);
|
||||||
fs_reg dst = inst->dst;
|
brw_reg dst = inst->dst;
|
||||||
|
|
||||||
const fs_builder ibld(&s, block, inst);
|
const fs_builder ibld(&s, block, inst);
|
||||||
const fs_builder ubld = ibld.exec_all();
|
const fs_builder ubld = ibld.exec_all();
|
||||||
|
|
@ -188,7 +188,7 @@ brw_fs_lower_csel(fs_visitor &s)
|
||||||
const fs_builder ibld(&s, block, inst);
|
const fs_builder ibld(&s, block, inst);
|
||||||
|
|
||||||
/* CSEL: dst = src2 <op> 0 ? src0 : src1 */
|
/* CSEL: dst = src2 <op> 0 ? src0 : src1 */
|
||||||
fs_reg zero = brw_imm_reg(orig_type);
|
brw_reg zero = brw_imm_reg(orig_type);
|
||||||
ibld.CMP(retype(brw_null_reg(), orig_type),
|
ibld.CMP(retype(brw_null_reg(), orig_type),
|
||||||
inst->src[2], zero, inst->conditional_mod);
|
inst->src[2], zero, inst->conditional_mod);
|
||||||
|
|
||||||
|
|
@ -250,7 +250,7 @@ brw_fs_lower_sub_sat(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
if (inst->exec_size == 8 && inst->src[0].type != BRW_TYPE_Q &&
|
if (inst->exec_size == 8 && inst->src[0].type != BRW_TYPE_Q &&
|
||||||
inst->src[0].type != BRW_TYPE_UQ) {
|
inst->src[0].type != BRW_TYPE_UQ) {
|
||||||
fs_reg acc = retype(brw_acc_reg(inst->exec_size),
|
brw_reg acc = retype(brw_acc_reg(inst->exec_size),
|
||||||
inst->src[1].type);
|
inst->src[1].type);
|
||||||
|
|
||||||
ibld.MOV(acc, inst->src[1]);
|
ibld.MOV(acc, inst->src[1]);
|
||||||
|
|
@ -263,11 +263,11 @@ brw_fs_lower_sub_sat(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
fs_inst *add;
|
fs_inst *add;
|
||||||
|
|
||||||
fs_reg tmp = ibld.vgrf(inst->src[0].type);
|
brw_reg tmp = ibld.vgrf(inst->src[0].type);
|
||||||
ibld.SHR(tmp, inst->src[1], brw_imm_d(1));
|
ibld.SHR(tmp, inst->src[1], brw_imm_d(1));
|
||||||
|
|
||||||
fs_reg s1_sub_t = ibld.ADD(inst->src[1], negate(tmp));
|
brw_reg s1_sub_t = ibld.ADD(inst->src[1], negate(tmp));
|
||||||
fs_reg sat_s0_sub_t = ibld.ADD(inst->src[0], negate(tmp), &add);
|
brw_reg sat_s0_sub_t = ibld.ADD(inst->src[0], negate(tmp), &add);
|
||||||
add->saturate = true;
|
add->saturate = true;
|
||||||
|
|
||||||
add = ibld.ADD(inst->dst, sat_s0_sub_t, negate(s1_sub_t));
|
add = ibld.ADD(inst->dst, sat_s0_sub_t, negate(s1_sub_t));
|
||||||
|
|
@ -331,8 +331,8 @@ brw_fs_lower_barycentrics(fs_visitor &s)
|
||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case BRW_OPCODE_PLN: {
|
case BRW_OPCODE_PLN: {
|
||||||
assert(inst->exec_size == 16);
|
assert(inst->exec_size == 16);
|
||||||
const fs_reg tmp = ibld.vgrf(inst->src[1].type, 2);
|
const brw_reg tmp = ibld.vgrf(inst->src[1].type, 2);
|
||||||
fs_reg srcs[4];
|
brw_reg srcs[4];
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
|
||||||
srcs[i] = horiz_offset(offset(inst->src[1], ibld, i % 2),
|
srcs[i] = horiz_offset(offset(inst->src[1], ibld, i % 2),
|
||||||
|
|
@ -348,7 +348,7 @@ brw_fs_lower_barycentrics(fs_visitor &s)
|
||||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: {
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: {
|
||||||
assert(inst->exec_size == 16);
|
assert(inst->exec_size == 16);
|
||||||
const fs_reg tmp = ibld.vgrf(inst->dst.type, 2);
|
const brw_reg tmp = ibld.vgrf(inst->dst.type, 2);
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
for (unsigned g = 0; g < inst->exec_size / 8; g++) {
|
for (unsigned g = 0; g < inst->exec_size / 8; g++) {
|
||||||
|
|
@ -386,8 +386,8 @@ lower_derivative(fs_visitor &s, bblock_t *block, fs_inst *inst,
|
||||||
unsigned swz0, unsigned swz1)
|
unsigned swz0, unsigned swz1)
|
||||||
{
|
{
|
||||||
const fs_builder ubld = fs_builder(&s, block, inst).exec_all();
|
const fs_builder ubld = fs_builder(&s, block, inst).exec_all();
|
||||||
const fs_reg tmp0 = ubld.vgrf(inst->src[0].type);
|
const brw_reg tmp0 = ubld.vgrf(inst->src[0].type);
|
||||||
const fs_reg tmp1 = ubld.vgrf(inst->src[0].type);
|
const brw_reg tmp1 = ubld.vgrf(inst->src[0].type);
|
||||||
|
|
||||||
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], brw_imm_ud(swz0));
|
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], brw_imm_ud(swz0));
|
||||||
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], brw_imm_ud(swz1));
|
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], brw_imm_ud(swz1));
|
||||||
|
|
@ -469,7 +469,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
|
||||||
|
|
||||||
const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0);
|
const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0);
|
||||||
|
|
||||||
fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg exec_mask = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.UNDEF(exec_mask);
|
ubld.UNDEF(exec_mask);
|
||||||
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, exec_mask,
|
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, exec_mask,
|
||||||
retype(brw_mask_reg(0),
|
retype(brw_mask_reg(0),
|
||||||
|
|
@ -483,7 +483,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
|
||||||
* will appear at the front of the mask.
|
* will appear at the front of the mask.
|
||||||
*/
|
*/
|
||||||
if (!(first && packed_dispatch)) {
|
if (!(first && packed_dispatch)) {
|
||||||
fs_reg mask = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg mask = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.UNDEF(mask);
|
ubld.UNDEF(mask);
|
||||||
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, mask,
|
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, mask,
|
||||||
retype(brw_sr0_reg(vmask ? 3 : 2),
|
retype(brw_sr0_reg(vmask ? 3 : 2),
|
||||||
|
|
@ -506,7 +506,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: {
|
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: {
|
||||||
fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.UNDEF(tmp);
|
ubld.UNDEF(tmp);
|
||||||
ubld.LZD(tmp, exec_mask);
|
ubld.LZD(tmp, exec_mask);
|
||||||
ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31));
|
ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31));
|
||||||
|
|
@ -553,15 +553,15 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s)
|
||||||
const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3;
|
const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3;
|
||||||
const unsigned len = MIN2(inst->mlen, inst->ex_mlen);
|
const unsigned len = MIN2(inst->mlen, inst->ex_mlen);
|
||||||
|
|
||||||
fs_reg tmp = brw_vgrf(s.alloc.allocate(len),
|
brw_reg tmp = brw_vgrf(s.alloc.allocate(len),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Sadly, we've lost all notion of channels and bit sizes at this
|
/* Sadly, we've lost all notion of channels and bit sizes at this
|
||||||
* point. Just WE_all it.
|
* point. Just WE_all it.
|
||||||
*/
|
*/
|
||||||
const fs_builder ibld = fs_builder(&s, block, inst).exec_all().group(16, 0);
|
const fs_builder ibld = fs_builder(&s, block, inst).exec_all().group(16, 0);
|
||||||
fs_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD);
|
brw_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD);
|
||||||
fs_reg copy_dst = tmp;
|
brw_reg copy_dst = tmp;
|
||||||
for (unsigned i = 0; i < len; i += 2) {
|
for (unsigned i = 0; i < len; i += 2) {
|
||||||
if (len == i + 1) {
|
if (len == i + 1) {
|
||||||
/* Only one register left; do SIMD8 */
|
/* Only one register left; do SIMD8 */
|
||||||
|
|
@ -696,7 +696,7 @@ brw_fs_lower_alu_restrictions(fs_visitor &s)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst *inst,
|
brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst *inst,
|
||||||
fs_reg *reg, bool compressed)
|
brw_reg *reg, bool compressed)
|
||||||
{
|
{
|
||||||
if (reg->file != VGRF)
|
if (reg->file != VGRF)
|
||||||
return;
|
return;
|
||||||
|
|
@ -801,7 +801,7 @@ brw_fs_lower_load_subgroup_invocation(fs_visitor &s)
|
||||||
|
|
||||||
if (inst->exec_size == 8) {
|
if (inst->exec_size == 8) {
|
||||||
assert(inst->dst.type == BRW_TYPE_UD);
|
assert(inst->dst.type == BRW_TYPE_UD);
|
||||||
fs_reg uw = retype(inst->dst, BRW_TYPE_UW);
|
brw_reg uw = retype(inst->dst, BRW_TYPE_UW);
|
||||||
ubld8.MOV(uw, brw_imm_v(0x76543210));
|
ubld8.MOV(uw, brw_imm_v(0x76543210));
|
||||||
ubld8.MOV(inst->dst, uw);
|
ubld8.MOV(inst->dst, uw);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -852,12 +852,12 @@ brw_fs_lower_indirect_mov(fs_visitor &s)
|
||||||
|
|
||||||
/* Extract unaligned part */
|
/* Extract unaligned part */
|
||||||
uint16_t extra_offset = inst->src[0].offset & 0x1;
|
uint16_t extra_offset = inst->src[0].offset & 0x1;
|
||||||
fs_reg offset = ibld.ADD(inst->src[1], brw_imm_uw(extra_offset));
|
brw_reg offset = ibld.ADD(inst->src[1], brw_imm_uw(extra_offset));
|
||||||
|
|
||||||
/* Check if offset is odd or even so that we can choose either high or
|
/* Check if offset is odd or even so that we can choose either high or
|
||||||
* low byte from the result.
|
* low byte from the result.
|
||||||
*/
|
*/
|
||||||
fs_reg is_odd = ibld.AND(offset, brw_imm_ud(1));
|
brw_reg is_odd = ibld.AND(offset, brw_imm_ud(1));
|
||||||
|
|
||||||
/* Make sure offset is word (2-bytes) aligned */
|
/* Make sure offset is word (2-bytes) aligned */
|
||||||
offset = ibld.AND(offset, brw_imm_uw(~1));
|
offset = ibld.AND(offset, brw_imm_uw(~1));
|
||||||
|
|
@ -865,24 +865,24 @@ brw_fs_lower_indirect_mov(fs_visitor &s)
|
||||||
/* Indirect addressing(vx1 and vxh) not supported with UB/B datatype for
|
/* Indirect addressing(vx1 and vxh) not supported with UB/B datatype for
|
||||||
* Src0, so change data type for src0 and dst to UW.
|
* Src0, so change data type for src0 and dst to UW.
|
||||||
*/
|
*/
|
||||||
fs_reg dst = ibld.vgrf(BRW_TYPE_UW);
|
brw_reg dst = ibld.vgrf(BRW_TYPE_UW);
|
||||||
|
|
||||||
/* Subtract unaligned offset from src0 offset since we already
|
/* Subtract unaligned offset from src0 offset since we already
|
||||||
* accounted unaligned part in the indirect byte offset.
|
* accounted unaligned part in the indirect byte offset.
|
||||||
*/
|
*/
|
||||||
fs_reg start = retype(inst->src[0], BRW_TYPE_UW);
|
brw_reg start = retype(inst->src[0], BRW_TYPE_UW);
|
||||||
start.offset &= ~extra_offset;
|
start.offset &= ~extra_offset;
|
||||||
|
|
||||||
/* Adjust length to account extra offset. */
|
/* Adjust length to account extra offset. */
|
||||||
assert(inst->src[2].file == IMM);
|
assert(inst->src[2].file == IMM);
|
||||||
fs_reg length = brw_imm_ud(inst->src[2].ud + extra_offset);
|
brw_reg length = brw_imm_ud(inst->src[2].ud + extra_offset);
|
||||||
|
|
||||||
ibld.emit(SHADER_OPCODE_MOV_INDIRECT, dst, start, offset, length);
|
ibld.emit(SHADER_OPCODE_MOV_INDIRECT, dst, start, offset, length);
|
||||||
|
|
||||||
/* Select high byte if offset is odd otherwise select low byte. */
|
/* Select high byte if offset is odd otherwise select low byte. */
|
||||||
fs_reg lo = ibld.AND(dst, brw_imm_uw(0xff));
|
brw_reg lo = ibld.AND(dst, brw_imm_uw(0xff));
|
||||||
fs_reg hi = ibld.SHR(dst, brw_imm_uw(8));
|
brw_reg hi = ibld.SHR(dst, brw_imm_uw(8));
|
||||||
fs_reg result = ibld.vgrf(BRW_TYPE_UW);
|
brw_reg result = ibld.vgrf(BRW_TYPE_UW);
|
||||||
ibld.CSEL(result, hi, lo, is_odd, BRW_CONDITIONAL_NZ);
|
ibld.CSEL(result, hi, lo, is_odd, BRW_CONDITIONAL_NZ);
|
||||||
|
|
||||||
/* Extra MOV needed here to convert back to the corresponding B type */
|
/* Extra MOV needed here to convert back to the corresponding B type */
|
||||||
|
|
|
||||||
|
|
@ -24,16 +24,16 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst)
|
||||||
const brw_reg_type src1_type = BRW_TYPE_HF;
|
const brw_reg_type src1_type = BRW_TYPE_HF;
|
||||||
const brw_reg_type src2_type = BRW_TYPE_HF;
|
const brw_reg_type src2_type = BRW_TYPE_HF;
|
||||||
|
|
||||||
const fs_reg dest = inst->dst;
|
const brw_reg dest = inst->dst;
|
||||||
fs_reg src0 = inst->src[0];
|
brw_reg src0 = inst->src[0];
|
||||||
const fs_reg src1 = retype(inst->src[1], src1_type);
|
const brw_reg src1 = retype(inst->src[1], src1_type);
|
||||||
const fs_reg src2 = retype(inst->src[2], src2_type);
|
const brw_reg src2 = retype(inst->src[2], src2_type);
|
||||||
|
|
||||||
const unsigned dest_stride =
|
const unsigned dest_stride =
|
||||||
dest.type == BRW_TYPE_HF ? REG_SIZE / 2 : REG_SIZE;
|
dest.type == BRW_TYPE_HF ? REG_SIZE / 2 : REG_SIZE;
|
||||||
|
|
||||||
for (unsigned r = 0; r < inst->rcount; r++) {
|
for (unsigned r = 0; r < inst->rcount; r++) {
|
||||||
fs_reg temp = bld.vgrf(BRW_TYPE_HF);
|
brw_reg temp = bld.vgrf(BRW_TYPE_HF);
|
||||||
|
|
||||||
for (unsigned subword = 0; subword < 2; subword++) {
|
for (unsigned subword = 0; subword < 2; subword++) {
|
||||||
for (unsigned s = 0; s < inst->sdepth; s++) {
|
for (unsigned s = 0; s < inst->sdepth; s++) {
|
||||||
|
|
@ -50,7 +50,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst)
|
||||||
*/
|
*/
|
||||||
if (s == 0 && subword == 0) {
|
if (s == 0 && subword == 0) {
|
||||||
const unsigned acc_width = 8;
|
const unsigned acc_width = 8;
|
||||||
fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD),
|
brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD),
|
||||||
inst->group % acc_width);
|
inst->group % acc_width);
|
||||||
|
|
||||||
if (bld.shader->devinfo->verx10 >= 125) {
|
if (bld.shader->devinfo->verx10 >= 125) {
|
||||||
|
|
@ -69,7 +69,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst)
|
||||||
->writes_accumulator = true;
|
->writes_accumulator = true;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fs_reg result;
|
brw_reg result;
|
||||||
|
|
||||||
/* As mentioned above, the MAC had an optional, explicit
|
/* As mentioned above, the MAC had an optional, explicit
|
||||||
* destination register. Various optimization passes are not
|
* destination register. Various optimization passes are not
|
||||||
|
|
@ -96,7 +96,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
if (!src0.is_null()) {
|
if (!src0.is_null()) {
|
||||||
if (src0_type != BRW_TYPE_HF) {
|
if (src0_type != BRW_TYPE_HF) {
|
||||||
fs_reg temp2 = bld.vgrf(src0_type);
|
brw_reg temp2 = bld.vgrf(src0_type);
|
||||||
|
|
||||||
bld.MOV(temp2, temp);
|
bld.MOV(temp2, temp);
|
||||||
|
|
||||||
|
|
@ -134,10 +134,10 @@ int8_using_dp4a(const fs_builder &bld, fs_inst *inst)
|
||||||
const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB
|
const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB
|
||||||
? BRW_TYPE_UD : BRW_TYPE_D;
|
? BRW_TYPE_UD : BRW_TYPE_D;
|
||||||
|
|
||||||
fs_reg dest = inst->dst;
|
brw_reg dest = inst->dst;
|
||||||
fs_reg src0 = inst->src[0];
|
brw_reg src0 = inst->src[0];
|
||||||
const fs_reg src1 = retype(inst->src[1], src1_type);
|
const brw_reg src1 = retype(inst->src[1], src1_type);
|
||||||
const fs_reg src2 = retype(inst->src[2], src2_type);
|
const brw_reg src2 = retype(inst->src[2], src2_type);
|
||||||
|
|
||||||
const unsigned dest_stride = reg_unit(bld.shader->devinfo) * REG_SIZE;
|
const unsigned dest_stride = reg_unit(bld.shader->devinfo) * REG_SIZE;
|
||||||
|
|
||||||
|
|
@ -183,10 +183,10 @@ int8_using_mul_add(const fs_builder &bld, fs_inst *inst)
|
||||||
const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB
|
const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB
|
||||||
? BRW_TYPE_UD : BRW_TYPE_D;
|
? BRW_TYPE_UD : BRW_TYPE_D;
|
||||||
|
|
||||||
fs_reg dest = inst->dst;
|
brw_reg dest = inst->dst;
|
||||||
fs_reg src0 = inst->src[0];
|
brw_reg src0 = inst->src[0];
|
||||||
const fs_reg src1 = retype(inst->src[1], src1_type);
|
const brw_reg src1 = retype(inst->src[1], src1_type);
|
||||||
const fs_reg src2 = retype(inst->src[2], src2_type);
|
const brw_reg src2 = retype(inst->src[2], src2_type);
|
||||||
|
|
||||||
const unsigned dest_stride = REG_SIZE;
|
const unsigned dest_stride = REG_SIZE;
|
||||||
|
|
||||||
|
|
@ -199,9 +199,9 @@ int8_using_mul_add(const fs_builder &bld, fs_inst *inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned s = 0; s < inst->sdepth; s++) {
|
for (unsigned s = 0; s < inst->sdepth; s++) {
|
||||||
fs_reg temp1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg temp1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg temp2 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg temp2 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg temp3 = bld.vgrf(BRW_TYPE_UD, 2);
|
brw_reg temp3 = bld.vgrf(BRW_TYPE_UD, 2);
|
||||||
const brw_reg_type temp_type =
|
const brw_reg_type temp_type =
|
||||||
(inst->src[1].type == BRW_TYPE_B ||
|
(inst->src[1].type == BRW_TYPE_B ||
|
||||||
inst->src[2].type == BRW_TYPE_B)
|
inst->src[2].type == BRW_TYPE_B)
|
||||||
|
|
|
||||||
|
|
@ -207,14 +207,14 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
bool needs_mov = false;
|
bool needs_mov = false;
|
||||||
fs_reg orig_dst = inst->dst;
|
brw_reg orig_dst = inst->dst;
|
||||||
|
|
||||||
/* Get a new VGRF for the "low" 32x16-bit multiplication result if
|
/* Get a new VGRF for the "low" 32x16-bit multiplication result if
|
||||||
* reusing the original destination is impossible due to hardware
|
* reusing the original destination is impossible due to hardware
|
||||||
* restrictions, source/destination overlap, or it being the null
|
* restrictions, source/destination overlap, or it being the null
|
||||||
* register.
|
* register.
|
||||||
*/
|
*/
|
||||||
fs_reg low = inst->dst;
|
brw_reg low = inst->dst;
|
||||||
if (orig_dst.is_null() ||
|
if (orig_dst.is_null() ||
|
||||||
regions_overlap(inst->dst, inst->size_written,
|
regions_overlap(inst->dst, inst->size_written,
|
||||||
inst->src[0], inst->size_read(0)) ||
|
inst->src[0], inst->size_read(0)) ||
|
||||||
|
|
@ -227,7 +227,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get a new VGRF but keep the same stride as inst->dst */
|
/* Get a new VGRF but keep the same stride as inst->dst */
|
||||||
fs_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type);
|
brw_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type);
|
||||||
high.stride = inst->dst.stride;
|
high.stride = inst->dst.stride;
|
||||||
high.offset = inst->dst.offset % REG_SIZE;
|
high.offset = inst->dst.offset % REG_SIZE;
|
||||||
|
|
||||||
|
|
@ -319,19 +319,19 @@ brw_fs_lower_mul_qword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
unsigned int q_regs = regs_written(inst);
|
unsigned int q_regs = regs_written(inst);
|
||||||
unsigned int d_regs = (q_regs + 1) / 2;
|
unsigned int d_regs = (q_regs + 1) / 2;
|
||||||
|
|
||||||
fs_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ);
|
brw_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ);
|
||||||
fs_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
brw_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
||||||
fs_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
brw_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Here we need the full 64 bit result for 32b * 32b. */
|
/* Here we need the full 64 bit result for 32b * 32b. */
|
||||||
if (devinfo->has_integer_dword_mul) {
|
if (devinfo->has_integer_dword_mul) {
|
||||||
ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0),
|
ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0),
|
||||||
subscript(inst->src[1], BRW_TYPE_UD, 0));
|
subscript(inst->src[1], BRW_TYPE_UD, 0));
|
||||||
} else {
|
} else {
|
||||||
fs_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
brw_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
||||||
fs_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
brw_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD);
|
||||||
const unsigned acc_width = reg_unit(devinfo) * 8;
|
const unsigned acc_width = reg_unit(devinfo) * 8;
|
||||||
fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD),
|
brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD),
|
||||||
inst->group % acc_width);
|
inst->group % acc_width);
|
||||||
|
|
||||||
fs_inst *mul = ibld.MUL(acc,
|
fs_inst *mul = ibld.MUL(acc,
|
||||||
|
|
@ -390,7 +390,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
/* Should have been lowered to 8-wide. */
|
/* Should have been lowered to 8-wide. */
|
||||||
assert(inst->exec_size <= brw_fs_get_lowered_simd_width(&s, inst));
|
assert(inst->exec_size <= brw_fs_get_lowered_simd_width(&s, inst));
|
||||||
const unsigned acc_width = reg_unit(devinfo) * 8;
|
const unsigned acc_width = reg_unit(devinfo) * 8;
|
||||||
const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
|
const brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
|
||||||
inst->group % acc_width);
|
inst->group % acc_width);
|
||||||
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
|
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
|
||||||
ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ brw_fs_lower_pack(fs_visitor &s)
|
||||||
|
|
||||||
assert(inst->dst.file == VGRF);
|
assert(inst->dst.file == VGRF);
|
||||||
assert(inst->saturate == false);
|
assert(inst->saturate == false);
|
||||||
fs_reg dst = inst->dst;
|
brw_reg dst = inst->dst;
|
||||||
|
|
||||||
const fs_builder ibld(&s, block, inst);
|
const fs_builder ibld(&s, block, inst);
|
||||||
/* The lowering generates 2 instructions for what was previously 1. This
|
/* The lowering generates 2 instructions for what was previously 1. This
|
||||||
|
|
|
||||||
|
|
@ -464,7 +464,7 @@ namespace brw {
|
||||||
brw_type_size_bytes(inst->src[i].type) == get_exec_type_size(inst));
|
brw_type_size_bytes(inst->src[i].type) == get_exec_type_size(inst));
|
||||||
|
|
||||||
const fs_builder ibld(v, block, inst);
|
const fs_builder ibld(v, block, inst);
|
||||||
const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
|
const brw_reg tmp = ibld.vgrf(get_exec_type(inst));
|
||||||
|
|
||||||
lower_instruction(v, block, ibld.MOV(tmp, inst->src[i]));
|
lower_instruction(v, block, ibld.MOV(tmp, inst->src[i]));
|
||||||
inst->src[i] = tmp;
|
inst->src[i] = tmp;
|
||||||
|
|
@ -495,7 +495,7 @@ namespace {
|
||||||
const unsigned stride =
|
const unsigned stride =
|
||||||
brw_type_size_bytes(inst->dst.type) * inst->dst.stride <= brw_type_size_bytes(type) ? 1 :
|
brw_type_size_bytes(inst->dst.type) * inst->dst.stride <= brw_type_size_bytes(type) ? 1 :
|
||||||
brw_type_size_bytes(inst->dst.type) * inst->dst.stride / brw_type_size_bytes(type);
|
brw_type_size_bytes(inst->dst.type) * inst->dst.stride / brw_type_size_bytes(type);
|
||||||
fs_reg tmp = ibld.vgrf(type, stride);
|
brw_reg tmp = ibld.vgrf(type, stride);
|
||||||
ibld.UNDEF(tmp);
|
ibld.UNDEF(tmp);
|
||||||
tmp = horiz_stride(tmp, stride);
|
tmp = horiz_stride(tmp, stride);
|
||||||
|
|
||||||
|
|
@ -549,7 +549,7 @@ namespace {
|
||||||
inst->exec_size * stride *
|
inst->exec_size * stride *
|
||||||
brw_type_size_bytes(inst->src[i].type),
|
brw_type_size_bytes(inst->src[i].type),
|
||||||
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
|
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
|
||||||
fs_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type);
|
brw_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type);
|
||||||
ibld.UNDEF(tmp);
|
ibld.UNDEF(tmp);
|
||||||
tmp = byte_offset(horiz_stride(tmp, stride),
|
tmp = byte_offset(horiz_stride(tmp, stride),
|
||||||
required_src_byte_offset(devinfo, inst, i));
|
required_src_byte_offset(devinfo, inst, i));
|
||||||
|
|
@ -560,7 +560,7 @@ namespace {
|
||||||
const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4),
|
const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4),
|
||||||
false);
|
false);
|
||||||
const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type);
|
const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type);
|
||||||
fs_reg raw_src = inst->src[i];
|
brw_reg raw_src = inst->src[i];
|
||||||
raw_src.negate = false;
|
raw_src.negate = false;
|
||||||
raw_src.abs = false;
|
raw_src.abs = false;
|
||||||
|
|
||||||
|
|
@ -578,7 +578,7 @@ namespace {
|
||||||
/* Point the original instruction at the temporary, making sure to keep
|
/* Point the original instruction at the temporary, making sure to keep
|
||||||
* any source modifiers in the instruction.
|
* any source modifiers in the instruction.
|
||||||
*/
|
*/
|
||||||
fs_reg lower_src = tmp;
|
brw_reg lower_src = tmp;
|
||||||
lower_src.negate = inst->src[i].negate;
|
lower_src.negate = inst->src[i].negate;
|
||||||
lower_src.abs = inst->src[i].abs;
|
lower_src.abs = inst->src[i].abs;
|
||||||
inst->src[i] = lower_src;
|
inst->src[i] = lower_src;
|
||||||
|
|
@ -607,7 +607,7 @@ namespace {
|
||||||
const unsigned stride = required_dst_byte_stride(inst) /
|
const unsigned stride = required_dst_byte_stride(inst) /
|
||||||
brw_type_size_bytes(inst->dst.type);
|
brw_type_size_bytes(inst->dst.type);
|
||||||
assert(stride > 0);
|
assert(stride > 0);
|
||||||
fs_reg tmp = ibld.vgrf(inst->dst.type, stride);
|
brw_reg tmp = ibld.vgrf(inst->dst.type, stride);
|
||||||
ibld.UNDEF(tmp);
|
ibld.UNDEF(tmp);
|
||||||
tmp = horiz_stride(tmp, stride);
|
tmp = horiz_stride(tmp, stride);
|
||||||
|
|
||||||
|
|
@ -665,7 +665,7 @@ namespace {
|
||||||
const unsigned n = get_exec_type_size(inst) / brw_type_size_bytes(raw_type);
|
const unsigned n = get_exec_type_size(inst) / brw_type_size_bytes(raw_type);
|
||||||
const fs_builder ibld(v, block, inst);
|
const fs_builder ibld(v, block, inst);
|
||||||
|
|
||||||
fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride);
|
brw_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride);
|
||||||
ibld.UNDEF(tmp);
|
ibld.UNDEF(tmp);
|
||||||
tmp = horiz_stride(tmp, inst->dst.stride);
|
tmp = horiz_stride(tmp, inst->dst.stride);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -500,19 +500,19 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
|
||||||
* lbld.group() from the i-th source region of instruction \p inst and return
|
* lbld.group() from the i-th source region of instruction \p inst and return
|
||||||
* it as result in packed form.
|
* it as result in packed form.
|
||||||
*/
|
*/
|
||||||
static fs_reg
|
static brw_reg
|
||||||
emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
|
emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
|
||||||
{
|
{
|
||||||
assert(lbld.group() >= inst->group);
|
assert(lbld.group() >= inst->group);
|
||||||
|
|
||||||
/* Specified channel group from the source region. */
|
/* Specified channel group from the source region. */
|
||||||
const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group);
|
const brw_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group);
|
||||||
|
|
||||||
if (needs_src_copy(lbld, inst, i)) {
|
if (needs_src_copy(lbld, inst, i)) {
|
||||||
const unsigned num_components = inst->components_read(i);
|
const unsigned num_components = inst->components_read(i);
|
||||||
const fs_reg tmp = lbld.vgrf(inst->src[i].type, num_components);
|
const brw_reg tmp = lbld.vgrf(inst->src[i].type, num_components);
|
||||||
|
|
||||||
fs_reg comps[num_components];
|
brw_reg comps[num_components];
|
||||||
for (unsigned k = 0; k < num_components; ++k)
|
for (unsigned k = 0; k < num_components; ++k)
|
||||||
comps[k] = offset(src, inst->exec_size, k);
|
comps[k] = offset(src, inst->exec_size, k);
|
||||||
lbld.VEC(tmp, comps, num_components);
|
lbld.VEC(tmp, comps, num_components);
|
||||||
|
|
@ -585,7 +585,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
|
||||||
* inserted using \p lbld_before and any copy instructions required for
|
* inserted using \p lbld_before and any copy instructions required for
|
||||||
* zipping up the destination of \p inst will be inserted using \p lbld_after.
|
* zipping up the destination of \p inst will be inserted using \p lbld_after.
|
||||||
*/
|
*/
|
||||||
static fs_reg
|
static brw_reg
|
||||||
emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
||||||
fs_inst *inst)
|
fs_inst *inst)
|
||||||
{
|
{
|
||||||
|
|
@ -596,7 +596,7 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
||||||
const struct intel_device_info *devinfo = lbld_before.shader->devinfo;
|
const struct intel_device_info *devinfo = lbld_before.shader->devinfo;
|
||||||
|
|
||||||
/* Specified channel group from the destination region. */
|
/* Specified channel group from the destination region. */
|
||||||
const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group);
|
const brw_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group);
|
||||||
|
|
||||||
if (!needs_dst_copy(lbld_after, inst)) {
|
if (!needs_dst_copy(lbld_after, inst)) {
|
||||||
/* No need to allocate a temporary for the lowered instruction, just
|
/* No need to allocate a temporary for the lowered instruction, just
|
||||||
|
|
@ -611,7 +611,7 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
||||||
const unsigned dst_size = (inst->size_written - residency_size) /
|
const unsigned dst_size = (inst->size_written - residency_size) /
|
||||||
inst->dst.component_size(inst->exec_size);
|
inst->dst.component_size(inst->exec_size);
|
||||||
|
|
||||||
const fs_reg tmp = lbld_after.vgrf(inst->dst.type,
|
const brw_reg tmp = lbld_after.vgrf(inst->dst.type,
|
||||||
dst_size + inst->has_sampler_residency());
|
dst_size + inst->has_sampler_residency());
|
||||||
|
|
||||||
if (inst->predicate) {
|
if (inst->predicate) {
|
||||||
|
|
@ -639,9 +639,9 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
||||||
* SIMD16 16 bit values.
|
* SIMD16 16 bit values.
|
||||||
*/
|
*/
|
||||||
const fs_builder rbld = lbld_after.exec_all().group(1, 0);
|
const fs_builder rbld = lbld_after.exec_all().group(1, 0);
|
||||||
fs_reg local_res_reg = component(
|
brw_reg local_res_reg = component(
|
||||||
retype(offset(tmp, lbld_before, dst_size), BRW_TYPE_UW), 0);
|
retype(offset(tmp, lbld_before, dst_size), BRW_TYPE_UW), 0);
|
||||||
fs_reg final_res_reg =
|
brw_reg final_res_reg =
|
||||||
retype(byte_offset(inst->dst,
|
retype(byte_offset(inst->dst,
|
||||||
inst->size_written - residency_size +
|
inst->size_written - residency_size +
|
||||||
lbld_after.group() / 8), BRW_TYPE_UW);
|
lbld_after.group() / 8), BRW_TYPE_UW);
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -9,7 +9,7 @@
|
||||||
using namespace brw;
|
using namespace brw;
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
src_as_uint(const fs_reg &src)
|
src_as_uint(const brw_reg &src)
|
||||||
{
|
{
|
||||||
assert(src.file == IMM);
|
assert(src.file == IMM);
|
||||||
|
|
||||||
|
|
@ -37,7 +37,7 @@ src_as_uint(const fs_reg &src)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static brw_reg
|
||||||
brw_imm_for_type(uint64_t value, enum brw_reg_type type)
|
brw_imm_for_type(uint64_t value, enum brw_reg_type type)
|
||||||
{
|
{
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
@ -394,7 +394,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
assert(!inst->saturate);
|
assert(!inst->saturate);
|
||||||
|
|
||||||
fs_reg result;
|
brw_reg result;
|
||||||
|
|
||||||
switch (brw_type_size_bytes(inst->src[0].type)) {
|
switch (brw_type_size_bytes(inst->src[0].type)) {
|
||||||
case 2:
|
case 2:
|
||||||
|
|
@ -470,7 +470,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
if (progress && inst->sources == 2 && inst->is_commutative()) {
|
if (progress && inst->sources == 2 && inst->is_commutative()) {
|
||||||
if (inst->src[0].file == IMM) {
|
if (inst->src[0].file == IMM) {
|
||||||
fs_reg tmp = inst->src[1];
|
brw_reg tmp = inst->src[1];
|
||||||
inst->src[1] = inst->src[0];
|
inst->src[1] = inst->src[0];
|
||||||
inst->src[0] = tmp;
|
inst->src[0] = tmp;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ using namespace brw;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
assign_reg(const struct intel_device_info *devinfo,
|
assign_reg(const struct intel_device_info *devinfo,
|
||||||
unsigned *reg_hw_locations, fs_reg *reg)
|
unsigned *reg_hw_locations, brw_reg *reg)
|
||||||
{
|
{
|
||||||
if (reg->file == VGRF) {
|
if (reg->file == VGRF) {
|
||||||
reg->nr = reg_unit(devinfo) * reg_hw_locations[reg->nr] + reg->offset / REG_SIZE;
|
reg->nr = reg_unit(devinfo) * reg_hw_locations[reg->nr] + reg->offset / REG_SIZE;
|
||||||
|
|
@ -291,21 +291,21 @@ private:
|
||||||
void build_interference_graph();
|
void build_interference_graph();
|
||||||
void discard_interference_graph();
|
void discard_interference_graph();
|
||||||
|
|
||||||
fs_reg build_lane_offsets(const fs_builder &bld,
|
brw_reg build_lane_offsets(const fs_builder &bld,
|
||||||
uint32_t spill_offset, int ip);
|
uint32_t spill_offset, int ip);
|
||||||
fs_reg build_single_offset(const fs_builder &bld,
|
brw_reg build_single_offset(const fs_builder &bld,
|
||||||
uint32_t spill_offset, int ip);
|
uint32_t spill_offset, int ip);
|
||||||
fs_reg build_legacy_scratch_header(const fs_builder &bld,
|
brw_reg build_legacy_scratch_header(const fs_builder &bld,
|
||||||
uint32_t spill_offset, int ip);
|
uint32_t spill_offset, int ip);
|
||||||
|
|
||||||
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
fs_reg dst, uint32_t spill_offset, unsigned count, int ip);
|
brw_reg dst, uint32_t spill_offset, unsigned count, int ip);
|
||||||
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
|
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
fs_reg src, uint32_t spill_offset, unsigned count, int ip);
|
brw_reg src, uint32_t spill_offset, unsigned count, int ip);
|
||||||
|
|
||||||
void set_spill_costs();
|
void set_spill_costs();
|
||||||
int choose_spill_reg();
|
int choose_spill_reg();
|
||||||
fs_reg alloc_spill_reg(unsigned size, int ip);
|
brw_reg alloc_spill_reg(unsigned size, int ip);
|
||||||
void spill_reg(unsigned spill_reg);
|
void spill_reg(unsigned spill_reg);
|
||||||
|
|
||||||
void *mem_ctx;
|
void *mem_ctx;
|
||||||
|
|
@ -571,16 +571,16 @@ fs_reg_alloc::discard_interference_graph()
|
||||||
have_spill_costs = false;
|
have_spill_costs = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_reg_alloc::build_single_offset(const fs_builder &bld, uint32_t spill_offset, int ip)
|
fs_reg_alloc::build_single_offset(const fs_builder &bld, uint32_t spill_offset, int ip)
|
||||||
{
|
{
|
||||||
fs_reg offset = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
|
brw_reg offset = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
|
||||||
fs_inst *inst = bld.MOV(offset, brw_imm_ud(spill_offset));
|
fs_inst *inst = bld.MOV(offset, brw_imm_ud(spill_offset));
|
||||||
_mesa_set_add(spill_insts, inst);
|
_mesa_set_add(spill_insts, inst);
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, int ip)
|
fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, int ip)
|
||||||
{
|
{
|
||||||
/* LSC messages are limited to SIMD16 */
|
/* LSC messages are limited to SIMD16 */
|
||||||
|
|
@ -589,7 +589,7 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i
|
||||||
const fs_builder ubld = bld.exec_all();
|
const fs_builder ubld = bld.exec_all();
|
||||||
const unsigned reg_count = ubld.dispatch_width() / 8;
|
const unsigned reg_count = ubld.dispatch_width() / 8;
|
||||||
|
|
||||||
fs_reg offset = retype(alloc_spill_reg(reg_count, ip), BRW_TYPE_UD);
|
brw_reg offset = retype(alloc_spill_reg(reg_count, ip), BRW_TYPE_UD);
|
||||||
fs_inst *inst;
|
fs_inst *inst;
|
||||||
|
|
||||||
/* Build an offset per lane in SIMD8 */
|
/* Build an offset per lane in SIMD8 */
|
||||||
|
|
@ -622,7 +622,7 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i
|
||||||
/**
|
/**
|
||||||
* Generate a scratch header for pre-LSC platforms.
|
* Generate a scratch header for pre-LSC platforms.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
|
fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
|
||||||
uint32_t spill_offset, int ip)
|
uint32_t spill_offset, int ip)
|
||||||
{
|
{
|
||||||
|
|
@ -630,7 +630,7 @@ fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
|
||||||
const fs_builder ubld1 = bld.exec_all().group(1, 0);
|
const fs_builder ubld1 = bld.exec_all().group(1, 0);
|
||||||
|
|
||||||
/* Allocate a spill header and make it interfere with g0 */
|
/* Allocate a spill header and make it interfere with g0 */
|
||||||
fs_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
|
brw_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
|
||||||
ra_add_node_interference(g, first_vgrf_node + header.nr, first_payload_node);
|
ra_add_node_interference(g, first_vgrf_node + header.nr, first_payload_node);
|
||||||
|
|
||||||
fs_inst *inst = ubld8.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
fs_inst *inst = ubld8.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
||||||
|
|
@ -647,7 +647,7 @@ fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
|
||||||
void
|
void
|
||||||
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
struct shader_stats *stats,
|
struct shader_stats *stats,
|
||||||
fs_reg dst,
|
brw_reg dst,
|
||||||
uint32_t spill_offset, unsigned count, int ip)
|
uint32_t spill_offset, unsigned count, int ip)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
@ -664,7 +664,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
*/
|
*/
|
||||||
const bool use_transpose = bld.dispatch_width() > 16;
|
const bool use_transpose = bld.dispatch_width() > 16;
|
||||||
const fs_builder ubld = use_transpose ? bld.exec_all().group(1, 0) : bld;
|
const fs_builder ubld = use_transpose ? bld.exec_all().group(1, 0) : bld;
|
||||||
fs_reg offset;
|
brw_reg offset;
|
||||||
if (use_transpose) {
|
if (use_transpose) {
|
||||||
offset = build_single_offset(ubld, spill_offset, ip);
|
offset = build_single_offset(ubld, spill_offset, ip);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -675,11 +675,11 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
* register. That way we don't need to burn an additional register
|
* register. That way we don't need to burn an additional register
|
||||||
* for register allocation spill/fill.
|
* for register allocation spill/fill.
|
||||||
*/
|
*/
|
||||||
fs_reg srcs[] = {
|
brw_reg srcs[] = {
|
||||||
brw_imm_ud(0), /* desc */
|
brw_imm_ud(0), /* desc */
|
||||||
brw_imm_ud(0), /* ex_desc */
|
brw_imm_ud(0), /* ex_desc */
|
||||||
offset, /* payload */
|
offset, /* payload */
|
||||||
fs_reg(), /* payload2 */
|
brw_reg(), /* payload2 */
|
||||||
};
|
};
|
||||||
|
|
||||||
unspill_inst = ubld.emit(SHADER_OPCODE_SEND, dst,
|
unspill_inst = ubld.emit(SHADER_OPCODE_SEND, dst,
|
||||||
|
|
@ -702,12 +702,12 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
unspill_inst->send_is_volatile = true;
|
unspill_inst->send_is_volatile = true;
|
||||||
unspill_inst->send_ex_desc_scratch = true;
|
unspill_inst->send_ex_desc_scratch = true;
|
||||||
} else {
|
} else {
|
||||||
fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
brw_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
||||||
|
|
||||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||||
const fs_reg ex_desc = brw_imm_ud(0);
|
const brw_reg ex_desc = brw_imm_ud(0);
|
||||||
|
|
||||||
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header };
|
brw_reg srcs[] = { brw_imm_ud(0), ex_desc, header };
|
||||||
unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst,
|
unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst,
|
||||||
srcs, ARRAY_SIZE(srcs));
|
srcs, ARRAY_SIZE(srcs));
|
||||||
unspill_inst->mlen = 1;
|
unspill_inst->mlen = 1;
|
||||||
|
|
@ -732,7 +732,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
void
|
void
|
||||||
fs_reg_alloc::emit_spill(const fs_builder &bld,
|
fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||||
struct shader_stats *stats,
|
struct shader_stats *stats,
|
||||||
fs_reg src,
|
brw_reg src,
|
||||||
uint32_t spill_offset, unsigned count, int ip)
|
uint32_t spill_offset, unsigned count, int ip)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
@ -744,13 +744,13 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||||
|
|
||||||
fs_inst *spill_inst;
|
fs_inst *spill_inst;
|
||||||
if (devinfo->verx10 >= 125) {
|
if (devinfo->verx10 >= 125) {
|
||||||
fs_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
brw_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
||||||
/* We leave the extended descriptor empty and flag the instruction
|
/* We leave the extended descriptor empty and flag the instruction
|
||||||
* relocate the extended descriptor. That way the surface offset is
|
* relocate the extended descriptor. That way the surface offset is
|
||||||
* directly put into the instruction and we don't need to use a
|
* directly put into the instruction and we don't need to use a
|
||||||
* register to hold it.
|
* register to hold it.
|
||||||
*/
|
*/
|
||||||
fs_reg srcs[] = {
|
brw_reg srcs[] = {
|
||||||
brw_imm_ud(0), /* desc */
|
brw_imm_ud(0), /* desc */
|
||||||
brw_imm_ud(0), /* ex_desc */
|
brw_imm_ud(0), /* ex_desc */
|
||||||
offset, /* payload */
|
offset, /* payload */
|
||||||
|
|
@ -775,12 +775,12 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||||
spill_inst->send_is_volatile = false;
|
spill_inst->send_is_volatile = false;
|
||||||
spill_inst->send_ex_desc_scratch = true;
|
spill_inst->send_ex_desc_scratch = true;
|
||||||
} else {
|
} else {
|
||||||
fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
brw_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
||||||
|
|
||||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||||
const fs_reg ex_desc = brw_imm_ud(0);
|
const brw_reg ex_desc = brw_imm_ud(0);
|
||||||
|
|
||||||
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src };
|
brw_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src };
|
||||||
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
||||||
srcs, ARRAY_SIZE(srcs));
|
srcs, ARRAY_SIZE(srcs));
|
||||||
spill_inst->mlen = 1;
|
spill_inst->mlen = 1;
|
||||||
|
|
@ -903,7 +903,7 @@ fs_reg_alloc::choose_spill_reg()
|
||||||
return node - first_vgrf_node;
|
return node - first_vgrf_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
|
fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
|
||||||
{
|
{
|
||||||
int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo)));
|
int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo)));
|
||||||
|
|
@ -970,7 +970,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
||||||
int count = regs_read(inst, i);
|
int count = regs_read(inst, i);
|
||||||
int subset_spill_offset = spill_offset +
|
int subset_spill_offset = spill_offset +
|
||||||
ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
|
ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
|
||||||
fs_reg unspill_dst = alloc_spill_reg(count, ip);
|
brw_reg unspill_dst = alloc_spill_reg(count, ip);
|
||||||
|
|
||||||
inst->src[i].nr = unspill_dst.nr;
|
inst->src[i].nr = unspill_dst.nr;
|
||||||
inst->src[i].offset %= REG_SIZE;
|
inst->src[i].offset %= REG_SIZE;
|
||||||
|
|
@ -999,7 +999,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
||||||
inst->opcode != SHADER_OPCODE_UNDEF) {
|
inst->opcode != SHADER_OPCODE_UNDEF) {
|
||||||
int subset_spill_offset = spill_offset +
|
int subset_spill_offset = spill_offset +
|
||||||
ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
|
ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
|
||||||
fs_reg spill_src = alloc_spill_reg(regs_written(inst), ip);
|
brw_reg spill_src = alloc_spill_reg(regs_written(inst), ip);
|
||||||
|
|
||||||
inst->dst.nr = spill_src.nr;
|
inst->dst.nr = spill_src.nr;
|
||||||
inst->dst.offset %= REG_SIZE;
|
inst->dst.offset %= REG_SIZE;
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ static bool
|
||||||
is_nop_mov(const fs_inst *inst)
|
is_nop_mov(const fs_inst *inst)
|
||||||
{
|
{
|
||||||
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||||
fs_reg dst = inst->dst;
|
brw_reg dst = inst->dst;
|
||||||
for (int i = 0; i < inst->sources; i++) {
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
if (!dst.equals(inst->src[i])) {
|
if (!dst.equals(inst->src[i])) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -659,7 +659,7 @@ namespace {
|
||||||
* Look up the most current data dependency for register \p r.
|
* Look up the most current data dependency for register \p r.
|
||||||
*/
|
*/
|
||||||
dependency
|
dependency
|
||||||
get(const fs_reg &r) const
|
get(const brw_reg &r) const
|
||||||
{
|
{
|
||||||
if (const dependency *p = const_cast<scoreboard *>(this)->dep(r))
|
if (const dependency *p = const_cast<scoreboard *>(this)->dep(r))
|
||||||
return *p;
|
return *p;
|
||||||
|
|
@ -671,7 +671,7 @@ namespace {
|
||||||
* Specify the most current data dependency for register \p r.
|
* Specify the most current data dependency for register \p r.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
set(const fs_reg &r, const dependency &d)
|
set(const brw_reg &r, const dependency &d)
|
||||||
{
|
{
|
||||||
if (dependency *p = dep(r))
|
if (dependency *p = dep(r))
|
||||||
*p = d;
|
*p = d;
|
||||||
|
|
@ -761,7 +761,7 @@ namespace {
|
||||||
dependency accum_dep;
|
dependency accum_dep;
|
||||||
|
|
||||||
dependency *
|
dependency *
|
||||||
dep(const fs_reg &r)
|
dep(const brw_reg &r)
|
||||||
{
|
{
|
||||||
const unsigned reg = (r.file == VGRF ? r.nr + r.offset / REG_SIZE :
|
const unsigned reg = (r.file == VGRF ? r.nr + r.offset / REG_SIZE :
|
||||||
reg_offset(r) / REG_SIZE);
|
reg_offset(r) / REG_SIZE);
|
||||||
|
|
@ -1038,7 +1038,7 @@ namespace {
|
||||||
dependency::done;
|
dependency::done;
|
||||||
|
|
||||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||||
const fs_reg r = byte_offset(inst->src[i], REG_SIZE * j);
|
const brw_reg r = byte_offset(inst->src[i], REG_SIZE * j);
|
||||||
sb.set(r, shadow(sb.get(r), rd_dep));
|
sb.set(r, shadow(sb.get(r), rd_dep));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -200,14 +200,14 @@ brw_fs_opt_peephole_sel(fs_visitor &s)
|
||||||
* in the "then" clause uses a constant, we need to put it in a
|
* in the "then" clause uses a constant, we need to put it in a
|
||||||
* temporary.
|
* temporary.
|
||||||
*/
|
*/
|
||||||
fs_reg src0(then_mov[i]->src[0]);
|
brw_reg src0(then_mov[i]->src[0]);
|
||||||
if (src0.file == IMM) {
|
if (src0.file == IMM) {
|
||||||
src0 = ibld.vgrf(then_mov[i]->src[0].type);
|
src0 = ibld.vgrf(then_mov[i]->src[0].type);
|
||||||
ibld.MOV(src0, then_mov[i]->src[0]);
|
ibld.MOV(src0, then_mov[i]->src[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 64-bit immediates can't be placed in src1. */
|
/* 64-bit immediates can't be placed in src1. */
|
||||||
fs_reg src1(else_mov[i]->src[0]);
|
brw_reg src1(else_mov[i]->src[0]);
|
||||||
if (src1.file == IMM && brw_type_size_bytes(src1.type) == 8) {
|
if (src1.file == IMM && brw_type_size_bytes(src1.type) == 8) {
|
||||||
src1 = ibld.vgrf(else_mov[i]->src[0].type);
|
src1 = ibld.vgrf(else_mov[i]->src[0].type);
|
||||||
ibld.MOV(src1, else_mov[i]->src[0]);
|
ibld.MOV(src1, else_mov[i]->src[0]);
|
||||||
|
|
|
||||||
|
|
@ -386,7 +386,7 @@ cs_thread_payload::cs_thread_payload(const fs_visitor &v)
|
||||||
|
|
||||||
void
|
void
|
||||||
cs_thread_payload::load_subgroup_id(const fs_builder &bld,
|
cs_thread_payload::load_subgroup_id(const fs_builder &bld,
|
||||||
fs_reg &dest) const
|
brw_reg &dest) const
|
||||||
{
|
{
|
||||||
auto devinfo = bld.shader->devinfo;
|
auto devinfo = bld.shader->devinfo;
|
||||||
dest = retype(dest, BRW_TYPE_UD);
|
dest = retype(dest, BRW_TYPE_UD);
|
||||||
|
|
@ -483,9 +483,9 @@ bs_thread_payload::bs_thread_payload(const fs_visitor &v)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
bs_thread_payload::load_shader_type(const fs_builder &bld, fs_reg &dest) const
|
bs_thread_payload::load_shader_type(const fs_builder &bld, brw_reg &dest) const
|
||||||
{
|
{
|
||||||
fs_reg ud_dest = retype(dest, BRW_TYPE_UD);
|
brw_reg ud_dest = retype(dest, BRW_TYPE_UD);
|
||||||
bld.MOV(ud_dest, retype(brw_vec1_grf(0, 3), ud_dest.type));
|
bld.MOV(ud_dest, retype(brw_vec1_grf(0, 3), ud_dest.type));
|
||||||
bld.AND(ud_dest, ud_dest, brw_imm_ud(0xf));
|
bld.AND(ud_dest, ud_dest, brw_imm_ud(0xf));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ using namespace brw;
|
||||||
* data. It will get adjusted to be a real location before
|
* data. It will get adjusted to be a real location before
|
||||||
* generate_code() time.
|
* generate_code() time.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_visitor::interp_reg(const fs_builder &bld, unsigned location,
|
fs_visitor::interp_reg(const fs_builder &bld, unsigned location,
|
||||||
unsigned channel, unsigned comp)
|
unsigned channel, unsigned comp)
|
||||||
{
|
{
|
||||||
|
|
@ -71,7 +71,7 @@ fs_visitor::interp_reg(const fs_builder &bld, unsigned location,
|
||||||
* assign_urb_setup()), so we need to use offset() instead of
|
* assign_urb_setup()), so we need to use offset() instead of
|
||||||
* component() to select the specified parameter.
|
* component() to select the specified parameter.
|
||||||
*/
|
*/
|
||||||
const fs_reg tmp = bld.vgrf(BRW_TYPE_UD);
|
const brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD),
|
bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD),
|
||||||
dispatch_width, comp));
|
dispatch_width, comp));
|
||||||
return retype(tmp, BRW_TYPE_F);
|
return retype(tmp, BRW_TYPE_F);
|
||||||
|
|
@ -84,7 +84,7 @@ fs_visitor::interp_reg(const fs_builder &bld, unsigned location,
|
||||||
* data. It will get adjusted to be a real location before
|
* data. It will get adjusted to be a real location before
|
||||||
* generate_code() time.
|
* generate_code() time.
|
||||||
*/
|
*/
|
||||||
fs_reg
|
brw_reg
|
||||||
fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp)
|
fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp)
|
||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_FRAGMENT);
|
assert(stage == MESA_SHADER_FRAGMENT);
|
||||||
|
|
@ -106,7 +106,7 @@ fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp
|
||||||
* assign_urb_setup()), so we need to use offset() instead of
|
* assign_urb_setup()), so we need to use offset() instead of
|
||||||
* component() to select the specified parameter.
|
* component() to select the specified parameter.
|
||||||
*/
|
*/
|
||||||
const fs_reg tmp = bld.vgrf(BRW_TYPE_UD);
|
const brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD),
|
bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD),
|
||||||
dispatch_width, comp % 4));
|
dispatch_width, comp % 4));
|
||||||
return retype(tmp, BRW_TYPE_F);
|
return retype(tmp, BRW_TYPE_F);
|
||||||
|
|
@ -128,9 +128,9 @@ fs_visitor::emit_interpolation_setup()
|
||||||
const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
|
const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
|
||||||
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
|
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
|
||||||
|
|
||||||
fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
|
brw_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
|
||||||
fs_reg int_sample_offset_xy; /* Used on Gen8+ */
|
brw_reg int_sample_offset_xy; /* Used on Gen8+ */
|
||||||
fs_reg half_int_sample_offset_x, half_int_sample_offset_y;
|
brw_reg half_int_sample_offset_x, half_int_sample_offset_y;
|
||||||
if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
|
||||||
/* The thread payload only delivers subspan locations (ss0, ss1,
|
/* The thread payload only delivers subspan locations (ss0, ss1,
|
||||||
* ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
|
* ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
|
||||||
|
|
@ -161,9 +161,9 @@ fs_visitor::emit_interpolation_setup()
|
||||||
* coordinates out of 2 subspans coordinates in a single ADD instruction
|
* coordinates out of 2 subspans coordinates in a single ADD instruction
|
||||||
* (twice the operation above).
|
* (twice the operation above).
|
||||||
*/
|
*/
|
||||||
int_sample_offset_xy = fs_reg(brw_imm_v(0x11001010));
|
int_sample_offset_xy = brw_reg(brw_imm_v(0x11001010));
|
||||||
half_int_sample_offset_x = fs_reg(brw_imm_uw(0));
|
half_int_sample_offset_x = brw_reg(brw_imm_uw(0));
|
||||||
half_int_sample_offset_y = fs_reg(brw_imm_uw(0));
|
half_int_sample_offset_y = brw_reg(brw_imm_uw(0));
|
||||||
/* On Gfx12.5, because of regioning restrictions, the interpolation code
|
/* On Gfx12.5, because of regioning restrictions, the interpolation code
|
||||||
* is slightly different and works off X & Y only inputs. The ordering
|
* is slightly different and works off X & Y only inputs. The ordering
|
||||||
* of the half bytes here is a bit odd, with each subspan replicated
|
* of the half bytes here is a bit odd, with each subspan replicated
|
||||||
|
|
@ -173,13 +173,13 @@ fs_visitor::emit_interpolation_setup()
|
||||||
* X offset: 0 0 1 0 0 0 1 0
|
* X offset: 0 0 1 0 0 0 1 0
|
||||||
* Y offset: 0 0 0 0 1 0 1 0
|
* Y offset: 0 0 0 0 1 0 1 0
|
||||||
*/
|
*/
|
||||||
int_sample_offset_x = fs_reg(brw_imm_v(0x01000100));
|
int_sample_offset_x = brw_reg(brw_imm_v(0x01000100));
|
||||||
int_sample_offset_y = fs_reg(brw_imm_v(0x01010000));
|
int_sample_offset_y = brw_reg(brw_imm_v(0x01010000));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
|
brw_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
|
||||||
fs_reg int_coarse_offset_xy; /* Used on Gen8+ */
|
brw_reg int_coarse_offset_xy; /* Used on Gen8+ */
|
||||||
fs_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
|
brw_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
|
||||||
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
||||||
/* In coarse pixel dispatch we have to do the same ADD instruction that
|
/* In coarse pixel dispatch we have to do the same ADD instruction that
|
||||||
* we do in normal per pixel dispatch, except this time we're not adding
|
* we do in normal per pixel dispatch, except this time we're not adding
|
||||||
|
|
@ -226,9 +226,9 @@ fs_visitor::emit_interpolation_setup()
|
||||||
bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
|
bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
|
brw_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
|
||||||
fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
|
brw_reg int_pixel_offset_xy; /* Used on Gen8+ */
|
||||||
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
|
brw_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
|
||||||
switch (wm_prog_data->coarse_pixel_dispatch) {
|
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||||
case BRW_NEVER:
|
case BRW_NEVER:
|
||||||
int_pixel_offset_x = int_sample_offset_x;
|
int_pixel_offset_x = int_sample_offset_x;
|
||||||
|
|
@ -301,14 +301,14 @@ fs_visitor::emit_interpolation_setup()
|
||||||
if (devinfo->verx10 >= 125) {
|
if (devinfo->verx10 >= 125) {
|
||||||
const fs_builder dbld =
|
const fs_builder dbld =
|
||||||
abld.exec_all().group(hbld.dispatch_width() * 2, 0);
|
abld.exec_all().group(hbld.dispatch_width() * 2, 0);
|
||||||
const fs_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW);
|
const brw_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW);
|
||||||
const fs_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW);
|
const brw_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW);
|
||||||
|
|
||||||
dbld.ADD(int_pixel_x,
|
dbld.ADD(int_pixel_x,
|
||||||
fs_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
|
brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
|
||||||
int_pixel_offset_x);
|
int_pixel_offset_x);
|
||||||
dbld.ADD(int_pixel_y,
|
dbld.ADD(int_pixel_y,
|
||||||
fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
|
brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
|
||||||
int_pixel_offset_y);
|
int_pixel_offset_y);
|
||||||
|
|
||||||
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
||||||
|
|
@ -338,10 +338,10 @@ fs_visitor::emit_interpolation_setup()
|
||||||
*/
|
*/
|
||||||
const fs_builder dbld =
|
const fs_builder dbld =
|
||||||
abld.exec_all().group(hbld.dispatch_width() * 2, 0);
|
abld.exec_all().group(hbld.dispatch_width() * 2, 0);
|
||||||
fs_reg int_pixel_xy = dbld.vgrf(BRW_TYPE_UW);
|
brw_reg int_pixel_xy = dbld.vgrf(BRW_TYPE_UW);
|
||||||
|
|
||||||
dbld.ADD(int_pixel_xy,
|
dbld.ADD(int_pixel_xy,
|
||||||
fs_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)),
|
brw_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)),
|
||||||
int_pixel_offset_xy);
|
int_pixel_offset_xy);
|
||||||
|
|
||||||
hbld.emit(FS_OPCODE_PIXEL_X, offset(pixel_x, hbld, i), int_pixel_xy,
|
hbld.emit(FS_OPCODE_PIXEL_X, offset(pixel_x, hbld, i), int_pixel_xy,
|
||||||
|
|
@ -352,7 +352,7 @@ fs_visitor::emit_interpolation_setup()
|
||||||
}
|
}
|
||||||
|
|
||||||
abld = bld.annotate("compute pos.z");
|
abld = bld.annotate("compute pos.z");
|
||||||
fs_reg coarse_z;
|
brw_reg coarse_z;
|
||||||
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER &&
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER &&
|
||||||
wm_prog_data->uses_depth_w_coefficients) {
|
wm_prog_data->uses_depth_w_coefficients) {
|
||||||
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
|
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
|
||||||
|
|
@ -360,30 +360,30 @@ fs_visitor::emit_interpolation_setup()
|
||||||
* pixels locations, here we recompute the Z value with 2 coefficients
|
* pixels locations, here we recompute the Z value with 2 coefficients
|
||||||
* in X & Y axis.
|
* in X & Y axis.
|
||||||
*/
|
*/
|
||||||
fs_reg coef_payload = brw_vec8_grf(fs_payload().depth_w_coef_reg, 0);
|
brw_reg coef_payload = brw_vec8_grf(fs_payload().depth_w_coef_reg, 0);
|
||||||
const fs_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
|
const brw_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
|
||||||
const fs_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
|
const brw_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
|
||||||
const fs_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
|
const brw_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
|
||||||
const fs_reg z_cy = brw_vec1_grf(coef_payload.nr, 0);
|
const brw_reg z_cy = brw_vec1_grf(coef_payload.nr, 0);
|
||||||
const fs_reg z_c0 = brw_vec1_grf(coef_payload.nr, 3);
|
const brw_reg z_c0 = brw_vec1_grf(coef_payload.nr, 3);
|
||||||
|
|
||||||
const fs_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
|
const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
|
||||||
const fs_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
|
const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
abld.ADD(float_pixel_x, this->pixel_x, negate(x_start));
|
abld.ADD(float_pixel_x, this->pixel_x, negate(x_start));
|
||||||
abld.ADD(float_pixel_y, this->pixel_y, negate(y_start));
|
abld.ADD(float_pixel_y, this->pixel_y, negate(y_start));
|
||||||
|
|
||||||
/* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
|
/* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
|
||||||
const fs_reg u8_cps_width = fs_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB));
|
const brw_reg u8_cps_width = brw_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB));
|
||||||
/* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
|
/* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
|
||||||
const fs_reg u8_cps_height = byte_offset(u8_cps_width, 1);
|
const brw_reg u8_cps_height = byte_offset(u8_cps_width, 1);
|
||||||
const fs_reg u32_cps_width = abld.vgrf(BRW_TYPE_UD);
|
const brw_reg u32_cps_width = abld.vgrf(BRW_TYPE_UD);
|
||||||
const fs_reg u32_cps_height = abld.vgrf(BRW_TYPE_UD);
|
const brw_reg u32_cps_height = abld.vgrf(BRW_TYPE_UD);
|
||||||
abld.MOV(u32_cps_width, u8_cps_width);
|
abld.MOV(u32_cps_width, u8_cps_width);
|
||||||
abld.MOV(u32_cps_height, u8_cps_height);
|
abld.MOV(u32_cps_height, u8_cps_height);
|
||||||
|
|
||||||
const fs_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
|
const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
|
||||||
const fs_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
|
const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
|
||||||
abld.MOV(f_cps_width, u32_cps_width);
|
abld.MOV(f_cps_width, u32_cps_width);
|
||||||
abld.MOV(f_cps_height, u32_cps_height);
|
abld.MOV(f_cps_height, u32_cps_height);
|
||||||
|
|
||||||
|
|
@ -401,7 +401,7 @@ fs_visitor::emit_interpolation_setup()
|
||||||
|
|
||||||
if (wm_prog_data->uses_depth_w_coefficients ||
|
if (wm_prog_data->uses_depth_w_coefficients ||
|
||||||
wm_prog_data->uses_src_depth) {
|
wm_prog_data->uses_src_depth) {
|
||||||
fs_reg sample_z = this->pixel_z;
|
brw_reg sample_z = this->pixel_z;
|
||||||
|
|
||||||
switch (wm_prog_data->coarse_pixel_dispatch) {
|
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||||
case BRW_NEVER:
|
case BRW_NEVER:
|
||||||
|
|
@ -505,8 +505,8 @@ fs_visitor::emit_interpolation_setup()
|
||||||
if (!(centroid_modes & (1 << i)))
|
if (!(centroid_modes & (1 << i)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const fs_reg centroid_delta_xy = delta_xy[i];
|
const brw_reg centroid_delta_xy = delta_xy[i];
|
||||||
const fs_reg &pixel_delta_xy = delta_xy[i - 1];
|
const brw_reg &pixel_delta_xy = delta_xy[i - 1];
|
||||||
|
|
||||||
delta_xy[i] = bld.vgrf(BRW_TYPE_F, 2);
|
delta_xy[i] = bld.vgrf(BRW_TYPE_F, 2);
|
||||||
|
|
||||||
|
|
@ -525,15 +525,15 @@ fs_visitor::emit_interpolation_setup()
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
||||||
fs_reg color0, fs_reg color1,
|
brw_reg color0, brw_reg color1,
|
||||||
fs_reg src0_alpha, unsigned components)
|
brw_reg src0_alpha, unsigned components)
|
||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_FRAGMENT);
|
assert(stage == MESA_SHADER_FRAGMENT);
|
||||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||||
|
|
||||||
/* Hand over gl_FragDepth or the payload depth. */
|
/* Hand over gl_FragDepth or the payload depth. */
|
||||||
const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
|
const brw_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
|
||||||
fs_reg src_depth, src_stencil;
|
brw_reg src_depth, src_stencil;
|
||||||
|
|
||||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||||
src_depth = frag_depth;
|
src_depth = frag_depth;
|
||||||
|
|
@ -541,13 +541,13 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
||||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
||||||
src_stencil = frag_stencil;
|
src_stencil = frag_stencil;
|
||||||
|
|
||||||
const fs_reg sources[] = {
|
const brw_reg sources[] = {
|
||||||
color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
|
color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
|
||||||
(prog_data->uses_omask ? sample_mask : fs_reg()),
|
(prog_data->uses_omask ? sample_mask : brw_reg()),
|
||||||
brw_imm_ud(components)
|
brw_imm_ud(components)
|
||||||
};
|
};
|
||||||
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
|
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
|
||||||
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
|
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, brw_reg(),
|
||||||
sources, ARRAY_SIZE(sources));
|
sources, ARRAY_SIZE(sources));
|
||||||
|
|
||||||
if (prog_data->uses_kill) {
|
if (prog_data->uses_kill) {
|
||||||
|
|
@ -572,7 +572,7 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
|
||||||
const fs_builder abld = bld.annotate(
|
const fs_builder abld = bld.annotate(
|
||||||
ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
|
ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
|
||||||
|
|
||||||
fs_reg src0_alpha;
|
brw_reg src0_alpha;
|
||||||
if (replicate_alpha && target != 0)
|
if (replicate_alpha && target != 0)
|
||||||
src0_alpha = offset(outputs[0], bld, 3);
|
src0_alpha = offset(outputs[0], bld, 3);
|
||||||
|
|
||||||
|
|
@ -589,9 +589,9 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
|
||||||
/* FINISHME: Factor out this frequently recurring pattern into a
|
/* FINISHME: Factor out this frequently recurring pattern into a
|
||||||
* helper function.
|
* helper function.
|
||||||
*/
|
*/
|
||||||
const fs_reg srcs[] = { reg_undef, reg_undef,
|
const brw_reg srcs[] = { reg_undef, reg_undef,
|
||||||
reg_undef, offset(this->outputs[0], bld, 3) };
|
reg_undef, offset(this->outputs[0], bld, 3) };
|
||||||
const fs_reg tmp = bld.vgrf(BRW_TYPE_UD, 4);
|
const brw_reg tmp = bld.vgrf(BRW_TYPE_UD, 4);
|
||||||
bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
|
bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
|
||||||
|
|
||||||
inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4);
|
inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4);
|
||||||
|
|
@ -665,7 +665,7 @@ fs_visitor::emit_fb_writes()
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
|
||||||
{
|
{
|
||||||
int slot, urb_offset, length;
|
int slot, urb_offset, length;
|
||||||
int starting_urb_offset = 0;
|
int starting_urb_offset = 0;
|
||||||
|
|
@ -675,8 +675,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ | VARYING_BIT_PRIMITIVE_SHADING_RATE;
|
VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ | VARYING_BIT_PRIMITIVE_SHADING_RATE;
|
||||||
const struct intel_vue_map *vue_map = &vue_prog_data->vue_map;
|
const struct intel_vue_map *vue_map = &vue_prog_data->vue_map;
|
||||||
bool flush;
|
bool flush;
|
||||||
fs_reg sources[8];
|
brw_reg sources[8];
|
||||||
fs_reg urb_handle;
|
brw_reg urb_handle;
|
||||||
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case MESA_SHADER_VERTEX:
|
case MESA_SHADER_VERTEX:
|
||||||
|
|
@ -694,7 +694,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
|
|
||||||
const fs_builder bld = fs_builder(this).at_end();
|
const fs_builder bld = fs_builder(this).at_end();
|
||||||
|
|
||||||
fs_reg per_slot_offsets;
|
brw_reg per_slot_offsets;
|
||||||
|
|
||||||
if (stage == MESA_SHADER_GEOMETRY) {
|
if (stage == MESA_SHADER_GEOMETRY) {
|
||||||
const struct brw_gs_prog_data *gs_prog_data =
|
const struct brw_gs_prog_data *gs_prog_data =
|
||||||
|
|
@ -760,7 +760,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
brw_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
bld.MOV(zero, brw_imm_ud(0u));
|
bld.MOV(zero, brw_imm_ud(0u));
|
||||||
|
|
||||||
|
|
@ -769,7 +769,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE];
|
sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE];
|
||||||
} else if (devinfo->has_coarse_pixel_primitive_and_cb) {
|
} else if (devinfo->has_coarse_pixel_primitive_and_cb) {
|
||||||
uint32_t one_fp16 = 0x3C00;
|
uint32_t one_fp16 = 0x3C00;
|
||||||
fs_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
brw_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16));
|
bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16));
|
||||||
sources[length++] = one_by_one_fp16;
|
sources[length++] = one_by_one_fp16;
|
||||||
|
|
@ -839,7 +839,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
if (length == 8 || (length > 0 && slot == last_slot))
|
if (length == 8 || (length > 0 && slot == last_slot))
|
||||||
flush = true;
|
flush = true;
|
||||||
if (flush) {
|
if (flush) {
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
|
||||||
|
|
@ -883,14 +883,14 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
if (stage == MESA_SHADER_GEOMETRY)
|
if (stage == MESA_SHADER_GEOMETRY)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
fs_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
fs_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
brw_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
|
|
||||||
bld.exec_all().MOV(uniform_urb_handle, urb_handle);
|
bld.exec_all().MOV(uniform_urb_handle, urb_handle);
|
||||||
|
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
|
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
|
||||||
|
|
@ -910,9 +910,9 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
*/
|
*/
|
||||||
if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) {
|
if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) {
|
||||||
assert(dispatch_width == 8);
|
assert(dispatch_width == 8);
|
||||||
fs_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
|
brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
|
||||||
fs_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
|
brw_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
|
||||||
fs_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD);
|
brw_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Workaround requires all 8 channels (lanes) to be valid. This is
|
/* Workaround requires all 8 channels (lanes) to be valid. This is
|
||||||
* understood to mean they all need to be alive. First trick is to find
|
* understood to mean they all need to be alive. First trick is to find
|
||||||
|
|
@ -941,7 +941,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||||
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
|
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
|
||||||
bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
|
bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
|
||||||
|
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
||||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
|
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||||
|
|
@ -958,7 +958,7 @@ void
|
||||||
fs_visitor::emit_urb_fence()
|
fs_visitor::emit_urb_fence()
|
||||||
{
|
{
|
||||||
const fs_builder bld = fs_builder(this).at_end();
|
const fs_builder bld = fs_builder(this).at_end();
|
||||||
fs_reg dst = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dst = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
|
fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
|
||||||
brw_vec8_grf(0, 0),
|
brw_vec8_grf(0, 0),
|
||||||
brw_imm_ud(true),
|
brw_imm_ud(true),
|
||||||
|
|
@ -983,7 +983,7 @@ fs_visitor::emit_cs_terminate()
|
||||||
* make sure it uses the appropriate register range.
|
* make sure it uses the appropriate register range.
|
||||||
*/
|
*/
|
||||||
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD);
|
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD);
|
||||||
fs_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)),
|
brw_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0);
|
ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0);
|
||||||
|
|
||||||
|
|
@ -999,11 +999,11 @@ fs_visitor::emit_cs_terminate()
|
||||||
if (devinfo->ver < 11)
|
if (devinfo->ver < 11)
|
||||||
desc |= (1 << 4); /* Do not dereference URB */
|
desc |= (1 << 4); /* Do not dereference URB */
|
||||||
|
|
||||||
fs_reg srcs[4] = {
|
brw_reg srcs[4] = {
|
||||||
brw_imm_ud(desc), /* desc */
|
brw_imm_ud(desc), /* desc */
|
||||||
brw_imm_ud(0), /* ex_desc */
|
brw_imm_ud(0), /* ex_desc */
|
||||||
payload, /* payload */
|
payload, /* payload */
|
||||||
fs_reg(), /* payload2 */
|
brw_reg(), /* payload2 */
|
||||||
};
|
};
|
||||||
|
|
||||||
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
|
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,7 @@ brw_fs_workaround_memory_fence_before_eot(fs_visitor &s)
|
||||||
const fs_builder ibld(&s, block, inst);
|
const fs_builder ibld(&s, block, inst);
|
||||||
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||||
|
|
||||||
fs_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
|
||||||
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
||||||
dst, brw_vec8_grf(0, 0),
|
dst, brw_vec8_grf(0, 0),
|
||||||
/* commit enable */ brw_imm_ud(1),
|
/* commit enable */ brw_imm_ud(1),
|
||||||
|
|
@ -230,7 +230,7 @@ brw_fs_workaround_nomask_control_flow(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
const fs_builder ubld = fs_builder(&s, block, inst)
|
const fs_builder ubld = fs_builder(&s, block, inst)
|
||||||
.exec_all().group(s.dispatch_width, 0);
|
.exec_all().group(s.dispatch_width, 0);
|
||||||
const fs_reg flag = retype(brw_flag_reg(0, 0),
|
const brw_reg flag = retype(brw_flag_reg(0, 0),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Due to the lack of flag register allocation we need to save
|
/* Due to the lack of flag register allocation we need to save
|
||||||
|
|
@ -238,7 +238,7 @@ brw_fs_workaround_nomask_control_flow(fs_visitor &s)
|
||||||
*/
|
*/
|
||||||
const bool save_flag = flag_liveout &
|
const bool save_flag = flag_liveout &
|
||||||
brw_fs_flag_mask(flag, s.dispatch_width / 8);
|
brw_fs_flag_mask(flag, s.dispatch_width / 8);
|
||||||
const fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);
|
const brw_reg tmp = ubld.group(8, 0).vgrf(flag.type);
|
||||||
|
|
||||||
if (save_flag) {
|
if (save_flag) {
|
||||||
ubld.group(8, 0).UNDEF(tmp);
|
ubld.group(8, 0).UNDEF(tmp);
|
||||||
|
|
|
||||||
|
|
@ -28,10 +28,8 @@
|
||||||
#include "brw_ir.h"
|
#include "brw_ir.h"
|
||||||
#include "brw_ir_allocator.h"
|
#include "brw_ir_allocator.h"
|
||||||
|
|
||||||
using fs_reg = brw_reg;
|
static inline brw_reg
|
||||||
|
horiz_offset(const brw_reg &reg, unsigned delta)
|
||||||
static inline fs_reg
|
|
||||||
horiz_offset(const fs_reg &reg, unsigned delta)
|
|
||||||
{
|
{
|
||||||
switch (reg.file) {
|
switch (reg.file) {
|
||||||
case BAD_FILE:
|
case BAD_FILE:
|
||||||
|
|
@ -65,8 +63,8 @@ horiz_offset(const fs_reg &reg, unsigned delta)
|
||||||
unreachable("Invalid register file");
|
unreachable("Invalid register file");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline fs_reg
|
static inline brw_reg
|
||||||
offset(fs_reg reg, unsigned width, unsigned delta)
|
offset(brw_reg reg, unsigned width, unsigned delta)
|
||||||
{
|
{
|
||||||
switch (reg.file) {
|
switch (reg.file) {
|
||||||
case BAD_FILE:
|
case BAD_FILE:
|
||||||
|
|
@ -87,8 +85,8 @@ offset(fs_reg reg, unsigned width, unsigned delta)
|
||||||
* Get the scalar channel of \p reg given by \p idx and replicate it to all
|
* Get the scalar channel of \p reg given by \p idx and replicate it to all
|
||||||
* channels of the result.
|
* channels of the result.
|
||||||
*/
|
*/
|
||||||
static inline fs_reg
|
static inline brw_reg
|
||||||
component(fs_reg reg, unsigned idx)
|
component(brw_reg reg, unsigned idx)
|
||||||
{
|
{
|
||||||
reg = horiz_offset(reg, idx);
|
reg = horiz_offset(reg, idx);
|
||||||
reg.stride = 0;
|
reg.stride = 0;
|
||||||
|
|
@ -109,7 +107,7 @@ component(fs_reg reg, unsigned idx)
|
||||||
* address spaces, one for each allocation and input attribute respectively.
|
* address spaces, one for each allocation and input attribute respectively.
|
||||||
*/
|
*/
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
reg_space(const fs_reg &r)
|
reg_space(const brw_reg &r)
|
||||||
{
|
{
|
||||||
return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
|
return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
|
||||||
}
|
}
|
||||||
|
|
@ -119,7 +117,7 @@ reg_space(const fs_reg &r)
|
||||||
* reg_space().
|
* reg_space().
|
||||||
*/
|
*/
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
reg_offset(const fs_reg &r)
|
reg_offset(const brw_reg &r)
|
||||||
{
|
{
|
||||||
return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
|
return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
|
||||||
(r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
|
(r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
|
||||||
|
|
@ -132,7 +130,7 @@ reg_offset(const fs_reg &r)
|
||||||
* one, or zero if components are tightly packed in the register file.
|
* one, or zero if components are tightly packed in the register file.
|
||||||
*/
|
*/
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
reg_padding(const fs_reg &r)
|
reg_padding(const brw_reg &r)
|
||||||
{
|
{
|
||||||
const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
|
const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
|
||||||
r.hstride == 0 ? 0 :
|
r.hstride == 0 ? 0 :
|
||||||
|
|
@ -146,7 +144,7 @@ reg_padding(const fs_reg &r)
|
||||||
* spanning \p ds bytes.
|
* spanning \p ds bytes.
|
||||||
*/
|
*/
|
||||||
static inline bool
|
static inline bool
|
||||||
regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
|
||||||
{
|
{
|
||||||
if (r.file != s.file)
|
if (r.file != s.file)
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -166,7 +164,7 @@ regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
||||||
* [s.offset, s.offset + ds[.
|
* [s.offset, s.offset + ds[.
|
||||||
*/
|
*/
|
||||||
static inline bool
|
static inline bool
|
||||||
region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
|
||||||
{
|
{
|
||||||
return reg_space(r) == reg_space(s) &&
|
return reg_space(r) == reg_space(s) &&
|
||||||
reg_offset(r) >= reg_offset(s) &&
|
reg_offset(r) >= reg_offset(s) &&
|
||||||
|
|
@ -179,7 +177,7 @@ region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
||||||
* channels.
|
* channels.
|
||||||
*/
|
*/
|
||||||
static inline bool
|
static inline bool
|
||||||
is_periodic(const fs_reg &reg, unsigned n)
|
is_periodic(const brw_reg &reg, unsigned n)
|
||||||
{
|
{
|
||||||
if (reg.file == BAD_FILE || reg.is_null()) {
|
if (reg.file == BAD_FILE || reg.is_null()) {
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -203,7 +201,7 @@ is_periodic(const fs_reg &reg, unsigned n)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
is_uniform(const fs_reg &reg)
|
is_uniform(const brw_reg &reg)
|
||||||
{
|
{
|
||||||
return is_periodic(reg, 1);
|
return is_periodic(reg, 1);
|
||||||
}
|
}
|
||||||
|
|
@ -211,47 +209,47 @@ is_uniform(const fs_reg &reg)
|
||||||
/**
|
/**
|
||||||
* Get the specified 8-component quarter of a register.
|
* Get the specified 8-component quarter of a register.
|
||||||
*/
|
*/
|
||||||
static inline fs_reg
|
static inline brw_reg
|
||||||
quarter(const fs_reg &reg, unsigned idx)
|
quarter(const brw_reg &reg, unsigned idx)
|
||||||
{
|
{
|
||||||
assert(idx < 4);
|
assert(idx < 4);
|
||||||
return horiz_offset(reg, 8 * idx);
|
return horiz_offset(reg, 8 * idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline fs_reg
|
static inline brw_reg
|
||||||
horiz_stride(fs_reg reg, unsigned s)
|
horiz_stride(brw_reg reg, unsigned s)
|
||||||
{
|
{
|
||||||
reg.stride *= s;
|
reg.stride *= s;
|
||||||
return reg;
|
return reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool fs_reg_saturate_immediate(fs_reg *reg);
|
bool fs_reg_saturate_immediate(brw_reg *reg);
|
||||||
bool fs_reg_negate_immediate(fs_reg *reg);
|
bool fs_reg_negate_immediate(brw_reg *reg);
|
||||||
bool fs_reg_abs_immediate(fs_reg *reg);
|
bool fs_reg_abs_immediate(brw_reg *reg);
|
||||||
|
|
||||||
static const fs_reg reg_undef;
|
static const brw_reg reg_undef;
|
||||||
|
|
||||||
struct fs_inst : public exec_node {
|
struct fs_inst : public exec_node {
|
||||||
private:
|
private:
|
||||||
fs_inst &operator=(const fs_inst &);
|
fs_inst &operator=(const fs_inst &);
|
||||||
|
|
||||||
void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
|
void init(enum opcode opcode, uint8_t exec_width, const brw_reg &dst,
|
||||||
const fs_reg *src, unsigned sources);
|
const brw_reg *src, unsigned sources);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
|
DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
|
||||||
|
|
||||||
fs_inst();
|
fs_inst();
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size);
|
fs_inst(enum opcode opcode, uint8_t exec_size);
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
|
fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst);
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0);
|
const brw_reg &src0);
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0, const fs_reg &src1);
|
const brw_reg &src0, const brw_reg &src1);
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
|
const brw_reg &src0, const brw_reg &src1, const brw_reg &src2);
|
||||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
|
||||||
const fs_reg src[], unsigned sources);
|
const brw_reg src[], unsigned sources);
|
||||||
fs_inst(const fs_inst &that);
|
fs_inst(const fs_inst &that);
|
||||||
~fs_inst();
|
~fs_inst();
|
||||||
|
|
||||||
|
|
@ -332,7 +330,7 @@ public:
|
||||||
const char *annotation;
|
const char *annotation;
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
uint8_t sources; /**< Number of fs_reg sources. */
|
uint8_t sources; /**< Number of brw_reg sources. */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execution size of the instruction. This is used by the generator to
|
* Execution size of the instruction. This is used by the generator to
|
||||||
|
|
@ -425,9 +423,9 @@ public:
|
||||||
uint32_t bits;
|
uint32_t bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
fs_reg dst;
|
brw_reg dst;
|
||||||
fs_reg *src;
|
brw_reg *src;
|
||||||
fs_reg builtin_src[4];
|
brw_reg builtin_src[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -587,7 +585,7 @@ is_unordered(const intel_device_info *devinfo, const fs_inst *inst)
|
||||||
* single one-dimensional stride.
|
* single one-dimensional stride.
|
||||||
*/
|
*/
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
byte_stride(const fs_reg &reg)
|
byte_stride(const brw_reg &reg)
|
||||||
{
|
{
|
||||||
switch (reg.file) {
|
switch (reg.file) {
|
||||||
case BAD_FILE:
|
case BAD_FILE:
|
||||||
|
|
@ -675,7 +673,7 @@ has_dst_aligned_region_restriction(const intel_device_info *devinfo,
|
||||||
static inline bool
|
static inline bool
|
||||||
has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
||||||
const fs_inst *inst,
|
const fs_inst *inst,
|
||||||
const fs_reg *srcs, unsigned num_srcs)
|
const brw_reg *srcs, unsigned num_srcs)
|
||||||
{
|
{
|
||||||
if (devinfo->ver >= 20 &&
|
if (devinfo->ver >= 20 &&
|
||||||
brw_type_is_int(inst->dst.type) &&
|
brw_type_is_int(inst->dst.type) &&
|
||||||
|
|
@ -741,7 +739,7 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst)
|
||||||
inline bool
|
inline bool
|
||||||
is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
||||||
if (is_copy_payload(file, inst)) {
|
if (is_copy_payload(file, inst)) {
|
||||||
fs_reg reg = inst->src[0];
|
brw_reg reg = inst->src[0];
|
||||||
|
|
||||||
for (unsigned i = 0; i < inst->sources; i++) {
|
for (unsigned i = 0; i < inst->sources; i++) {
|
||||||
reg.type = inst->src[i].type;
|
reg.type = inst->src[i].type;
|
||||||
|
|
@ -823,7 +821,7 @@ brw_fs_bit_mask(unsigned n)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned
|
static inline unsigned
|
||||||
brw_fs_flag_mask(const fs_reg &r, unsigned sz)
|
brw_fs_flag_mask(const brw_reg &r, unsigned sz)
|
||||||
{
|
{
|
||||||
if (r.file == ARF) {
|
if (r.file == ARF) {
|
||||||
const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
|
const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
|
||||||
|
|
|
||||||
|
|
@ -778,7 +778,7 @@ namespace {
|
||||||
* Return the dependency ID of a backend_reg, offset by \p delta GRFs.
|
* Return the dependency ID of a backend_reg, offset by \p delta GRFs.
|
||||||
*/
|
*/
|
||||||
enum intel_eu_dependency_id
|
enum intel_eu_dependency_id
|
||||||
reg_dependency_id(const intel_device_info *devinfo, const fs_reg &r,
|
reg_dependency_id(const intel_device_info *devinfo, const brw_reg &r,
|
||||||
const int delta)
|
const int delta)
|
||||||
{
|
{
|
||||||
if (r.file == VGRF) {
|
if (r.file == VGRF) {
|
||||||
|
|
|
||||||
|
|
@ -41,13 +41,13 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(inst->size_written % REG_SIZE == 0);
|
assert(inst->size_written % REG_SIZE == 0);
|
||||||
assert(inst->header_size == 0);
|
assert(inst->header_size == 0);
|
||||||
|
|
||||||
fs_reg payload_sources[2];
|
brw_reg payload_sources[2];
|
||||||
unsigned header_size = 0;
|
unsigned header_size = 0;
|
||||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
|
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||||
if (per_slot_present)
|
if (per_slot_present)
|
||||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||||
|
|
||||||
fs_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size),
|
brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size),
|
||||||
BRW_TYPE_F);
|
BRW_TYPE_F);
|
||||||
bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size);
|
bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size);
|
||||||
|
|
||||||
|
|
@ -84,12 +84,12 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(inst->header_size == 0);
|
assert(inst->header_size == 0);
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
const brw_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||||
|
|
||||||
/* Calculate the total number of components of the payload. */
|
/* Calculate the total number of components of the payload. */
|
||||||
const unsigned dst_comps = inst->size_written / (REG_SIZE * reg_unit(devinfo));
|
const unsigned dst_comps = inst->size_written / (REG_SIZE * reg_unit(devinfo));
|
||||||
|
|
||||||
fs_reg payload = bld.vgrf(BRW_TYPE_UD);
|
brw_reg payload = bld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
bld.MOV(payload, handle);
|
bld.MOV(payload, handle);
|
||||||
|
|
||||||
|
|
@ -101,7 +101,7 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
inst->offset = 0;
|
inst->offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
brw_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||||
if (offsets.file != BAD_FILE) {
|
if (offsets.file != BAD_FILE) {
|
||||||
bld.ADD(payload, payload, offsets);
|
bld.ADD(payload, payload, offsets);
|
||||||
}
|
}
|
||||||
|
|
@ -147,8 +147,8 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const unsigned length = 1 + per_slot_present + channel_mask_present +
|
const unsigned length = 1 + per_slot_present + channel_mask_present +
|
||||||
inst->components_read(URB_LOGICAL_SRC_DATA);
|
inst->components_read(URB_LOGICAL_SRC_DATA);
|
||||||
|
|
||||||
fs_reg *payload_sources = new fs_reg[length];
|
brw_reg *payload_sources = new brw_reg[length];
|
||||||
fs_reg payload = brw_vgrf(bld.shader->alloc.allocate(length),
|
brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(length),
|
||||||
BRW_TYPE_F);
|
BRW_TYPE_F);
|
||||||
|
|
||||||
unsigned header_size = 0;
|
unsigned header_size = 0;
|
||||||
|
|
@ -197,16 +197,16 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(devinfo->has_lsc);
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
const brw_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||||
const fs_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
|
const brw_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
|
||||||
inst->src[URB_LOGICAL_SRC_DATA] : fs_reg(brw_imm_ud(0));
|
inst->src[URB_LOGICAL_SRC_DATA] : brw_reg(brw_imm_ud(0));
|
||||||
assert(brw_type_size_bytes(src.type) == 4);
|
assert(brw_type_size_bytes(src.type) == 4);
|
||||||
|
|
||||||
/* Calculate the total number of components of the payload. */
|
/* Calculate the total number of components of the payload. */
|
||||||
const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
|
const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
|
||||||
const unsigned src_sz = brw_type_size_bytes(src.type);
|
const unsigned src_sz = brw_type_size_bytes(src.type);
|
||||||
|
|
||||||
fs_reg payload = bld.vgrf(BRW_TYPE_UD);
|
brw_reg payload = bld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
bld.MOV(payload, handle);
|
bld.MOV(payload, handle);
|
||||||
|
|
||||||
|
|
@ -218,12 +218,12 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
inst->offset = 0;
|
inst->offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
brw_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||||
if (offsets.file != BAD_FILE) {
|
if (offsets.file != BAD_FILE) {
|
||||||
bld.ADD(payload, payload, offsets);
|
bld.ADD(payload, payload, offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
const fs_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
||||||
unsigned mask = 0;
|
unsigned mask = 0;
|
||||||
|
|
||||||
if (cmask.file != BAD_FILE) {
|
if (cmask.file != BAD_FILE) {
|
||||||
|
|
@ -232,7 +232,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
mask = cmask.ud >> 16;
|
mask = cmask.ud >> 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg payload2 = bld.move_to_vgrf(src, src_comps);
|
brw_reg payload2 = bld.move_to_vgrf(src, src_comps);
|
||||||
const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
|
const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
|
||||||
|
|
||||||
inst->sfid = BRW_SFID_URB;
|
inst->sfid = BRW_SFID_URB;
|
||||||
|
|
@ -265,10 +265,10 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
|
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
|
||||||
fs_reg *dst, fs_reg color, unsigned components)
|
brw_reg *dst, brw_reg color, unsigned components)
|
||||||
{
|
{
|
||||||
if (key->clamp_fragment_color) {
|
if (key->clamp_fragment_color) {
|
||||||
fs_reg tmp = bld.vgrf(BRW_TYPE_F, 4);
|
brw_reg tmp = bld.vgrf(BRW_TYPE_F, 4);
|
||||||
assert(color.type == BRW_TYPE_F);
|
assert(color.type == BRW_TYPE_F);
|
||||||
|
|
||||||
for (unsigned i = 0; i < components; i++)
|
for (unsigned i = 0; i < components; i++)
|
||||||
|
|
@ -290,19 +290,19 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
{
|
{
|
||||||
assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
|
assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const fs_reg color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
|
const brw_reg color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
|
||||||
const fs_reg color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
|
const brw_reg color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
|
||||||
const fs_reg src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
|
const brw_reg src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
|
||||||
const fs_reg src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
|
const brw_reg src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
|
||||||
const fs_reg dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
|
const brw_reg dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
|
||||||
const fs_reg src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
|
const brw_reg src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
|
||||||
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
|
brw_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
|
||||||
const unsigned components =
|
const unsigned components =
|
||||||
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
|
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
|
||||||
|
|
||||||
assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
|
assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
|
||||||
|
|
||||||
fs_reg sources[15];
|
brw_reg sources[15];
|
||||||
int header_size = 2, payload_header_size;
|
int header_size = 2, payload_header_size;
|
||||||
unsigned length = 0;
|
unsigned length = 0;
|
||||||
|
|
||||||
|
|
@ -319,7 +319,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
*/
|
*/
|
||||||
const fs_builder ubld = bld.exec_all().group(8, 0);
|
const fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
|
|
||||||
fs_reg header = ubld.vgrf(BRW_TYPE_UD, 2);
|
brw_reg header = ubld.vgrf(BRW_TYPE_UD, 2);
|
||||||
if (bld.group() < 16) {
|
if (bld.group() < 16) {
|
||||||
/* The header starts off as g0 and g1 for the first half */
|
/* The header starts off as g0 and g1 for the first half */
|
||||||
ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
|
ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
|
||||||
|
|
@ -327,7 +327,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
} else {
|
} else {
|
||||||
/* The header starts off as g0 and g2 for the second half */
|
/* The header starts off as g0 and g2 for the second half */
|
||||||
assert(bld.group() < 32);
|
assert(bld.group() < 32);
|
||||||
const fs_reg header_sources[2] = {
|
const brw_reg header_sources[2] = {
|
||||||
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
|
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
|
||||||
retype(brw_vec8_grf(2, 0), BRW_TYPE_UD),
|
retype(brw_vec8_grf(2, 0), BRW_TYPE_UD),
|
||||||
};
|
};
|
||||||
|
|
@ -379,7 +379,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F);
|
sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F);
|
||||||
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
|
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
|
||||||
.MOV(sources[length],
|
.MOV(sources[length],
|
||||||
fs_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0)));
|
brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0)));
|
||||||
length++;
|
length++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -387,7 +387,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
|
for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
|
||||||
const fs_builder &ubld = bld.exec_all().group(8, i)
|
const fs_builder &ubld = bld.exec_all().group(8, i)
|
||||||
.annotate("FB write src0 alpha");
|
.annotate("FB write src0 alpha");
|
||||||
const fs_reg tmp = ubld.vgrf(BRW_TYPE_F);
|
const brw_reg tmp = ubld.vgrf(BRW_TYPE_F);
|
||||||
ubld.MOV(tmp, horiz_offset(src0_alpha, i * 8));
|
ubld.MOV(tmp, horiz_offset(src0_alpha, i * 8));
|
||||||
setup_color_payload(ubld, key, &sources[length], tmp, 1);
|
setup_color_payload(ubld, key, &sources[length], tmp, 1);
|
||||||
length++;
|
length++;
|
||||||
|
|
@ -395,7 +395,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sample_mask.file != BAD_FILE) {
|
if (sample_mask.file != BAD_FILE) {
|
||||||
const fs_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)),
|
const brw_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)),
|
||||||
BRW_TYPE_UD);
|
BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
|
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
|
||||||
|
|
@ -455,7 +455,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send from the GRF */
|
/* Send from the GRF */
|
||||||
fs_reg payload = brw_vgrf(-1, BRW_TYPE_F);
|
brw_reg payload = brw_vgrf(-1, BRW_TYPE_F);
|
||||||
fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
|
fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
|
||||||
payload.nr = bld.shader->alloc.allocate(regs_written(load));
|
payload.nr = bld.shader->alloc.allocate(regs_written(load));
|
||||||
load->dst = payload;
|
load->dst = payload;
|
||||||
|
|
@ -468,7 +468,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
|
brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
|
||||||
0 /* coarse_rt_write */);
|
0 /* coarse_rt_write */);
|
||||||
|
|
||||||
fs_reg desc = brw_imm_ud(0);
|
brw_reg desc = brw_imm_ud(0);
|
||||||
if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
|
if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
|
||||||
inst->desc |= (1 << 18);
|
inst->desc |= (1 << 18);
|
||||||
} else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
} else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
||||||
|
|
@ -519,7 +519,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
||||||
const unsigned length = 2;
|
const unsigned length = 2;
|
||||||
const fs_reg header = ubld.vgrf(BRW_TYPE_UD, length);
|
const brw_reg header = ubld.vgrf(BRW_TYPE_UD, length);
|
||||||
|
|
||||||
assert(devinfo->ver >= 9 && devinfo->ver < 20);
|
assert(devinfo->ver >= 9 && devinfo->ver < 20);
|
||||||
|
|
||||||
|
|
@ -528,7 +528,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
BRW_TYPE_UD));
|
BRW_TYPE_UD));
|
||||||
} else {
|
} else {
|
||||||
assert(bld.group() < 32);
|
assert(bld.group() < 32);
|
||||||
const fs_reg header_sources[] = {
|
const brw_reg header_sources[] = {
|
||||||
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
|
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
|
||||||
retype(brw_vec8_grf(2, 0), BRW_TYPE_UD)
|
retype(brw_vec8_grf(2, 0), BRW_TYPE_UD)
|
||||||
};
|
};
|
||||||
|
|
@ -567,7 +567,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
inst->src[0] = brw_imm_ud(0);
|
inst->src[0] = brw_imm_ud(0);
|
||||||
inst->src[1] = brw_imm_ud(0);
|
inst->src[1] = brw_imm_ud(0);
|
||||||
inst->src[2] = header;
|
inst->src[2] = header;
|
||||||
inst->src[3] = fs_reg();
|
inst->src[3] = brw_reg();
|
||||||
inst->mlen = length;
|
inst->mlen = length;
|
||||||
inst->header_size = length;
|
inst->header_size = length;
|
||||||
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
||||||
|
|
@ -580,7 +580,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_high_sampler(const struct intel_device_info *devinfo, const fs_reg &sampler)
|
is_high_sampler(const struct intel_device_info *devinfo, const brw_reg &sampler)
|
||||||
{
|
{
|
||||||
return sampler.file != IMM || sampler.ud >= 16;
|
return sampler.file != IMM || sampler.ud >= 16;
|
||||||
}
|
}
|
||||||
|
|
@ -676,15 +676,15 @@ sampler_msg_type(const intel_device_info *devinfo,
|
||||||
* the given requested_alignment_sz.
|
* the given requested_alignment_sz.
|
||||||
*/
|
*/
|
||||||
static fs_inst *
|
static fs_inst *
|
||||||
emit_load_payload_with_padding(const fs_builder &bld, const fs_reg &dst,
|
emit_load_payload_with_padding(const fs_builder &bld, const brw_reg &dst,
|
||||||
const fs_reg *src, unsigned sources,
|
const brw_reg *src, unsigned sources,
|
||||||
unsigned header_size,
|
unsigned header_size,
|
||||||
unsigned requested_alignment_sz)
|
unsigned requested_alignment_sz)
|
||||||
{
|
{
|
||||||
unsigned length = 0;
|
unsigned length = 0;
|
||||||
unsigned num_srcs =
|
unsigned num_srcs =
|
||||||
sources * DIV_ROUND_UP(requested_alignment_sz, bld.dispatch_width());
|
sources * DIV_ROUND_UP(requested_alignment_sz, bld.dispatch_width());
|
||||||
fs_reg *src_comps = new fs_reg[num_srcs];
|
brw_reg *src_comps = new brw_reg[num_srcs];
|
||||||
|
|
||||||
for (unsigned i = 0; i < header_size; i++)
|
for (unsigned i = 0; i < header_size; i++)
|
||||||
src_comps[length++] = src[i];
|
src_comps[length++] = src[i];
|
||||||
|
|
@ -702,7 +702,7 @@ emit_load_payload_with_padding(const fs_builder &bld, const fs_reg &dst,
|
||||||
*/
|
*/
|
||||||
if (src_sz < requested_alignment_sz) {
|
if (src_sz < requested_alignment_sz) {
|
||||||
for (unsigned j = 0; j < (requested_alignment_sz / src_sz) - 1; j++) {
|
for (unsigned j = 0; j < (requested_alignment_sz / src_sz) - 1; j++) {
|
||||||
src_comps[length++] = retype(fs_reg(), padding_payload_type);
|
src_comps[length++] = retype(brw_reg(), padding_payload_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -735,17 +735,17 @@ shader_opcode_needs_header(opcode op)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
const fs_reg &coordinate,
|
const brw_reg &coordinate,
|
||||||
const fs_reg &shadow_c,
|
const brw_reg &shadow_c,
|
||||||
fs_reg lod, const fs_reg &lod2,
|
brw_reg lod, const brw_reg &lod2,
|
||||||
const fs_reg &min_lod,
|
const brw_reg &min_lod,
|
||||||
const fs_reg &sample_index,
|
const brw_reg &sample_index,
|
||||||
const fs_reg &mcs,
|
const brw_reg &mcs,
|
||||||
const fs_reg &surface,
|
const brw_reg &surface,
|
||||||
const fs_reg &sampler,
|
const brw_reg &sampler,
|
||||||
const fs_reg &surface_handle,
|
const brw_reg &surface_handle,
|
||||||
const fs_reg &sampler_handle,
|
const brw_reg &sampler_handle,
|
||||||
const fs_reg &tg4_offset,
|
const brw_reg &tg4_offset,
|
||||||
unsigned payload_type_bit_size,
|
unsigned payload_type_bit_size,
|
||||||
unsigned coord_components,
|
unsigned coord_components,
|
||||||
unsigned grad_components,
|
unsigned grad_components,
|
||||||
|
|
@ -762,7 +762,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
unsigned reg_width = bld.dispatch_width() / 8;
|
unsigned reg_width = bld.dispatch_width() / 8;
|
||||||
unsigned header_size = 0, length = 0;
|
unsigned header_size = 0, length = 0;
|
||||||
opcode op = inst->opcode;
|
opcode op = inst->opcode;
|
||||||
fs_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
|
brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
||||||
sources[i] = bld.vgrf(payload_type);
|
sources[i] = bld.vgrf(payload_type);
|
||||||
|
|
||||||
|
|
@ -782,7 +782,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
* larger sampler numbers we need to offset the Sampler State Pointer in
|
* larger sampler numbers we need to offset the Sampler State Pointer in
|
||||||
* the header.
|
* the header.
|
||||||
*/
|
*/
|
||||||
fs_reg header = retype(sources[0], BRW_TYPE_UD);
|
brw_reg header = retype(sources[0], BRW_TYPE_UD);
|
||||||
for (header_size = 0; header_size < reg_unit(devinfo); header_size++)
|
for (header_size = 0; header_size < reg_unit(devinfo); header_size++)
|
||||||
sources[length++] = byte_offset(header, REG_SIZE * header_size);
|
sources[length++] = byte_offset(header, REG_SIZE * header_size);
|
||||||
|
|
||||||
|
|
@ -840,7 +840,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
ubld1.MOV(component(header, 3), sampler_handle);
|
ubld1.MOV(component(header, 3), sampler_handle);
|
||||||
}
|
}
|
||||||
} else if (is_high_sampler(devinfo, sampler)) {
|
} else if (is_high_sampler(devinfo, sampler)) {
|
||||||
fs_reg sampler_state_ptr =
|
brw_reg sampler_state_ptr =
|
||||||
retype(brw_vec1_grf(0, 3), BRW_TYPE_UD);
|
retype(brw_vec1_grf(0, 3), BRW_TYPE_UD);
|
||||||
|
|
||||||
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
|
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
|
||||||
|
|
@ -860,7 +860,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
ubld1.ADD(component(header, 3), sampler_state_ptr,
|
ubld1.ADD(component(header, 3), sampler_state_ptr,
|
||||||
brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
|
brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
|
||||||
} else {
|
} else {
|
||||||
fs_reg tmp = ubld1.vgrf(BRW_TYPE_UD);
|
brw_reg tmp = ubld1.vgrf(BRW_TYPE_UD);
|
||||||
ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
|
ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
|
||||||
ubld1.SHL(tmp, tmp, brw_imm_ud(4));
|
ubld1.SHL(tmp, tmp, brw_imm_ud(4));
|
||||||
ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
|
ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
|
||||||
|
|
@ -991,16 +991,16 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
* ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r
|
* ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r
|
||||||
*/
|
*/
|
||||||
if (op == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
|
if (op == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
|
||||||
fs_reg tmp = offset(mcs, bld, i);
|
brw_reg tmp = offset(mcs, bld, i);
|
||||||
sources[length] = retype(sources[length], payload_unsigned_type);
|
sources[length] = retype(sources[length], payload_unsigned_type);
|
||||||
bld.MOV(sources[length++],
|
bld.MOV(sources[length++],
|
||||||
mcs.file == IMM ? mcs :
|
mcs.file == IMM ? mcs :
|
||||||
fs_reg(subscript(tmp, payload_unsigned_type, 0)));
|
brw_reg(subscript(tmp, payload_unsigned_type, 0)));
|
||||||
|
|
||||||
sources[length] = retype(sources[length], payload_unsigned_type);
|
sources[length] = retype(sources[length], payload_unsigned_type);
|
||||||
bld.MOV(sources[length++],
|
bld.MOV(sources[length++],
|
||||||
mcs.file == IMM ? mcs :
|
mcs.file == IMM ? mcs :
|
||||||
fs_reg(subscript(tmp, payload_unsigned_type, 1)));
|
brw_reg(subscript(tmp, payload_unsigned_type, 1)));
|
||||||
} else {
|
} else {
|
||||||
sources[length] = retype(sources[length], payload_unsigned_type);
|
sources[length] = retype(sources[length], payload_unsigned_type);
|
||||||
bld.MOV(sources[length++],
|
bld.MOV(sources[length++],
|
||||||
|
|
@ -1087,7 +1087,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
bld.MOV(sources[length++], min_lod);
|
bld.MOV(sources[length++], min_lod);
|
||||||
}
|
}
|
||||||
|
|
||||||
const fs_reg src_payload =
|
const brw_reg src_payload =
|
||||||
brw_vgrf(bld.shader->alloc.allocate(length * reg_width),
|
brw_vgrf(bld.shader->alloc.allocate(length * reg_width),
|
||||||
BRW_TYPE_F);
|
BRW_TYPE_F);
|
||||||
/* In case of 16-bit payload each component takes one full register in
|
/* In case of 16-bit payload each component takes one full register in
|
||||||
|
|
@ -1149,7 +1149,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
inst->src[0] = brw_imm_ud(0);
|
inst->src[0] = brw_imm_ud(0);
|
||||||
} else {
|
} else {
|
||||||
const fs_builder ubld = bld.group(1, 0).exec_all();
|
const fs_builder ubld = bld.group(1, 0).exec_all();
|
||||||
fs_reg desc = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg desc = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
||||||
inst->src[0] = component(desc, 0);
|
inst->src[0] = component(desc, 0);
|
||||||
}
|
}
|
||||||
|
|
@ -1168,7 +1168,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
simd_mode,
|
simd_mode,
|
||||||
0 /* return_format unused on gfx7+ */);
|
0 /* return_format unused on gfx7+ */);
|
||||||
const fs_builder ubld = bld.group(1, 0).exec_all();
|
const fs_builder ubld = bld.group(1, 0).exec_all();
|
||||||
fs_reg desc = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg desc = ubld.vgrf(BRW_TYPE_UD);
|
||||||
if (surface.equals(sampler)) {
|
if (surface.equals(sampler)) {
|
||||||
/* This case is common in GL */
|
/* This case is common in GL */
|
||||||
ubld.MUL(desc, surface, brw_imm_ud(0x101));
|
ubld.MUL(desc, surface, brw_imm_ud(0x101));
|
||||||
|
|
@ -1212,7 +1212,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
||||||
const fs_inst *inst)
|
const fs_inst *inst)
|
||||||
{
|
{
|
||||||
assert(inst);
|
assert(inst);
|
||||||
const fs_reg *src = inst->src;
|
const brw_reg *src = inst->src;
|
||||||
unsigned src_type_size = 0;
|
unsigned src_type_size = 0;
|
||||||
|
|
||||||
/* All sources need to have the same size, therefore seek the first valid
|
/* All sources need to have the same size, therefore seek the first valid
|
||||||
|
|
@ -1263,18 +1263,18 @@ static void
|
||||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const fs_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
|
const brw_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
|
||||||
const fs_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
|
const brw_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
|
||||||
const fs_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
|
const brw_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
|
||||||
const fs_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
|
const brw_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
|
||||||
const fs_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
|
const brw_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
|
||||||
const fs_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
|
const brw_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
|
||||||
const fs_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
|
const brw_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
|
||||||
const fs_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
|
const brw_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
|
||||||
const fs_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
|
const brw_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
|
||||||
const fs_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
|
const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
|
||||||
const fs_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
|
const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
|
||||||
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
|
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
|
||||||
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
|
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
|
||||||
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
|
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
|
||||||
|
|
@ -1312,7 +1312,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
|
|
||||||
const fs_visitor &s = *bld.shader;
|
const fs_visitor &s = *bld.shader;
|
||||||
const fs_reg vector_mask = ubld.vgrf(BRW_TYPE_UW);
|
const brw_reg vector_mask = ubld.vgrf(BRW_TYPE_UW);
|
||||||
ubld.UNDEF(vector_mask);
|
ubld.UNDEF(vector_mask);
|
||||||
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, vector_mask, retype(brw_sr0_reg(3),
|
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, vector_mask, retype(brw_sr0_reg(3),
|
||||||
BRW_TYPE_UD));
|
BRW_TYPE_UD));
|
||||||
|
|
@ -1338,7 +1338,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||||
const fs_reg &surface, const fs_reg &surface_handle)
|
const brw_reg &surface, const brw_reg &surface_handle)
|
||||||
{
|
{
|
||||||
const brw_compiler *compiler = bld.shader->compiler;
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
|
|
@ -1362,7 +1362,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||||
} else {
|
} else {
|
||||||
inst->desc = desc;
|
inst->desc = desc;
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.AND(tmp, surface, brw_imm_ud(0xff));
|
ubld.AND(tmp, surface, brw_imm_ud(0xff));
|
||||||
inst->src[0] = component(tmp, 0);
|
inst->src[0] = component(tmp, 0);
|
||||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||||
|
|
@ -1371,7 +1371,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
||||||
uint32_t desc, const fs_reg &surface)
|
uint32_t desc, const brw_reg &surface)
|
||||||
{
|
{
|
||||||
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const brw_compiler *compiler = bld.shader->compiler;
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
@ -1397,7 +1397,7 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
||||||
inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud));
|
inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud));
|
||||||
} else {
|
} else {
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.SHL(tmp, surface, brw_imm_ud(24));
|
ubld.SHL(tmp, surface, brw_imm_ud(24));
|
||||||
inst->src[1] = component(tmp, 0);
|
inst->src[1] = component(tmp, 0);
|
||||||
}
|
}
|
||||||
|
|
@ -1419,13 +1419,13 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
||||||
const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
||||||
const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
||||||
const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
const UNUSED fs_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
|
const UNUSED brw_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
|
||||||
const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||||
const fs_reg allow_sample_mask =
|
const brw_reg allow_sample_mask =
|
||||||
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
|
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
|
||||||
assert(arg.file == IMM);
|
assert(arg.file == IMM);
|
||||||
assert(allow_sample_mask.file == IMM);
|
assert(allow_sample_mask.file == IMM);
|
||||||
|
|
@ -1450,10 +1450,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
const bool has_side_effects = inst->has_side_effects();
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
|
|
||||||
fs_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
|
brw_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
|
||||||
fs_reg(brw_imm_ud(0xffffffff));
|
brw_reg(brw_imm_ud(0xffffffff));
|
||||||
|
|
||||||
fs_reg header;
|
brw_reg header;
|
||||||
if (is_stateless) {
|
if (is_stateless) {
|
||||||
assert(!is_surface_access);
|
assert(!is_surface_access);
|
||||||
fs_builder ubld = bld.exec_all().group(8, 0);
|
fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
|
|
@ -1462,7 +1462,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
}
|
}
|
||||||
const unsigned header_sz = header.file != BAD_FILE ? 1 : 0;
|
const unsigned header_sz = header.file != BAD_FILE ? 1 : 0;
|
||||||
|
|
||||||
fs_reg payload, payload2;
|
brw_reg payload, payload2;
|
||||||
unsigned mlen, ex_mlen = 0;
|
unsigned mlen, ex_mlen = 0;
|
||||||
if (src.file == BAD_FILE || header.file == BAD_FILE) {
|
if (src.file == BAD_FILE || header.file == BAD_FILE) {
|
||||||
/* We have split sends on gfx9 and above */
|
/* We have split sends on gfx9 and above */
|
||||||
|
|
@ -1482,7 +1482,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
/* Allocate space for the payload. */
|
/* Allocate space for the payload. */
|
||||||
const unsigned sz = header_sz + addr_sz + src_sz;
|
const unsigned sz = header_sz + addr_sz + src_sz;
|
||||||
payload = bld.vgrf(BRW_TYPE_UD, sz);
|
payload = bld.vgrf(BRW_TYPE_UD, sz);
|
||||||
fs_reg *const components = new fs_reg[sz];
|
brw_reg *const components = new brw_reg[sz];
|
||||||
unsigned n = 0;
|
unsigned n = 0;
|
||||||
|
|
||||||
/* Construct the payload. */
|
/* Construct the payload. */
|
||||||
|
|
@ -1657,13 +1657,13 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(devinfo->has_lsc);
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
||||||
const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
||||||
const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
||||||
const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
|
const brw_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
|
||||||
const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||||
const fs_reg allow_sample_mask =
|
const brw_reg allow_sample_mask =
|
||||||
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
|
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
|
||||||
assert(arg.file == IMM);
|
assert(arg.file == IMM);
|
||||||
assert(allow_sample_mask.file == IMM);
|
assert(allow_sample_mask.file == IMM);
|
||||||
|
|
@ -1685,7 +1685,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
unsigned num_components = 0;
|
unsigned num_components = 0;
|
||||||
|
|
||||||
unsigned ex_mlen = 0;
|
unsigned ex_mlen = 0;
|
||||||
fs_reg payload, payload2;
|
brw_reg payload, payload2;
|
||||||
payload = bld.move_to_vgrf(addr, addr_sz);
|
payload = bld.move_to_vgrf(addr, addr_sz);
|
||||||
if (src.file != BAD_FILE) {
|
if (src.file != BAD_FILE) {
|
||||||
payload2 = bld.move_to_vgrf(src, src_comps);
|
payload2 = bld.move_to_vgrf(src, src_comps);
|
||||||
|
|
@ -1693,8 +1693,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Predicate the instruction on the sample mask if needed */
|
/* Predicate the instruction on the sample mask if needed */
|
||||||
fs_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
|
brw_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
|
||||||
fs_reg(brw_imm_ud(0xffffffff));
|
brw_reg(brw_imm_ud(0xffffffff));
|
||||||
if (sample_mask.file != BAD_FILE && sample_mask.file != IMM)
|
if (sample_mask.file != BAD_FILE && sample_mask.file != IMM)
|
||||||
brw_emit_predicate_on_sample_mask(bld, inst);
|
brw_emit_predicate_on_sample_mask(bld, inst);
|
||||||
|
|
||||||
|
|
@ -1831,11 +1831,11 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(devinfo->has_lsc);
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
||||||
const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
||||||
const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
||||||
const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||||
assert(arg.file == IMM);
|
assert(arg.file == IMM);
|
||||||
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
|
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
|
||||||
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
|
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
|
||||||
|
|
@ -1849,7 +1849,7 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
|
const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
|
||||||
|
|
||||||
fs_builder ubld = bld.exec_all().group(1, 0);
|
fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
fs_reg stateless_ex_desc;
|
brw_reg stateless_ex_desc;
|
||||||
if (is_stateless) {
|
if (is_stateless) {
|
||||||
stateless_ex_desc = ubld.vgrf(BRW_TYPE_UD);
|
stateless_ex_desc = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.AND(stateless_ex_desc,
|
ubld.AND(stateless_ex_desc,
|
||||||
|
|
@ -1859,7 +1859,7 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
ubld.SHR(stateless_ex_desc, stateless_ex_desc, brw_imm_ud(4));
|
ubld.SHR(stateless_ex_desc, stateless_ex_desc, brw_imm_ud(4));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg data;
|
brw_reg data;
|
||||||
if (write) {
|
if (write) {
|
||||||
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||||
data = retype(bld.move_to_vgrf(src, src_sz), BRW_TYPE_UD);
|
data = retype(bld.move_to_vgrf(src, src_sz), BRW_TYPE_UD);
|
||||||
|
|
@ -1913,11 +1913,11 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
||||||
const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
||||||
const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
||||||
const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||||
assert(arg.file == IMM);
|
assert(arg.file == IMM);
|
||||||
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
|
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
|
||||||
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
|
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
|
||||||
|
|
@ -1941,7 +1941,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
/* The address is stored in the header. See MH_A32_GO and MH_BTS_GO. */
|
/* The address is stored in the header. See MH_A32_GO and MH_BTS_GO. */
|
||||||
fs_builder ubld = bld.exec_all().group(8, 0);
|
fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
fs_reg header = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg header = ubld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
if (is_stateless)
|
if (is_stateless)
|
||||||
ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
||||||
|
|
@ -1954,7 +1954,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
else
|
else
|
||||||
ubld.group(1, 0).MOV(component(header, 2), addr);
|
ubld.group(1, 0).MOV(component(header, 2), addr);
|
||||||
|
|
||||||
fs_reg data;
|
brw_reg data;
|
||||||
unsigned ex_mlen = 0;
|
unsigned ex_mlen = 0;
|
||||||
if (write) {
|
if (write) {
|
||||||
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||||
|
|
@ -1981,14 +1981,14 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
inst->src[3] = data;
|
inst->src[3] = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static brw_reg
|
||||||
emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
emit_a64_oword_block_header(const fs_builder &bld, const brw_reg &addr)
|
||||||
{
|
{
|
||||||
const fs_builder ubld = bld.exec_all().group(8, 0);
|
const fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
|
|
||||||
assert(brw_type_size_bytes(addr.type) == 8 && addr.stride == 0);
|
assert(brw_type_size_bytes(addr.type) == 8 && addr.stride == 0);
|
||||||
|
|
||||||
fs_reg expanded_addr = addr;
|
brw_reg expanded_addr = addr;
|
||||||
if (addr.file == UNIFORM) {
|
if (addr.file == UNIFORM) {
|
||||||
/* We can't do stride 1 with the UNIFORM file, it requires stride 0 */
|
/* We can't do stride 1 with the UNIFORM file, it requires stride 0 */
|
||||||
expanded_addr = ubld.vgrf(BRW_TYPE_UQ);
|
expanded_addr = ubld.vgrf(BRW_TYPE_UQ);
|
||||||
|
|
@ -1996,11 +1996,11 @@ emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
||||||
ubld.MOV(expanded_addr, retype(addr, BRW_TYPE_UQ));
|
ubld.MOV(expanded_addr, retype(addr, BRW_TYPE_UQ));
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg header = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg header = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.MOV(header, brw_imm_ud(0));
|
ubld.MOV(header, brw_imm_ud(0));
|
||||||
|
|
||||||
/* Use a 2-wide MOV to fill out the address */
|
/* Use a 2-wide MOV to fill out the address */
|
||||||
fs_reg addr_vec2 = expanded_addr;
|
brw_reg addr_vec2 = expanded_addr;
|
||||||
addr_vec2.type = BRW_TYPE_UD;
|
addr_vec2.type = BRW_TYPE_UD;
|
||||||
addr_vec2.stride = 1;
|
addr_vec2.stride = 1;
|
||||||
ubld.group(2, 0).MOV(header, addr_vec2);
|
ubld.group(2, 0).MOV(header, addr_vec2);
|
||||||
|
|
@ -2034,8 +2034,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
|
const brw_reg addr = inst->src[A64_LOGICAL_ADDRESS];
|
||||||
const fs_reg src = inst->src[A64_LOGICAL_SRC];
|
const brw_reg src = inst->src[A64_LOGICAL_SRC];
|
||||||
const unsigned src_sz = brw_type_size_bytes(src.type);
|
const unsigned src_sz = brw_type_size_bytes(src.type);
|
||||||
const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
|
const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
|
||||||
|
|
||||||
|
|
@ -2044,8 +2044,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
||||||
const bool has_side_effects = inst->has_side_effects();
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
|
|
||||||
fs_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
|
brw_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
|
||||||
fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
|
brw_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
|
||||||
unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE;
|
unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE;
|
||||||
unsigned num_components = 0;
|
unsigned num_components = 0;
|
||||||
|
|
||||||
|
|
@ -2155,14 +2155,14 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
|
const brw_reg addr = inst->src[A64_LOGICAL_ADDRESS];
|
||||||
const fs_reg src = inst->src[A64_LOGICAL_SRC];
|
const brw_reg src = inst->src[A64_LOGICAL_SRC];
|
||||||
const unsigned src_comps = inst->components_read(1);
|
const unsigned src_comps = inst->components_read(1);
|
||||||
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
|
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
|
||||||
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
||||||
const bool has_side_effects = inst->has_side_effects();
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
|
|
||||||
fs_reg payload, payload2;
|
brw_reg payload, payload2;
|
||||||
unsigned mlen, ex_mlen = 0, header_size = 0;
|
unsigned mlen, ex_mlen = 0, header_size = 0;
|
||||||
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
|
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
|
||||||
inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
|
inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
|
||||||
|
|
@ -2279,16 +2279,16 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
ASSERTED const brw_compiler *compiler = bld.shader->compiler;
|
ASSERTED const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
brw_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
||||||
fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
brw_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
brw_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
||||||
fs_reg alignment_B = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT];
|
brw_reg alignment_B = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT];
|
||||||
|
|
||||||
/* We are switching the instruction from an ALU-like instruction to a
|
/* We are switching the instruction from an ALU-like instruction to a
|
||||||
* send-from-grf instruction. Since sends can't handle strides or
|
* send-from-grf instruction. Since sends can't handle strides or
|
||||||
* source modifiers, we have to make a copy of the offset source.
|
* source modifiers, we have to make a copy of the offset source.
|
||||||
*/
|
*/
|
||||||
fs_reg ubo_offset = bld.move_to_vgrf(offset_B, 1);
|
brw_reg ubo_offset = bld.move_to_vgrf(offset_B, 1);
|
||||||
|
|
||||||
enum lsc_addr_surface_type surf_type =
|
enum lsc_addr_surface_type surf_type =
|
||||||
surface_handle.file == BAD_FILE ?
|
surface_handle.file == BAD_FILE ?
|
||||||
|
|
@ -2365,15 +2365,15 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const brw_compiler *compiler = bld.shader->compiler;
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
brw_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
||||||
fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
brw_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
brw_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
||||||
|
|
||||||
/* We are switching the instruction from an ALU-like instruction to a
|
/* We are switching the instruction from an ALU-like instruction to a
|
||||||
* send-from-grf instruction. Since sends can't handle strides or
|
* send-from-grf instruction. Since sends can't handle strides or
|
||||||
* source modifiers, we have to make a copy of the offset source.
|
* source modifiers, we have to make a copy of the offset source.
|
||||||
*/
|
*/
|
||||||
fs_reg ubo_offset = bld.vgrf(BRW_TYPE_UD);
|
brw_reg ubo_offset = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.MOV(ubo_offset, offset_B);
|
bld.MOV(ubo_offset, offset_B);
|
||||||
|
|
||||||
assert(inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].file == BRW_IMMEDIATE_VALUE);
|
assert(inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
|
@ -2445,7 +2445,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
/* We have to send something */
|
/* We have to send something */
|
||||||
fs_reg payload = brw_vec8_grf(0, 0);
|
brw_reg payload = brw_vec8_grf(0, 0);
|
||||||
unsigned mlen = 1;
|
unsigned mlen = 1;
|
||||||
|
|
||||||
unsigned mode;
|
unsigned mode;
|
||||||
|
|
@ -2473,7 +2473,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
const bool dynamic_mode =
|
const bool dynamic_mode =
|
||||||
inst->src[INTERP_SRC_DYNAMIC_MODE].file != BAD_FILE;
|
inst->src[INTERP_SRC_DYNAMIC_MODE].file != BAD_FILE;
|
||||||
|
|
||||||
fs_reg desc = inst->src[INTERP_SRC_MSG_DESC];
|
brw_reg desc = inst->src[INTERP_SRC_MSG_DESC];
|
||||||
uint32_t desc_imm =
|
uint32_t desc_imm =
|
||||||
brw_pixel_interp_desc(devinfo,
|
brw_pixel_interp_desc(devinfo,
|
||||||
/* Leave the mode at 0 if persample_dispatch is
|
/* Leave the mode at 0 if persample_dispatch is
|
||||||
|
|
@ -2488,7 +2488,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
desc_imm |= (1 << 15);
|
desc_imm |= (1 << 15);
|
||||||
} else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
} else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
||||||
STATIC_ASSERT(INTEL_MSAA_FLAG_COARSE_PI_MSG == (1 << 15));
|
STATIC_ASSERT(INTEL_MSAA_FLAG_COARSE_PI_MSG == (1 << 15));
|
||||||
fs_reg orig_desc = desc;
|
brw_reg orig_desc = desc;
|
||||||
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
||||||
desc = ubld.vgrf(BRW_TYPE_UD);
|
desc = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
|
ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
|
||||||
|
|
@ -2518,7 +2518,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||||
* components of "Per Message Offset”, which will give us the pixel offset 0x0.
|
* components of "Per Message Offset”, which will give us the pixel offset 0x0.
|
||||||
*/
|
*/
|
||||||
if (dynamic_mode) {
|
if (dynamic_mode) {
|
||||||
fs_reg orig_desc = desc;
|
brw_reg orig_desc = desc;
|
||||||
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
||||||
desc = ubld.vgrf(BRW_TYPE_UD);
|
desc = ubld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
|
|
@ -2565,13 +2565,13 @@ static void
|
||||||
lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
fs_reg global_addr = inst->src[0];
|
brw_reg global_addr = inst->src[0];
|
||||||
const fs_reg btd_record = inst->src[1];
|
const brw_reg btd_record = inst->src[1];
|
||||||
|
|
||||||
const unsigned unit = reg_unit(devinfo);
|
const unsigned unit = reg_unit(devinfo);
|
||||||
const unsigned mlen = 2 * unit;
|
const unsigned mlen = 2 * unit;
|
||||||
const fs_builder ubld = bld.exec_all();
|
const fs_builder ubld = bld.exec_all();
|
||||||
fs_reg header = ubld.vgrf(BRW_TYPE_UD, 2 * unit);
|
brw_reg header = ubld.vgrf(BRW_TYPE_UD, 2 * unit);
|
||||||
|
|
||||||
ubld.MOV(header, brw_imm_ud(0));
|
ubld.MOV(header, brw_imm_ud(0));
|
||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
|
|
@ -2595,12 +2595,12 @@ lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
/* Stack IDs are always in R1 regardless of whether we're coming from a
|
/* Stack IDs are always in R1 regardless of whether we're coming from a
|
||||||
* bindless shader or a regular compute shader.
|
* bindless shader or a regular compute shader.
|
||||||
*/
|
*/
|
||||||
fs_reg stack_ids = retype(offset(header, bld, 1), BRW_TYPE_UW);
|
brw_reg stack_ids = retype(offset(header, bld, 1), BRW_TYPE_UW);
|
||||||
bld.exec_all().MOV(stack_ids, retype(brw_vec8_grf(1 * unit, 0),
|
bld.exec_all().MOV(stack_ids, retype(brw_vec8_grf(1 * unit, 0),
|
||||||
BRW_TYPE_UW));
|
BRW_TYPE_UW));
|
||||||
|
|
||||||
unsigned ex_mlen = 0;
|
unsigned ex_mlen = 0;
|
||||||
fs_reg payload;
|
brw_reg payload;
|
||||||
if (inst->opcode == SHADER_OPCODE_BTD_SPAWN_LOGICAL) {
|
if (inst->opcode == SHADER_OPCODE_BTD_SPAWN_LOGICAL) {
|
||||||
ex_mlen = 2 * (inst->exec_size / 8);
|
ex_mlen = 2 * (inst->exec_size / 8);
|
||||||
payload = bld.move_to_vgrf(btd_record, 1);
|
payload = bld.move_to_vgrf(btd_record, 1);
|
||||||
|
|
@ -2643,33 +2643,33 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
* so that the MOV operates on 2 components rather than twice the same
|
* so that the MOV operates on 2 components rather than twice the same
|
||||||
* component.
|
* component.
|
||||||
*/
|
*/
|
||||||
fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_TYPE_UD);
|
brw_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_TYPE_UD);
|
||||||
globals_addr.stride = 1;
|
globals_addr.stride = 1;
|
||||||
const fs_reg bvh_level =
|
const brw_reg bvh_level =
|
||||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
|
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
|
||||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
|
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
|
||||||
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
|
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
|
||||||
inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
|
inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
|
||||||
const fs_reg trace_ray_control =
|
const brw_reg trace_ray_control =
|
||||||
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == BRW_IMMEDIATE_VALUE ?
|
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == BRW_IMMEDIATE_VALUE ?
|
||||||
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
|
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
|
||||||
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
|
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
|
||||||
inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
|
inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
|
||||||
const fs_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
|
const brw_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
|
||||||
assert(synchronous_src.file == BRW_IMMEDIATE_VALUE);
|
assert(synchronous_src.file == BRW_IMMEDIATE_VALUE);
|
||||||
const bool synchronous = synchronous_src.ud;
|
const bool synchronous = synchronous_src.ud;
|
||||||
|
|
||||||
const unsigned unit = reg_unit(devinfo);
|
const unsigned unit = reg_unit(devinfo);
|
||||||
const unsigned mlen = unit;
|
const unsigned mlen = unit;
|
||||||
const fs_builder ubld = bld.exec_all();
|
const fs_builder ubld = bld.exec_all();
|
||||||
fs_reg header = ubld.vgrf(BRW_TYPE_UD);
|
brw_reg header = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.MOV(header, brw_imm_ud(0));
|
ubld.MOV(header, brw_imm_ud(0));
|
||||||
ubld.group(2, 0).MOV(header, globals_addr);
|
ubld.group(2, 0).MOV(header, globals_addr);
|
||||||
if (synchronous)
|
if (synchronous)
|
||||||
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
|
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
|
||||||
|
|
||||||
const unsigned ex_mlen = inst->exec_size / 8;
|
const unsigned ex_mlen = inst->exec_size / 8;
|
||||||
fs_reg payload = bld.vgrf(BRW_TYPE_UD);
|
brw_reg payload = bld.vgrf(BRW_TYPE_UD);
|
||||||
if (bvh_level.file == BRW_IMMEDIATE_VALUE &&
|
if (bvh_level.file == BRW_IMMEDIATE_VALUE &&
|
||||||
trace_ray_control.file == BRW_IMMEDIATE_VALUE) {
|
trace_ray_control.file == BRW_IMMEDIATE_VALUE) {
|
||||||
bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, 9, 8) |
|
bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, 9, 8) |
|
||||||
|
|
@ -2720,9 +2720,9 @@ lower_get_buffer_size(const fs_builder &bld, fs_inst *inst)
|
||||||
*/
|
*/
|
||||||
assert(inst->exec_size == (devinfo->ver < 20 ? 8 : 16));
|
assert(inst->exec_size == (devinfo->ver < 20 ? 8 : 16));
|
||||||
|
|
||||||
fs_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
|
brw_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
|
||||||
fs_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
|
brw_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
|
||||||
fs_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
|
brw_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
|
||||||
|
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->mlen = inst->exec_size / 8;
|
inst->mlen = inst->exec_size / 8;
|
||||||
|
|
@ -2916,10 +2916,10 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
|
||||||
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const fs_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
|
const brw_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
|
||||||
const fs_reg surface_handle = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE];
|
const brw_reg surface_handle = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
|
const brw_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
|
||||||
const fs_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
|
const brw_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
|
||||||
assert(surface.file == BAD_FILE || surface_handle.file == BAD_FILE);
|
assert(surface.file == BAD_FILE || surface_handle.file == BAD_FILE);
|
||||||
assert(offset_B.file == IMM);
|
assert(offset_B.file == IMM);
|
||||||
assert(size_B.file == IMM);
|
assert(size_B.file == IMM);
|
||||||
|
|
@ -2928,7 +2928,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
|
||||||
const fs_builder ubld =
|
const fs_builder ubld =
|
||||||
fs_builder(&s, block, inst).group(8, 0).exec_all();
|
fs_builder(&s, block, inst).group(8, 0).exec_all();
|
||||||
|
|
||||||
const fs_reg payload = ubld.vgrf(BRW_TYPE_UD);
|
const brw_reg payload = ubld.vgrf(BRW_TYPE_UD);
|
||||||
ubld.MOV(payload, offset_B);
|
ubld.MOV(payload, offset_B);
|
||||||
|
|
||||||
inst->sfid = GFX12_SFID_UGM;
|
inst->sfid = GFX12_SFID_UGM;
|
||||||
|
|
@ -2964,7 +2964,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
|
||||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||||
} else {
|
} else {
|
||||||
const fs_builder ubld = fs_builder(&s, block, inst).exec_all();
|
const fs_builder ubld = fs_builder(&s, block, inst).exec_all();
|
||||||
fs_reg header = fs_builder(&s, 8).exec_all().vgrf(BRW_TYPE_UD);
|
brw_reg header = fs_builder(&s, 8).exec_all().vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
ubld.group(8, 0).MOV(header,
|
ubld.group(8, 0).MOV(header,
|
||||||
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
|
retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
|
||||||
|
|
@ -2985,7 +2985,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
|
||||||
setup_surface_descriptors(ubld, inst, desc, surface, surface_handle);
|
setup_surface_descriptors(ubld, inst, desc, surface, surface_handle);
|
||||||
|
|
||||||
inst->src[2] = header;
|
inst->src[2] = header;
|
||||||
inst->src[3] = fs_reg(); /* unused for reads */
|
inst->src[3] = brw_reg(); /* unused for reads */
|
||||||
|
|
||||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -589,7 +589,7 @@ public:
|
||||||
|
|
||||||
void calculate_deps();
|
void calculate_deps();
|
||||||
bool is_compressed(const fs_inst *inst);
|
bool is_compressed(const fs_inst *inst);
|
||||||
bool register_needs_barrier(const fs_reg &reg);
|
bool register_needs_barrier(const brw_reg &reg);
|
||||||
schedule_node *choose_instruction_to_schedule();
|
schedule_node *choose_instruction_to_schedule();
|
||||||
int calculate_issue_time(const fs_inst *inst);
|
int calculate_issue_time(const fs_inst *inst);
|
||||||
|
|
||||||
|
|
@ -602,7 +602,7 @@ public:
|
||||||
void schedule_instructions();
|
void schedule_instructions();
|
||||||
void run(instruction_scheduler_mode mode);
|
void run(instruction_scheduler_mode mode);
|
||||||
|
|
||||||
int grf_index(const fs_reg &reg);
|
int grf_index(const brw_reg &reg);
|
||||||
|
|
||||||
void *mem_ctx;
|
void *mem_ctx;
|
||||||
linear_ctx *lin_ctx;
|
linear_ctx *lin_ctx;
|
||||||
|
|
@ -1071,7 +1071,7 @@ has_cross_lane_access(const fs_inst *inst)
|
||||||
* Some register access need dependencies on other instructions.
|
* Some register access need dependencies on other instructions.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
instruction_scheduler::register_needs_barrier(const fs_reg &reg)
|
instruction_scheduler::register_needs_barrier(const brw_reg &reg)
|
||||||
{
|
{
|
||||||
if (reg.file != ARF || reg.is_null())
|
if (reg.file != ARF || reg.is_null())
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -1175,7 +1175,7 @@ instruction_scheduler::clear_last_grf_write()
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
instruction_scheduler::grf_index(const fs_reg &reg)
|
instruction_scheduler::grf_index(const brw_reg &reg)
|
||||||
{
|
{
|
||||||
if (post_reg_alloc)
|
if (post_reg_alloc)
|
||||||
return reg.nr;
|
return reg.nr;
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_reg_saturate_immediate(fs_reg *reg)
|
fs_reg_saturate_immediate(brw_reg *reg)
|
||||||
{
|
{
|
||||||
union {
|
union {
|
||||||
unsigned ud;
|
unsigned ud;
|
||||||
|
|
@ -93,7 +93,7 @@ fs_reg_saturate_immediate(fs_reg *reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_reg_negate_immediate(fs_reg *reg)
|
fs_reg_negate_immediate(brw_reg *reg)
|
||||||
{
|
{
|
||||||
switch (reg->type) {
|
switch (reg->type) {
|
||||||
case BRW_TYPE_D:
|
case BRW_TYPE_D:
|
||||||
|
|
@ -136,7 +136,7 @@ fs_reg_negate_immediate(fs_reg *reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_reg_abs_immediate(fs_reg *reg)
|
fs_reg_abs_immediate(brw_reg *reg)
|
||||||
{
|
{
|
||||||
switch (reg->type) {
|
switch (reg->type) {
|
||||||
case BRW_TYPE_D:
|
case BRW_TYPE_D:
|
||||||
|
|
@ -579,7 +579,7 @@ fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
|
||||||
if (exec_list_is_singular(&block->instructions)) {
|
if (exec_list_is_singular(&block->instructions)) {
|
||||||
this->opcode = BRW_OPCODE_NOP;
|
this->opcode = BRW_OPCODE_NOP;
|
||||||
this->resize_sources(0);
|
this->resize_sources(0);
|
||||||
this->dst = fs_reg();
|
this->dst = brw_reg();
|
||||||
this->size_written = 0;
|
this->size_written = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -130,10 +130,10 @@ cmod_propagation(fs_visitor *v)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, basic)
|
TEST_F(cmod_propagation_test, basic)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
||||||
|
|
@ -161,10 +161,10 @@ TEST_F(cmod_propagation_test, basic)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, basic_other_flag)
|
TEST_F(cmod_propagation_test, basic_other_flag)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE)
|
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE)
|
||||||
->flag_subreg = 1;
|
->flag_subreg = 1;
|
||||||
|
|
@ -194,10 +194,10 @@ TEST_F(cmod_propagation_test, basic_other_flag)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_nonzero)
|
TEST_F(cmod_propagation_test, cmp_nonzero)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg nonzero(brw_imm_f(1.0f));
|
brw_reg nonzero(brw_imm_f(1.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
|
||||||
|
|
||||||
|
|
@ -226,9 +226,9 @@ TEST_F(cmod_propagation_test, cmp_nonzero)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, non_cmod_instruction)
|
TEST_F(cmod_propagation_test, non_cmod_instruction)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg zero(brw_imm_ud(0u));
|
brw_reg zero(brw_imm_ud(0u));
|
||||||
bld.FBL(dest, src0);
|
bld.FBL(dest, src0);
|
||||||
bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
||||||
|
|
@ -257,8 +257,8 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, non_cmod_livechannel)
|
TEST_F(cmod_propagation_test, non_cmod_livechannel)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg zero(brw_imm_d(0));
|
brw_reg zero(brw_imm_d(0));
|
||||||
bld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dest)->exec_size = 32;
|
bld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dest)->exec_size = 32;
|
||||||
bld.CMP(bld.null_reg_d(), dest, zero, BRW_CONDITIONAL_Z)->exec_size = 32;
|
bld.CMP(bld.null_reg_d(), dest, zero, BRW_CONDITIONAL_Z)->exec_size = 32;
|
||||||
|
|
||||||
|
|
@ -288,11 +288,11 @@ TEST_F(cmod_propagation_test, non_cmod_livechannel)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_flag_write)
|
TEST_F(cmod_propagation_test, intervening_flag_write)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
|
||||||
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
@ -325,11 +325,11 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_mismatch_flag_write)
|
TEST_F(cmod_propagation_test, intervening_mismatch_flag_write)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE)
|
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE)
|
||||||
->flag_subreg = 1;
|
->flag_subreg = 1;
|
||||||
|
|
@ -365,12 +365,12 @@ TEST_F(cmod_propagation_test, intervening_mismatch_flag_write)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_flag_read)
|
TEST_F(cmod_propagation_test, intervening_flag_read)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest0, src0, src1);
|
bld.ADD(dest0, src0, src1);
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
||||||
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
@ -403,12 +403,12 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_mismatch_flag_read)
|
TEST_F(cmod_propagation_test, intervening_mismatch_flag_read)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest0, src0, src1);
|
bld.ADD(dest0, src0, src1);
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
||||||
->flag_subreg = 1;
|
->flag_subreg = 1;
|
||||||
|
|
@ -444,13 +444,13 @@ TEST_F(cmod_propagation_test, intervening_mismatch_flag_read)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_dest_write)
|
TEST_F(cmod_propagation_test, intervening_dest_write)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F, 4);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F, 4);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F, 2);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F, 2);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
|
|
||||||
fs_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
|
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
|
||||||
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
|
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
|
||||||
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
|
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
|
||||||
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
|
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
|
||||||
|
|
@ -491,12 +491,12 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
|
TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
|
set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
||||||
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
@ -529,10 +529,10 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, negate)
|
TEST_F(cmod_propagation_test, negate)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
dest.negate = true;
|
dest.negate = true;
|
||||||
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
@ -561,9 +561,9 @@ TEST_F(cmod_propagation_test, negate)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, movnz)
|
TEST_F(cmod_propagation_test, movnz)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE);
|
bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ,
|
set_condmod(BRW_CONDITIONAL_NZ,
|
||||||
bld.MOV(bld.null_reg_f(), dest));
|
bld.MOV(bld.null_reg_f(), dest));
|
||||||
|
|
@ -592,10 +592,10 @@ TEST_F(cmod_propagation_test, movnz)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
|
TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), retype(dest, BRW_TYPE_F), zero,
|
bld.CMP(bld.null_reg_f(), retype(dest, BRW_TYPE_F), zero,
|
||||||
BRW_CONDITIONAL_GE);
|
BRW_CONDITIONAL_GE);
|
||||||
|
|
@ -625,10 +625,10 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, andnz_one)
|
TEST_F(cmod_propagation_test, andnz_one)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
fs_reg one(brw_imm_d(1));
|
brw_reg one(brw_imm_d(1));
|
||||||
|
|
||||||
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ,
|
set_condmod(BRW_CONDITIONAL_NZ,
|
||||||
|
|
@ -659,10 +659,10 @@ TEST_F(cmod_propagation_test, andnz_one)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, andnz_non_one)
|
TEST_F(cmod_propagation_test, andnz_non_one)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
fs_reg nonone(brw_imm_d(38));
|
brw_reg nonone(brw_imm_d(38));
|
||||||
|
|
||||||
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ,
|
set_condmod(BRW_CONDITIONAL_NZ,
|
||||||
|
|
@ -693,9 +693,9 @@ TEST_F(cmod_propagation_test, andnz_non_one)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpnz)
|
TEST_F(cmod_propagation_test, cmp_cmpnz)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0));
|
brw_reg zero(brw_imm_f(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
|
||||||
|
|
@ -720,9 +720,9 @@ TEST_F(cmod_propagation_test, cmp_cmpnz)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpg)
|
TEST_F(cmod_propagation_test, cmp_cmpg)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0));
|
brw_reg zero(brw_imm_f(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_G);
|
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_G);
|
||||||
|
|
@ -749,9 +749,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, plnnz_cmpnz)
|
TEST_F(cmod_propagation_test, plnnz_cmpnz)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0));
|
brw_reg zero(brw_imm_f(0));
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
||||||
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
|
||||||
|
|
@ -776,9 +776,9 @@ TEST_F(cmod_propagation_test, plnnz_cmpnz)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, plnnz_cmpz)
|
TEST_F(cmod_propagation_test, plnnz_cmpz)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0));
|
brw_reg zero(brw_imm_f(0));
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
||||||
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z);
|
bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z);
|
||||||
|
|
@ -803,10 +803,10 @@ TEST_F(cmod_propagation_test, plnnz_cmpz)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, plnnz_sel_cmpz)
|
TEST_F(cmod_propagation_test, plnnz_sel_cmpz)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0));
|
brw_reg zero(brw_imm_f(0));
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst1, src0, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst1, src0, zero));
|
||||||
|
|
@ -837,9 +837,9 @@ TEST_F(cmod_propagation_test, plnnz_sel_cmpz)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpg_D)
|
TEST_F(cmod_propagation_test, cmp_cmpg_D)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg zero(brw_imm_d(0));
|
brw_reg zero(brw_imm_d(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_G);
|
bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_G);
|
||||||
|
|
@ -866,9 +866,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg_D)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpg_UD)
|
TEST_F(cmod_propagation_test, cmp_cmpg_UD)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg zero(brw_imm_ud(0));
|
brw_reg zero(brw_imm_ud(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_G);
|
bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_G);
|
||||||
|
|
@ -893,9 +893,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg_UD)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpl_D)
|
TEST_F(cmod_propagation_test, cmp_cmpl_D)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg zero(brw_imm_d(0));
|
brw_reg zero(brw_imm_d(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_L);
|
||||||
|
|
@ -920,9 +920,9 @@ TEST_F(cmod_propagation_test, cmp_cmpl_D)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_cmpl_UD)
|
TEST_F(cmod_propagation_test, cmp_cmpl_UD)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg zero(brw_imm_ud(0));
|
brw_reg zero(brw_imm_ud(0));
|
||||||
|
|
||||||
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
|
||||||
bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_L);
|
||||||
|
|
@ -949,10 +949,10 @@ TEST_F(cmod_propagation_test, cmp_cmpl_UD)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, andz_one)
|
TEST_F(cmod_propagation_test, andz_one)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
fs_reg one(brw_imm_d(1));
|
brw_reg one(brw_imm_d(1));
|
||||||
|
|
||||||
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
|
||||||
set_condmod(BRW_CONDITIONAL_Z,
|
set_condmod(BRW_CONDITIONAL_Z,
|
||||||
|
|
@ -983,9 +983,9 @@ TEST_F(cmod_propagation_test, andz_one)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, add_not_merge_with_compare)
|
TEST_F(cmod_propagation_test, add_not_merge_with_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
|
|
@ -1016,9 +1016,9 @@ TEST_F(cmod_propagation_test, add_not_merge_with_compare)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_merge_with_compare)
|
TEST_F(cmod_propagation_test, subtract_merge_with_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src0, negate(src1));
|
bld.ADD(dest, src0, negate(src1));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
|
|
@ -1044,10 +1044,10 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare)
|
TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg one(brw_imm_f(1.0f));
|
brw_reg one(brw_imm_f(1.0f));
|
||||||
fs_reg negative_one(brw_imm_f(-1.0f));
|
brw_reg negative_one(brw_imm_f(-1.0f));
|
||||||
|
|
||||||
bld.ADD(dest, src0, negative_one);
|
bld.ADD(dest, src0, negative_one);
|
||||||
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_NZ);
|
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_NZ);
|
||||||
|
|
@ -1074,10 +1074,10 @@ TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add)
|
TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest0, src0, negate(src1));
|
bld.ADD(dest0, src0, negate(src1));
|
||||||
bld.ADD(dest1, src0, src1);
|
bld.ADD(dest1, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
@ -1108,10 +1108,10 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partial_write)
|
TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partial_write)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest0, src0, negate(src1));
|
bld.ADD(dest0, src0, negate(src1));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, negate(src1)));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, negate(src1)));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
@ -1143,10 +1143,10 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partia
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add)
|
TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest0, src0, negate(src1));
|
bld.ADD(dest0, src0, negate(src1));
|
||||||
set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, src1));
|
set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, src1));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
@ -1178,9 +1178,9 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, add_merge_with_compare)
|
TEST_F(cmod_propagation_test, add_merge_with_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.CMP(bld.null_reg_f(), src0, negate(src1), BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, negate(src1), BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
|
|
@ -1206,9 +1206,9 @@ TEST_F(cmod_propagation_test, add_merge_with_compare)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare)
|
TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src1, negate(src0));
|
bld.ADD(dest, src1, negate(src0));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
|
||||||
|
|
||||||
|
|
@ -1237,11 +1237,11 @@ TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_delete_compare)
|
TEST_F(cmod_propagation_test, subtract_delete_compare)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
|
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2));
|
||||||
|
|
@ -1276,11 +1276,11 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag)
|
||||||
/* This test is the same as subtract_delete_compare but it explicitly uses
|
/* This test is the same as subtract_delete_compare but it explicitly uses
|
||||||
* flag f0.1 for the subtraction and the comparison.
|
* flag f0.1 for the subtraction and the comparison.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)))
|
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)))
|
||||||
->flag_subreg = 1;
|
->flag_subreg = 1;
|
||||||
|
|
@ -1315,9 +1315,9 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
|
TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
|
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
|
||||||
|
|
@ -1350,9 +1350,9 @@ TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
|
||||||
TEST_F(cmod_propagation_test,
|
TEST_F(cmod_propagation_test,
|
||||||
subtract_merge_with_compare_intervening_mismatch_flag_write)
|
subtract_merge_with_compare_intervening_mismatch_flag_write)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
bld.ADD(dest0, src0, negate(src1));
|
bld.ADD(dest0, src0, negate(src1));
|
||||||
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
|
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
|
||||||
|
|
@ -1397,12 +1397,12 @@ TEST_F(cmod_propagation_test,
|
||||||
TEST_F(cmod_propagation_test,
|
TEST_F(cmod_propagation_test,
|
||||||
subtract_merge_with_compare_intervening_mismatch_flag_read)
|
subtract_merge_with_compare_intervening_mismatch_flag_read)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
|
|
||||||
bld.ADD(dest0, src0, negate(src1));
|
bld.ADD(dest0, src0, negate(src1));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
||||||
|
|
@ -1437,10 +1437,10 @@ TEST_F(cmod_propagation_test,
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
|
TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negate(src1)));
|
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negate(src1)));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), src1));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), src1));
|
||||||
|
|
@ -1472,8 +1472,8 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch)
|
TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch)
|
||||||
{
|
{
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
src0.type = BRW_TYPE_W;
|
src0.type = BRW_TYPE_W;
|
||||||
|
|
||||||
bld.ASR(dest0, negate(src0), brw_imm_d(15));
|
bld.ASR(dest0, negate(src0), brw_imm_d(15));
|
||||||
|
|
@ -1503,9 +1503,9 @@ TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, ior_f2i_nz)
|
TEST_F(cmod_propagation_test, ior_f2i_nz)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_D);
|
brw_reg dest = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
bld.OR(dest, src0, src1);
|
bld.OR(dest, src0, src1);
|
||||||
bld.MOV(bld.null_reg_d(), retype(dest, BRW_TYPE_F))
|
bld.MOV(bld.null_reg_d(), retype(dest, BRW_TYPE_F))
|
||||||
|
|
@ -1549,9 +1549,9 @@ cmod_propagation_test::test_mov_prop(enum brw_conditional_mod cmod,
|
||||||
enum brw_reg_type mov_dst_type,
|
enum brw_reg_type mov_dst_type,
|
||||||
bool expected_cmod_prop_progress)
|
bool expected_cmod_prop_progress)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(add_type);
|
brw_reg dest = bld.vgrf(add_type);
|
||||||
fs_reg src0 = bld.vgrf(add_type);
|
brw_reg src0 = bld.vgrf(add_type);
|
||||||
fs_reg src1 = bld.vgrf(add_type);
|
brw_reg src1 = bld.vgrf(add_type);
|
||||||
|
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
bld.MOV(retype(bld.null_reg_ud(), mov_dst_type), dest)
|
bld.MOV(retype(bld.null_reg_ud(), mov_dst_type), dest)
|
||||||
|
|
@ -2126,10 +2126,10 @@ cmod_propagation_test::test_saturate_prop(enum brw_conditional_mod before,
|
||||||
enum brw_reg_type op_type,
|
enum brw_reg_type op_type,
|
||||||
bool expected_cmod_prop_progress)
|
bool expected_cmod_prop_progress)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(add_type);
|
brw_reg dest = bld.vgrf(add_type);
|
||||||
fs_reg src0 = bld.vgrf(add_type);
|
brw_reg src0 = bld.vgrf(add_type);
|
||||||
fs_reg src1 = bld.vgrf(add_type);
|
brw_reg src1 = bld.vgrf(add_type);
|
||||||
fs_reg zero(brw_imm_ud(0));
|
brw_reg zero(brw_imm_ud(0));
|
||||||
|
|
||||||
bld.ADD(dest, src0, src1)->saturate = true;
|
bld.ADD(dest, src0, src1)->saturate = true;
|
||||||
|
|
||||||
|
|
@ -2611,9 +2611,9 @@ TEST_F(cmod_propagation_test, not_to_or)
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.OR(dest, src0, src1);
|
bld.OR(dest, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
||||||
|
|
||||||
|
|
@ -2644,9 +2644,9 @@ TEST_F(cmod_propagation_test, not_to_and)
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.AND(dest, src0, src1);
|
bld.AND(dest, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
||||||
|
|
||||||
|
|
@ -2682,9 +2682,9 @@ TEST_F(cmod_propagation_test, not_to_uadd)
|
||||||
* restriction is just that the destination type of the ALU instruction is
|
* restriction is just that the destination type of the ALU instruction is
|
||||||
* the same as the source type of the NOT instruction.
|
* the same as the source type of the NOT instruction.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
||||||
|
|
||||||
|
|
@ -2722,9 +2722,9 @@ TEST_F(cmod_propagation_test, not_to_fadd_to_ud)
|
||||||
* restriction is just that the destination type of the ALU instruction is
|
* restriction is just that the destination type of the ALU instruction is
|
||||||
* the same as the source type of the NOT instruction.
|
* the same as the source type of the NOT instruction.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest));
|
||||||
|
|
||||||
|
|
@ -2762,9 +2762,9 @@ TEST_F(cmod_propagation_test, not_to_fadd)
|
||||||
* restriction is just that the destination type of the ALU instruction is
|
* restriction is just that the destination type of the ALU instruction is
|
||||||
* the same as the source type of the NOT instruction.
|
* the same as the source type of the NOT instruction.
|
||||||
*/
|
*/
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dest, src0, src1);
|
bld.ADD(dest, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_NZ,
|
set_condmod(BRW_CONDITIONAL_NZ,
|
||||||
bld.NOT(bld.null_reg_ud(),
|
bld.NOT(bld.null_reg_ud(),
|
||||||
|
|
@ -2799,12 +2799,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_compatible_value)
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1));
|
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
|
||||||
|
|
@ -2841,12 +2841,12 @@ TEST_F(cmod_propagation_test,
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1))
|
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1))
|
||||||
->flag_subreg = 1;
|
->flag_subreg = 1;
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
||||||
|
|
@ -2886,12 +2886,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.OR(dest0, src0, src1));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.OR(dest0, src0, src1));
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
|
||||||
|
|
@ -2928,10 +2928,10 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_write)
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
|
|
||||||
bld.OR(dest0, src0, src1);
|
bld.OR(dest0, src0, src1);
|
||||||
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest1, src0, src1))
|
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest1, src0, src1))
|
||||||
|
|
@ -2971,12 +2971,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
|
||||||
/* Exercise propagation of conditional modifier from a NOT instruction to
|
/* Exercise propagation of conditional modifier from a NOT instruction to
|
||||||
* another ALU instruction as performed by cmod_propagate_not.
|
* another ALU instruction as performed by cmod_propagate_not.
|
||||||
*/
|
*/
|
||||||
fs_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg dest0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_UD);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
|
|
||||||
bld.OR(dest0, src0, src1);
|
bld.OR(dest0, src0, src1);
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
|
||||||
|
|
@ -3013,10 +3013,10 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_to_add_float_e)
|
TEST_F(cmod_propagation_test, cmp_to_add_float_e)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg neg10(brw_imm_f(-10.0f));
|
brw_reg neg10(brw_imm_f(-10.0f));
|
||||||
fs_reg pos10(brw_imm_f(10.0f));
|
brw_reg pos10(brw_imm_f(10.0f));
|
||||||
|
|
||||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
|
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
|
||||||
|
|
@ -3043,10 +3043,10 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_e)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_to_add_float_g)
|
TEST_F(cmod_propagation_test, cmp_to_add_float_g)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg neg10(brw_imm_f(-10.0f));
|
brw_reg neg10(brw_imm_f(-10.0f));
|
||||||
fs_reg pos10(brw_imm_f(10.0f));
|
brw_reg pos10(brw_imm_f(10.0f));
|
||||||
|
|
||||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
|
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
|
||||||
|
|
@ -3071,10 +3071,10 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_g)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, cmp_to_add_float_le)
|
TEST_F(cmod_propagation_test, cmp_to_add_float_le)
|
||||||
{
|
{
|
||||||
fs_reg dest = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg neg10(brw_imm_f(-10.0f));
|
brw_reg neg10(brw_imm_f(-10.0f));
|
||||||
fs_reg pos10(brw_imm_f(10.0f));
|
brw_reg pos10(brw_imm_f(10.0f));
|
||||||
|
|
||||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
|
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
|
||||||
|
|
@ -3099,13 +3099,13 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_le)
|
||||||
|
|
||||||
TEST_F(cmod_propagation_test, prop_across_sel)
|
TEST_F(cmod_propagation_test, prop_across_sel)
|
||||||
{
|
{
|
||||||
fs_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dest2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dest2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src3 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src3 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg zero(brw_imm_f(0.0f));
|
brw_reg zero(brw_imm_f(0.0f));
|
||||||
bld.ADD(dest1, src0, src1);
|
bld.ADD(dest1, src0, src1);
|
||||||
bld.emit_minmax(dest2, src2, src3, BRW_CONDITIONAL_GE);
|
bld.emit_minmax(dest2, src2, src3, BRW_CONDITIONAL_GE);
|
||||||
bld.CMP(bld.null_reg_f(), dest1, zero, BRW_CONDITIONAL_GE);
|
bld.CMP(bld.null_reg_f(), dest1, zero, BRW_CONDITIONAL_GE);
|
||||||
|
|
|
||||||
|
|
@ -77,9 +77,9 @@ TEST_F(FSCombineConstantsTest, Simple)
|
||||||
{
|
{
|
||||||
fs_builder bld = make_builder(shader);
|
fs_builder bld = make_builder(shader);
|
||||||
|
|
||||||
fs_reg r = brw_vec8_grf(1, 0);
|
brw_reg r = brw_vec8_grf(1, 0);
|
||||||
fs_reg imm_a = brw_imm_ud(1);
|
brw_reg imm_a = brw_imm_ud(1);
|
||||||
fs_reg imm_b = brw_imm_ud(2);
|
brw_reg imm_b = brw_imm_ud(2);
|
||||||
|
|
||||||
bld.SEL(r, imm_a, imm_b);
|
bld.SEL(r, imm_a, imm_b);
|
||||||
shader->calculate_cfg();
|
shader->calculate_cfg();
|
||||||
|
|
@ -102,10 +102,10 @@ TEST_F(FSCombineConstantsTest, DoContainingDo)
|
||||||
{
|
{
|
||||||
fs_builder bld = make_builder(shader);
|
fs_builder bld = make_builder(shader);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg imm_a = brw_imm_ud(1);
|
brw_reg imm_a = brw_imm_ud(1);
|
||||||
fs_reg imm_b = brw_imm_ud(2);
|
brw_reg imm_b = brw_imm_ud(2);
|
||||||
|
|
||||||
bld.DO();
|
bld.DO();
|
||||||
bld.DO();
|
bld.DO();
|
||||||
|
|
|
||||||
|
|
@ -119,10 +119,10 @@ copy_propagation(fs_visitor *v)
|
||||||
|
|
||||||
TEST_F(copy_propagation_test, basic)
|
TEST_F(copy_propagation_test, basic)
|
||||||
{
|
{
|
||||||
fs_reg vgrf0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg vgrf1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg vgrf2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg vgrf3 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf3 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MOV(vgrf0, vgrf2);
|
bld.MOV(vgrf0, vgrf2);
|
||||||
bld.ADD(vgrf1, vgrf0, vgrf3);
|
bld.ADD(vgrf1, vgrf0, vgrf3);
|
||||||
|
|
||||||
|
|
@ -160,9 +160,9 @@ TEST_F(copy_propagation_test, basic)
|
||||||
|
|
||||||
TEST_F(copy_propagation_test, maxmax_sat_imm)
|
TEST_F(copy_propagation_test, maxmax_sat_imm)
|
||||||
{
|
{
|
||||||
fs_reg vgrf0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg vgrf1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg vgrf2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_F);
|
||||||
|
|
||||||
static const struct {
|
static const struct {
|
||||||
enum brw_conditional_mod conditional_mod;
|
enum brw_conditional_mod conditional_mod;
|
||||||
|
|
|
||||||
|
|
@ -102,11 +102,11 @@ cse(fs_visitor *v)
|
||||||
|
|
||||||
TEST_F(cse_test, add3_invalid)
|
TEST_F(cse_test, add3_invalid)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.null_reg_d();
|
brw_reg dst0 = bld.null_reg_d();
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg src3 = bld.vgrf(BRW_TYPE_D);
|
brw_reg src3 = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
bld.ADD3(dst0, src0, src1, src2)
|
bld.ADD3(dst0, src0, src1, src2)
|
||||||
->conditional_mod = BRW_CONDITIONAL_NZ;
|
->conditional_mod = BRW_CONDITIONAL_NZ;
|
||||||
|
|
|
||||||
|
|
@ -120,10 +120,10 @@ saturate_propagation(fs_visitor *v)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, basic)
|
TEST_F(saturate_propagation_test, basic)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
||||||
|
|
@ -154,11 +154,11 @@ TEST_F(saturate_propagation_test, basic)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, other_non_saturated_use)
|
TEST_F(saturate_propagation_test, other_non_saturated_use)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
bld.ADD(dst2, dst0, src0);
|
bld.ADD(dst2, dst0, src0);
|
||||||
|
|
@ -191,10 +191,10 @@ TEST_F(saturate_propagation_test, other_non_saturated_use)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, predicated_instruction)
|
TEST_F(saturate_propagation_test, predicated_instruction)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1)
|
bld.ADD(dst0, src0, src1)
|
||||||
->predicate = BRW_PREDICATE_NORMAL;
|
->predicate = BRW_PREDICATE_NORMAL;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -225,9 +225,9 @@ TEST_F(saturate_propagation_test, predicated_instruction)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, neg_mov_sat)
|
TEST_F(saturate_propagation_test, neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.RNDU(dst0, src0);
|
bld.RNDU(dst0, src0);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -258,10 +258,10 @@ TEST_F(saturate_propagation_test, neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, add_neg_mov_sat)
|
TEST_F(saturate_propagation_test, add_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -295,10 +295,10 @@ TEST_F(saturate_propagation_test, add_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat)
|
TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = brw_imm_f(1.0f);
|
brw_reg src1 = brw_imm_f(1.0f);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -332,10 +332,10 @@ TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mul_neg_mov_sat)
|
TEST_F(saturate_propagation_test, mul_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MUL(dst0, src0, src1);
|
bld.MUL(dst0, src0, src1);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -369,11 +369,11 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mad_neg_mov_sat)
|
TEST_F(saturate_propagation_test, mad_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MAD(dst0, src0, src1, src2);
|
bld.MAD(dst0, src0, src1, src2);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -409,11 +409,11 @@ TEST_F(saturate_propagation_test, mad_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat)
|
TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = brw_imm_f(1.0f);
|
brw_reg src0 = brw_imm_f(1.0f);
|
||||||
fs_reg src1 = brw_imm_f(-2.0f);
|
brw_reg src1 = brw_imm_f(-2.0f);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F);
|
||||||
/* The builder for MAD tries to be helpful and not put immediates as direct
|
/* The builder for MAD tries to be helpful and not put immediates as direct
|
||||||
* sources. We want to test specifically that case.
|
* sources. We want to test specifically that case.
|
||||||
*/
|
*/
|
||||||
|
|
@ -453,11 +453,11 @@ TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat)
|
TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MUL(dst0, src0, src1);
|
bld.MUL(dst0, src0, src1);
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
|
|
@ -494,11 +494,11 @@ TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat)
|
TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MUL(dst0, src0, src1);
|
bld.MUL(dst0, src0, src1);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -536,10 +536,10 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, abs_mov_sat)
|
TEST_F(saturate_propagation_test, abs_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
dst0.abs = true;
|
dst0.abs = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -570,11 +570,11 @@ TEST_F(saturate_propagation_test, abs_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, producer_saturates)
|
TEST_F(saturate_propagation_test, producer_saturates)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
set_saturate(true, bld.ADD(dst0, src0, src1));
|
set_saturate(true, bld.ADD(dst0, src0, src1));
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
bld.MOV(dst2, dst0);
|
bld.MOV(dst2, dst0);
|
||||||
|
|
@ -608,11 +608,11 @@ TEST_F(saturate_propagation_test, producer_saturates)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, intervening_saturating_copy)
|
TEST_F(saturate_propagation_test, intervening_saturating_copy)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
set_saturate(true, bld.MOV(dst2, dst0));
|
set_saturate(true, bld.MOV(dst2, dst0));
|
||||||
|
|
@ -648,13 +648,13 @@ TEST_F(saturate_propagation_test, intervening_saturating_copy)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, intervening_dest_write)
|
TEST_F(saturate_propagation_test, intervening_dest_write)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F, 4);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F, 4);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src2 = bld.vgrf(BRW_TYPE_F, 2);
|
brw_reg src2 = bld.vgrf(BRW_TYPE_F, 2);
|
||||||
|
|
||||||
fs_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
|
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
|
||||||
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
|
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
|
||||||
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
|
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
|
||||||
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
|
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
|
||||||
|
|
@ -695,11 +695,11 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
|
TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.MUL(dst0, src0, src1);
|
bld.MUL(dst0, src0, src1);
|
||||||
dst0.negate = true;
|
dst0.negate = true;
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
@ -737,10 +737,10 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, smaller_exec_size_consumer)
|
TEST_F(saturate_propagation_test, smaller_exec_size_consumer)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.ADD(dst0, src0, src1);
|
bld.ADD(dst0, src0, src1);
|
||||||
set_saturate(true, bld.group(8, 0).MOV(dst1, dst0));
|
set_saturate(true, bld.group(8, 0).MOV(dst1, dst0));
|
||||||
|
|
||||||
|
|
@ -770,10 +770,10 @@ TEST_F(saturate_propagation_test, smaller_exec_size_consumer)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, larger_exec_size_consumer)
|
TEST_F(saturate_propagation_test, larger_exec_size_consumer)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.group(8, 0).ADD(dst0, src0, src1);
|
bld.group(8, 0).ADD(dst0, src0, src1);
|
||||||
set_saturate(true, bld.MOV(dst1, dst0));
|
set_saturate(true, bld.MOV(dst1, dst0));
|
||||||
|
|
||||||
|
|
@ -803,11 +803,11 @@ TEST_F(saturate_propagation_test, larger_exec_size_consumer)
|
||||||
|
|
||||||
TEST_F(saturate_propagation_test, offset_source_barrier)
|
TEST_F(saturate_propagation_test, offset_source_barrier)
|
||||||
{
|
{
|
||||||
fs_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst1 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
brw_reg dst2 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src0 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg src1 = bld.vgrf(BRW_TYPE_F);
|
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
|
||||||
bld.group(16, 0).ADD(dst0, src0, src1);
|
bld.group(16, 0).ADD(dst0, src0, src1);
|
||||||
bld.group(1, 0).ADD(dst1, component(dst0, 8), brw_imm_f(1.0f));
|
bld.group(1, 0).ADD(dst1, component(dst0, 8), brw_imm_f(1.0f));
|
||||||
set_saturate(true, bld.group(16, 0).MOV(dst2, dst0));
|
set_saturate(true, bld.group(16, 0).MOV(dst2, dst0));
|
||||||
|
|
|
||||||
|
|
@ -106,8 +106,8 @@ lower_scoreboard(fs_visitor *v)
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_inst *
|
fs_inst *
|
||||||
emit_SEND(const fs_builder &bld, const fs_reg &dst,
|
emit_SEND(const fs_builder &bld, const brw_reg &dst,
|
||||||
const fs_reg &desc, const fs_reg &payload)
|
const brw_reg &desc, const brw_reg &payload)
|
||||||
{
|
{
|
||||||
fs_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload);
|
fs_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload);
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
|
|
@ -150,12 +150,12 @@ std::ostream &operator<<(std::ostream &os, const tgl_swsb &swsb) {
|
||||||
|
|
||||||
TEST_F(scoreboard_test, RAW_inorder_inorder)
|
TEST_F(scoreboard_test, RAW_inorder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg y = bld.vgrf(BRW_TYPE_D);
|
brw_reg y = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD( x, g[1], g[2]);
|
bld.ADD( x, g[1], g[2]);
|
||||||
bld.MUL( y, g[3], g[4]);
|
bld.MUL( y, g[3], g[4]);
|
||||||
bld.AND(g[5], x, y);
|
bld.AND(g[5], x, y);
|
||||||
|
|
@ -176,11 +176,11 @@ TEST_F(scoreboard_test, RAW_inorder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, RAW_inorder_outoforder)
|
TEST_F(scoreboard_test, RAW_inorder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD( x, g[1], g[2]);
|
bld.ADD( x, g[1], g[2]);
|
||||||
bld.MUL( g[3], g[4], g[5]);
|
bld.MUL( g[3], g[4], g[5]);
|
||||||
emit_SEND(bld, g[6], g[7], x);
|
emit_SEND(bld, g[6], g[7], x);
|
||||||
|
|
@ -201,12 +201,12 @@ TEST_F(scoreboard_test, RAW_inorder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, RAW_outoforder_inorder)
|
TEST_F(scoreboard_test, RAW_outoforder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg y = bld.vgrf(BRW_TYPE_D);
|
brw_reg y = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, x, g[1], g[2]);
|
emit_SEND(bld, x, g[1], g[2]);
|
||||||
bld.MUL( y, g[3], g[4]);
|
bld.MUL( y, g[3], g[4]);
|
||||||
bld.AND( g[5], x, y);
|
bld.AND( g[5], x, y);
|
||||||
|
|
@ -227,7 +227,7 @@ TEST_F(scoreboard_test, RAW_outoforder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
|
|
@ -235,7 +235,7 @@ TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
||||||
* SBIDs. Since it is not possible we expect a SYNC instruction to be
|
* SBIDs. Since it is not possible we expect a SYNC instruction to be
|
||||||
* added.
|
* added.
|
||||||
*/
|
*/
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, x, g[1], g[2]);
|
emit_SEND(bld, x, g[1], g[2]);
|
||||||
emit_SEND(bld, g[3], x, g[4])->sfid++;
|
emit_SEND(bld, g[3], x, g[4])->sfid++;
|
||||||
|
|
||||||
|
|
@ -259,11 +259,11 @@ TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAR_inorder_inorder)
|
TEST_F(scoreboard_test, WAR_inorder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD(g[1], x, g[2]);
|
bld.ADD(g[1], x, g[2]);
|
||||||
bld.MUL(g[3], g[4], g[5]);
|
bld.MUL(g[3], g[4], g[5]);
|
||||||
bld.AND( x, g[6], g[7]);
|
bld.AND( x, g[6], g[7]);
|
||||||
|
|
@ -284,11 +284,11 @@ TEST_F(scoreboard_test, WAR_inorder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAR_inorder_outoforder)
|
TEST_F(scoreboard_test, WAR_inorder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD( g[1], x, g[2]);
|
bld.ADD( g[1], x, g[2]);
|
||||||
bld.MUL( g[3], g[4], g[5]);
|
bld.MUL( g[3], g[4], g[5]);
|
||||||
emit_SEND(bld, x, g[6], g[7]);
|
emit_SEND(bld, x, g[6], g[7]);
|
||||||
|
|
@ -309,11 +309,11 @@ TEST_F(scoreboard_test, WAR_inorder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAR_outoforder_inorder)
|
TEST_F(scoreboard_test, WAR_outoforder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, g[1], g[2], x);
|
emit_SEND(bld, g[1], g[2], x);
|
||||||
bld.MUL( g[4], g[5], g[6]);
|
bld.MUL( g[4], g[5], g[6]);
|
||||||
bld.AND( x, g[7], g[8]);
|
bld.AND( x, g[7], g[8]);
|
||||||
|
|
@ -334,11 +334,11 @@ TEST_F(scoreboard_test, WAR_outoforder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAR_outoforder_outoforder)
|
TEST_F(scoreboard_test, WAR_outoforder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, g[1], g[2], x);
|
emit_SEND(bld, g[1], g[2], x);
|
||||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||||
|
|
||||||
|
|
@ -362,11 +362,11 @@ TEST_F(scoreboard_test, WAR_outoforder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAW_inorder_inorder)
|
TEST_F(scoreboard_test, WAW_inorder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD( x, g[1], g[2]);
|
bld.ADD( x, g[1], g[2]);
|
||||||
bld.MUL(g[3], g[4], g[5]);
|
bld.MUL(g[3], g[4], g[5]);
|
||||||
bld.AND( x, g[6], g[7]);
|
bld.AND( x, g[6], g[7]);
|
||||||
|
|
@ -392,11 +392,11 @@ TEST_F(scoreboard_test, WAW_inorder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAW_inorder_outoforder)
|
TEST_F(scoreboard_test, WAW_inorder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.ADD( x, g[1], g[2]);
|
bld.ADD( x, g[1], g[2]);
|
||||||
bld.MUL( g[3], g[4], g[5]);
|
bld.MUL( g[3], g[4], g[5]);
|
||||||
emit_SEND(bld, x, g[6], g[7]);
|
emit_SEND(bld, x, g[6], g[7]);
|
||||||
|
|
@ -417,11 +417,11 @@ TEST_F(scoreboard_test, WAW_inorder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, x, g[1], g[2]);
|
emit_SEND(bld, x, g[1], g[2]);
|
||||||
bld.MUL( g[3], g[4], g[5]);
|
bld.MUL( g[3], g[4], g[5]);
|
||||||
bld.AND( x, g[6], g[7]);
|
bld.AND( x, g[6], g[7]);
|
||||||
|
|
@ -442,11 +442,11 @@ TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, WAW_outoforder_outoforder)
|
TEST_F(scoreboard_test, WAW_outoforder_outoforder)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
emit_SEND(bld, x, g[1], g[2]);
|
emit_SEND(bld, x, g[1], g[2]);
|
||||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||||
|
|
||||||
|
|
@ -471,11 +471,11 @@ TEST_F(scoreboard_test, WAW_outoforder_outoforder)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, loop1)
|
TEST_F(scoreboard_test, loop1)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
|
|
||||||
bld.emit(BRW_OPCODE_DO);
|
bld.emit(BRW_OPCODE_DO);
|
||||||
|
|
@ -501,11 +501,11 @@ TEST_F(scoreboard_test, loop1)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, loop2)
|
TEST_F(scoreboard_test, loop2)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.XOR(g[3], g[1], g[2]);
|
bld.XOR(g[3], g[1], g[2]);
|
||||||
bld.XOR(g[4], g[1], g[2]);
|
bld.XOR(g[4], g[1], g[2]);
|
||||||
|
|
@ -536,11 +536,11 @@ TEST_F(scoreboard_test, loop2)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, loop3)
|
TEST_F(scoreboard_test, loop3)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
|
|
||||||
bld.emit(BRW_OPCODE_DO);
|
bld.emit(BRW_OPCODE_DO);
|
||||||
|
|
@ -573,11 +573,11 @@ TEST_F(scoreboard_test, loop3)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional1)
|
TEST_F(scoreboard_test, conditional1)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -602,11 +602,11 @@ TEST_F(scoreboard_test, conditional1)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional2)
|
TEST_F(scoreboard_test, conditional2)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.XOR(g[3], g[1], g[2]);
|
bld.XOR(g[3], g[1], g[2]);
|
||||||
bld.XOR(g[4], g[1], g[2]);
|
bld.XOR(g[4], g[1], g[2]);
|
||||||
|
|
@ -634,11 +634,11 @@ TEST_F(scoreboard_test, conditional2)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional3)
|
TEST_F(scoreboard_test, conditional3)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -666,11 +666,11 @@ TEST_F(scoreboard_test, conditional3)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional4)
|
TEST_F(scoreboard_test, conditional4)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -698,11 +698,11 @@ TEST_F(scoreboard_test, conditional4)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional5)
|
TEST_F(scoreboard_test, conditional5)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -735,11 +735,11 @@ TEST_F(scoreboard_test, conditional5)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional6)
|
TEST_F(scoreboard_test, conditional6)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -779,11 +779,11 @@ TEST_F(scoreboard_test, conditional6)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional7)
|
TEST_F(scoreboard_test, conditional7)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.emit(BRW_OPCODE_IF);
|
bld.emit(BRW_OPCODE_IF);
|
||||||
|
|
||||||
|
|
@ -823,11 +823,11 @@ TEST_F(scoreboard_test, conditional7)
|
||||||
|
|
||||||
TEST_F(scoreboard_test, conditional8)
|
TEST_F(scoreboard_test, conditional8)
|
||||||
{
|
{
|
||||||
fs_reg g[16];
|
brw_reg g[16];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||||
g[i] = bld.vgrf(BRW_TYPE_D);
|
g[i] = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
bld.XOR( x, g[1], g[2]);
|
bld.XOR( x, g[1], g[2]);
|
||||||
bld.XOR(g[3], g[1], g[2]);
|
bld.XOR(g[3], g[1], g[2]);
|
||||||
bld.XOR(g[4], g[1], g[2]);
|
bld.XOR(g[4], g[1], g[2]);
|
||||||
|
|
@ -871,10 +871,10 @@ TEST_F(scoreboard_test, gfx125_RaR_over_different_pipes)
|
||||||
devinfo->verx10 = 125;
|
devinfo->verx10 = 125;
|
||||||
brw_init_isa_info(&compiler->isa, devinfo);
|
brw_init_isa_info(&compiler->isa, devinfo);
|
||||||
|
|
||||||
fs_reg a = bld.vgrf(BRW_TYPE_D);
|
brw_reg a = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg b = bld.vgrf(BRW_TYPE_D);
|
brw_reg b = bld.vgrf(BRW_TYPE_D);
|
||||||
fs_reg f = bld.vgrf(BRW_TYPE_F);
|
brw_reg f = bld.vgrf(BRW_TYPE_F);
|
||||||
fs_reg x = bld.vgrf(BRW_TYPE_D);
|
brw_reg x = bld.vgrf(BRW_TYPE_D);
|
||||||
|
|
||||||
bld.ADD(f, x, x);
|
bld.ADD(f, x, x);
|
||||||
bld.ADD(a, x, x);
|
bld.ADD(a, x, x);
|
||||||
|
|
|
||||||
|
|
@ -140,9 +140,9 @@ TEST_F(PredicatedBreakTest, TopBreakWithoutContinue)
|
||||||
fs_builder a = make_builder(shader_a);
|
fs_builder a = make_builder(shader_a);
|
||||||
fs_builder b = make_builder(shader_b);
|
fs_builder b = make_builder(shader_b);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg r3 = brw_vec8_grf(3, 0);
|
brw_reg r3 = brw_vec8_grf(3, 0);
|
||||||
|
|
||||||
a.DO();
|
a.DO();
|
||||||
a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ);
|
a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ);
|
||||||
|
|
@ -174,9 +174,9 @@ TEST_F(PredicatedBreakTest, TopBreakWithContinue)
|
||||||
fs_builder a = make_builder(shader_a);
|
fs_builder a = make_builder(shader_a);
|
||||||
fs_builder b = make_builder(shader_b);
|
fs_builder b = make_builder(shader_b);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg r3 = brw_vec8_grf(3, 0);
|
brw_reg r3 = brw_vec8_grf(3, 0);
|
||||||
|
|
||||||
a.DO();
|
a.DO();
|
||||||
a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ);
|
a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ);
|
||||||
|
|
@ -218,9 +218,9 @@ TEST_F(PredicatedBreakTest, DISABLED_BottomBreakWithoutContinue)
|
||||||
fs_builder a = make_builder(shader_a);
|
fs_builder a = make_builder(shader_a);
|
||||||
fs_builder b = make_builder(shader_b);
|
fs_builder b = make_builder(shader_b);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg r3 = brw_vec8_grf(3, 0);
|
brw_reg r3 = brw_vec8_grf(3, 0);
|
||||||
|
|
||||||
a.DO();
|
a.DO();
|
||||||
a.ADD(r1, r2, r3);
|
a.ADD(r1, r2, r3);
|
||||||
|
|
@ -256,9 +256,9 @@ TEST_F(PredicatedBreakTest, BottomBreakWithContinue)
|
||||||
fs_builder a = make_builder(shader_a);
|
fs_builder a = make_builder(shader_a);
|
||||||
fs_builder b = make_builder(shader_b);
|
fs_builder b = make_builder(shader_b);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg r3 = brw_vec8_grf(3, 0);
|
brw_reg r3 = brw_vec8_grf(3, 0);
|
||||||
|
|
||||||
a.DO();
|
a.DO();
|
||||||
a.ADD(r1, r2, r3);
|
a.ADD(r1, r2, r3);
|
||||||
|
|
@ -300,9 +300,9 @@ TEST_F(PredicatedBreakTest, TwoBreaks)
|
||||||
fs_builder a = make_builder(shader_a);
|
fs_builder a = make_builder(shader_a);
|
||||||
fs_builder b = make_builder(shader_b);
|
fs_builder b = make_builder(shader_b);
|
||||||
|
|
||||||
fs_reg r1 = brw_vec8_grf(1, 0);
|
brw_reg r1 = brw_vec8_grf(1, 0);
|
||||||
fs_reg r2 = brw_vec8_grf(2, 0);
|
brw_reg r2 = brw_vec8_grf(2, 0);
|
||||||
fs_reg r3 = brw_vec8_grf(3, 0);
|
brw_reg r3 = brw_vec8_grf(3, 0);
|
||||||
|
|
||||||
a.DO();
|
a.DO();
|
||||||
a.ADD(r1, r2, r3);
|
a.ADD(r1, r2, r3);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue