diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 6aafd6aafec..90e750d7b4f 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -48,11 +48,11 @@ using namespace brw; static void -initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources); +initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources); void -fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, - const fs_reg *src, unsigned sources) +fs_inst::init(enum opcode opcode, uint8_t exec_size, const brw_reg &dst, + const brw_reg *src, unsigned sources) { memset((void*)this, 0, sizeof(*this)); @@ -100,34 +100,34 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size) init(opcode, exec_size, reg_undef, NULL, 0); } -fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst) { init(opcode, exec_size, dst, NULL, 0); } -fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, - const fs_reg &src0) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst, + const brw_reg &src0) { - const fs_reg src[1] = { src0 }; + const brw_reg src[1] = { src0 }; init(opcode, exec_size, dst, src, 1); } -fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst, + const brw_reg &src0, const brw_reg &src1) { - const fs_reg src[2] = { src0, src1 }; + const brw_reg src[2] = { src0, src1 }; init(opcode, exec_size, dst, src, 2); } -fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1, const fs_reg &src2) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst, + const brw_reg &src0, const brw_reg &src1, const brw_reg &src2) { - const fs_reg src[3] = { src0, src1, src2 }; + const brw_reg src[3] = { src0, src1, src2 }; init(opcode, exec_size, dst, src, 3); } -fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, - const fs_reg src[], unsigned sources) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const brw_reg &dst, + const brw_reg src[], unsigned sources) { init(opcode, exec_width, dst, src, sources); } @@ -145,10 +145,10 @@ fs_inst::~fs_inst() } static void -initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources) +initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources) { if (num_sources > ARRAY_SIZE(inst->builtin_src)) - inst->src = new fs_reg[num_sources]; + inst->src = new brw_reg[num_sources]; else inst->src = inst->builtin_src; @@ -164,14 +164,14 @@ fs_inst::resize_sources(uint8_t num_sources) if (this->sources == num_sources) return; - fs_reg *old_src = this->src; - fs_reg *new_src; + brw_reg *old_src = this->src; + brw_reg *new_src; const unsigned builtin_size = ARRAY_SIZE(this->builtin_src); if (old_src == this->builtin_src) { if (num_sources > builtin_size) { - new_src = new fs_reg[num_sources]; + new_src = new brw_reg[num_sources]; for (unsigned i = 0; i < this->sources; i++) new_src[i] = old_src[i]; @@ -189,7 +189,7 @@ fs_inst::resize_sources(uint8_t num_sources) new_src = old_src; } else { - new_src = new fs_reg[num_sources]; + new_src = new brw_reg[num_sources]; for (unsigned i = 0; i < num_sources; i++) new_src[i] = old_src[i]; } @@ -204,10 +204,10 @@ fs_inst::resize_sources(uint8_t num_sources) void 
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, - const fs_reg &dst, - const fs_reg &surface, - const fs_reg &surface_handle, - const fs_reg &varying_offset, + const brw_reg &dst, + const brw_reg &surface, + const brw_reg &surface_handle, + const brw_reg &varying_offset, uint32_t const_offset, uint8_t alignment, unsigned components) @@ -218,7 +218,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * be any component of a vector, and then we load 4 contiguous * components starting from that. TODO: Support loading fewer than 4. */ - fs_reg total_offset = bld.ADD(varying_offset, brw_imm_ud(const_offset)); + brw_reg total_offset = bld.ADD(varying_offset, brw_imm_ud(const_offset)); /* The pull load message will load a vec4 (16 bytes). If we are loading * a double this means we are only loading 2 elements worth of data. @@ -226,9 +226,9 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * so other parts of the driver don't get confused about the size of the * result. */ - fs_reg vec4_result = bld.vgrf(BRW_TYPE_F, 4); + brw_reg vec4_result = bld.vgrf(BRW_TYPE_F, 4); - fs_reg srcs[PULL_VARYING_CONSTANT_SRCS]; + brw_reg srcs[PULL_VARYING_CONSTANT_SRCS]; srcs[PULL_VARYING_CONSTANT_SRC_SURFACE] = surface; srcs[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle; srcs[PULL_VARYING_CONSTANT_SRC_OFFSET] = total_offset; @@ -668,7 +668,7 @@ fs_inst::is_partial_write() const /* Special case UNDEF since a lot of places in the backend do things like this : * * fs_builder ubld = bld.exec_all().group(1, 0); - * fs_reg tmp = ubld.vgrf(BRW_TYPE_UD); + * brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); * ubld.UNDEF(tmp); <- partial write, even if the whole register is concerned */ if (this->opcode == SHADER_OPCODE_UNDEF) { @@ -1151,13 +1151,13 @@ fs_visitor::emit_gs_thread_end() if (mark_last_urb_write_with_eot()) return; - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(0); inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); } else { - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count; srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); @@ -1212,7 +1212,7 @@ fs_visitor::assign_curb_setup() /* The base offset for our push data is passed in as R0.0[31:6]. We have * to mask off the bottom 6 bits. */ - fs_reg base_addr = + brw_reg base_addr = ubld.AND(retype(brw_vec1_grf(0, 0), BRW_TYPE_UD), brw_imm_ud(INTEL_MASK(31, 6))); @@ -1228,17 +1228,17 @@ fs_visitor::assign_curb_setup() /* This pass occurs after all of the optimization passes, so don't * emit an 'ADD addr, base_addr, 0' instruction. */ - fs_reg addr = i == 0 ? base_addr : + brw_reg addr = i == 0 ? 
base_addr : ubld.ADD(base_addr, brw_imm_ud(i * REG_SIZE)); - fs_reg srcs[4] = { + brw_reg srcs[4] = { brw_imm_ud(0), /* desc */ brw_imm_ud(0), /* ex_desc */ addr, /* payload */ - fs_reg(), /* payload2 */ + brw_reg(), /* payload2 */ }; - fs_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0), + brw_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0), BRW_TYPE_UD); fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4); @@ -1310,10 +1310,10 @@ fs_visitor::assign_curb_setup() struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8, mask_param % 8); - fs_reg b32; + brw_reg b32; for (unsigned i = 0; i < 64; i++) { if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) { - fs_reg shifted = ubld.vgrf(BRW_TYPE_W, 2); + brw_reg shifted = ubld.vgrf(BRW_TYPE_W, 2); ubld.SHL(horiz_offset(shifted, 8), byte_offset(retype(mask, BRW_TYPE_W), i / 8), brw_imm_v(0x01234567)); @@ -1625,13 +1625,13 @@ fs_visitor::assign_urb_setup() foreach_block_and_inst(block, fs_inst, inst, cfg) { for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == ATTR) { - /* ATTR fs_reg::nr in the FS is in units of logical scalar + /* ATTR brw_reg::nr in the FS is in units of logical scalar * inputs each of which consumes 16B on Gfx4-Gfx12. In * single polygon mode this leads to the following layout * of the vertex setup plane parameters in the ATTR * register file: * - * fs_reg::nr Input Comp0 Comp1 Comp2 Comp3 + * brw_reg::nr Input Comp0 Comp1 Comp2 Comp3 * 0 Attr0.x a1-a0 a2-a0 N/A a0 * 1 Attr0.y a1-a0 a2-a0 N/A a0 * 2 Attr0.z a1-a0 a2-a0 N/A a0 @@ -1644,7 +1644,7 @@ fs_visitor::assign_urb_setup() * different plane parameters, so each parameter above is * represented as a dispatch_width-wide vector: * - * fs_reg::nr fs_reg::offset Input Comp0 ... CompN + * brw_reg::nr brw_reg::offset Input Comp0 ... CompN * 0 0 Attr0.x a1[0]-a0[0] ... a1[N]-a0[N] * 0 4 * dispatch_width Attr0.x a2[0]-a0[0] ... a2[N]-a0[N] * 0 8 * dispatch_width Attr0.x N/A ... N/A @@ -1955,7 +1955,7 @@ fs_visitor::assign_constant_locations() } bool -fs_visitor::get_pull_locs(const fs_reg &src, +fs_visitor::get_pull_locs(const brw_reg &src, unsigned *out_surf_index, unsigned *out_pull_index) { @@ -1993,11 +1993,11 @@ fs_visitor::emit_repclear_shader() assert(uniforms == 0); assume(key->nr_color_regions > 0); - fs_reg color_output = retype(brw_vec4_grf(127, 0), BRW_TYPE_UD); - fs_reg header = retype(brw_vec8_grf(125, 0), BRW_TYPE_UD); + brw_reg color_output = retype(brw_vec4_grf(127, 0), BRW_TYPE_UD); + brw_reg header = retype(brw_vec8_grf(125, 0), BRW_TYPE_UD); /* We pass the clear color as a flat input. Copy it to the output. */ - fs_reg color_input = + brw_reg color_input = brw_make_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_TYPE_UD, BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); @@ -2047,7 +2047,7 @@ fs_visitor::emit_repclear_shader() * thread payload, \p bld is required to have a dispatch_width() not greater * than 16 for fragment shaders. 
*/ -fs_reg +brw_reg brw_sample_mask_reg(const fs_builder &bld) { const fs_visitor &s = *bld.shader; @@ -2107,7 +2107,7 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst) bld.dispatch_width() == inst->exec_size); const fs_visitor &s = *bld.shader; - const fs_reg sample_mask = brw_sample_mask_reg(bld); + const brw_reg sample_mask = brw_sample_mask_reg(bld); const unsigned subreg = sample_mask_flag_subreg(s); if (s.devinfo->ver >= 20 || brw_wm_prog_data(s.prog_data)->uses_kill) { @@ -3011,8 +3011,8 @@ fs_visitor::set_tcs_invocation_id() * * 22:16 on gfx11+ * * 23:17 otherwise */ - fs_reg t = - bld.AND(fs_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)), + brw_reg t = + bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)), brw_imm_ud(instance_id_mask)); if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) { @@ -3023,9 +3023,9 @@ fs_visitor::set_tcs_invocation_id() assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH); - fs_reg channels_uw = bld.vgrf(BRW_TYPE_UW); - fs_reg channels_ud = bld.vgrf(BRW_TYPE_UD); - bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210))); + brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW); + brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD); + bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210))); bld.MOV(channels_ud, channels_uw); if (tcs_prog_data->instances == 1) { @@ -3054,7 +3054,7 @@ fs_visitor::emit_tcs_thread_end() * algorithm to set it optimally). On other platforms, we simply write * zero to a reserved/MBZ patch header DWord which has no consequence. */ - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); @@ -3258,7 +3258,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) * stored in R0.15/R1.15 on gfx20+ and in R1.7/R2.7 on * gfx6+. */ - const fs_reg dispatch_mask = + const brw_reg dispatch_mask = devinfo->ver >= 20 ? 
xe2_vec1_grf(i, 15) : brw_vec1_grf(i + 1, 7); bld.exec_all().group(1, 0) @@ -4533,18 +4533,18 @@ bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag) } namespace brw { - fs_reg + brw_reg fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], brw_reg_type type, unsigned n) { if (!regs[0]) - return fs_reg(); + return brw_reg(); if (bld.dispatch_width() > 16) { - const fs_reg tmp = bld.vgrf(type, n); + const brw_reg tmp = bld.vgrf(type, n); const brw::fs_builder hbld = bld.exec_all().group(16, 0); const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); - fs_reg *const components = new fs_reg[m * n]; + brw_reg *const components = new brw_reg[m * n]; for (unsigned c = 0; c < n; c++) { for (unsigned g = 0; g < m; g++) @@ -4558,22 +4558,22 @@ namespace brw { return tmp; } else { - return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); + return brw_reg(retype(brw_vec8_grf(regs[0], 0), type)); } } - fs_reg + brw_reg fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]) { if (!regs[0]) - return fs_reg(); + return brw_reg(); else if (bld.shader->devinfo->ver >= 20) return fetch_payload_reg(bld, regs, BRW_TYPE_F, 2); - const fs_reg tmp = bld.vgrf(BRW_TYPE_F, 2); + const brw_reg tmp = bld.vgrf(BRW_TYPE_F, 2); const brw::fs_builder hbld = bld.exec_all().group(8, 0); const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); - fs_reg *const components = new fs_reg[2 * m]; + brw_reg *const components = new brw_reg[2 * m]; for (unsigned c = 0; c < 2; c++) { for (unsigned g = 0; g < m; g++) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 2bb6b8814fc..2a1cbaa095e 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -76,21 +76,21 @@ namespace brw { ~def_analysis(); fs_inst * - get(const fs_reg ®) const + get(const brw_reg ®) const { return reg.file == VGRF && reg.nr < def_count ? def_insts[reg.nr] : NULL; } bblock_t * - get_block(const fs_reg ®) const + get_block(const brw_reg ®) const { return reg.file == VGRF && reg.nr < def_count ? def_blocks[reg.nr] : NULL; } uint32_t - get_use_count(const fs_reg ®) const + get_use_count(const brw_reg ®) const { return reg.file == VGRF && reg.nr < def_count ? 
def_use_counts[reg.nr] : 0; @@ -164,33 +164,33 @@ protected: struct vs_thread_payload : public thread_payload { vs_thread_payload(const fs_visitor &v); - fs_reg urb_handles; + brw_reg urb_handles; }; struct tcs_thread_payload : public thread_payload { tcs_thread_payload(const fs_visitor &v); - fs_reg patch_urb_output; - fs_reg primitive_id; - fs_reg icp_handle_start; + brw_reg patch_urb_output; + brw_reg primitive_id; + brw_reg icp_handle_start; }; struct tes_thread_payload : public thread_payload { tes_thread_payload(const fs_visitor &v); - fs_reg patch_urb_input; - fs_reg primitive_id; - fs_reg coords[3]; - fs_reg urb_output; + brw_reg patch_urb_input; + brw_reg primitive_id; + brw_reg coords[3]; + brw_reg urb_output; }; struct gs_thread_payload : public thread_payload { gs_thread_payload(fs_visitor &v); - fs_reg urb_handles; - fs_reg primitive_id; - fs_reg instance_id; - fs_reg icp_handle_start; + brw_reg urb_handles; + brw_reg primitive_id; + brw_reg instance_id; + brw_reg icp_handle_start; }; struct fs_thread_payload : public thread_payload { @@ -215,34 +215,34 @@ struct fs_thread_payload : public thread_payload { struct cs_thread_payload : public thread_payload { cs_thread_payload(const fs_visitor &v); - void load_subgroup_id(const brw::fs_builder &bld, fs_reg &dest) const; + void load_subgroup_id(const brw::fs_builder &bld, brw_reg &dest) const; - fs_reg local_invocation_id[3]; + brw_reg local_invocation_id[3]; protected: - fs_reg subgroup_id_; + brw_reg subgroup_id_; }; struct task_mesh_thread_payload : public cs_thread_payload { task_mesh_thread_payload(fs_visitor &v); - fs_reg extended_parameter_0; - fs_reg local_index; - fs_reg inline_parameter; + brw_reg extended_parameter_0; + brw_reg local_index; + brw_reg inline_parameter; - fs_reg urb_output; + brw_reg urb_output; /* URB to read Task memory inputs. Only valid for MESH stage. 
*/ - fs_reg task_urb_input; + brw_reg task_urb_input; }; struct bs_thread_payload : public thread_payload { bs_thread_payload(const fs_visitor &v); - fs_reg global_arg_ptr; - fs_reg local_arg_ptr; + brw_reg global_arg_ptr; + brw_reg local_arg_ptr; - void load_shader_type(const brw::fs_builder &bld, fs_reg &dest) const; + void load_shader_type(const brw::fs_builder &bld, brw_reg &dest) const; }; enum instruction_scheduler_mode { @@ -293,10 +293,10 @@ public: void import_uniforms(fs_visitor *v); void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, - const fs_reg &dst, - const fs_reg &surface, - const fs_reg &surface_handle, - const fs_reg &varying_offset, + const brw_reg &dst, + const brw_reg &surface, + const brw_reg &surface_handle, + const brw_reg &varying_offset, uint32_t const_offset, uint8_t alignment, unsigned components); @@ -324,7 +324,7 @@ public: void calculate_payload_ranges(unsigned payload_node_count, int *payload_last_use_ip) const; void assign_constant_locations(); - bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index, + bool get_pull_locs(const brw_reg &src, unsigned *out_surf_index, unsigned *out_pull_index); void invalidate_analysis(brw::analysis_dependency_class c); @@ -343,23 +343,23 @@ public: void set_tcs_invocation_id(); fs_inst *emit_single_fb_write(const brw::fs_builder &bld, - fs_reg color1, fs_reg color2, - fs_reg src0_alpha, unsigned components); + brw_reg color1, brw_reg color2, + brw_reg src0_alpha, unsigned components); void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha); void emit_fb_writes(); - void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg()); - void emit_gs_control_data_bits(const fs_reg &vertex_count); - fs_reg gs_urb_channel_mask(const fs_reg &dword_index); - fs_reg gs_urb_per_slot_dword_index(const fs_reg &vertex_count); + void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg()); + void emit_gs_control_data_bits(const brw_reg &vertex_count); + brw_reg gs_urb_channel_mask(const brw_reg &dword_index); + brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count); void emit_gs_thread_end(); bool mark_last_urb_write_with_eot(); void emit_tcs_thread_end(); void emit_urb_fence(); void emit_cs_terminate(); - fs_reg interp_reg(const brw::fs_builder &bld, unsigned location, + brw_reg interp_reg(const brw::fs_builder &bld, unsigned location, unsigned channel, unsigned comp); - fs_reg per_primitive_reg(const brw::fs_builder &bld, + brw_reg per_primitive_reg(const brw::fs_builder &bld, int location, unsigned comp); void dump_instruction_to_file(const fs_inst *inst, FILE *file, const brw::def_analysis *defs) const; @@ -416,11 +416,11 @@ public: */ int *push_constant_loc; - fs_reg frag_depth; - fs_reg frag_stencil; - fs_reg sample_mask; - fs_reg outputs[VARYING_SLOT_MAX]; - fs_reg dual_src_output; + brw_reg frag_depth; + brw_reg frag_stencil; + brw_reg sample_mask; + brw_reg outputs[VARYING_SLOT_MAX]; + brw_reg dual_src_output; int first_non_payload_grf; bool failed; @@ -479,15 +479,15 @@ public: bool source_depth_to_render_target; - fs_reg pixel_x; - fs_reg pixel_y; - fs_reg pixel_z; - fs_reg wpos_w; - fs_reg pixel_w; - fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT]; - fs_reg final_gs_vertex_count; - fs_reg control_data_bits; - fs_reg invocation_id; + brw_reg pixel_x; + brw_reg pixel_y; + brw_reg pixel_z; + brw_reg wpos_w; + brw_reg pixel_w; + brw_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT]; + brw_reg final_gs_vertex_count; + brw_reg control_data_bits; + brw_reg invocation_id; unsigned grf_used; bool 
spilled_any_registers; @@ -597,15 +597,15 @@ private: }; namespace brw { - fs_reg + brw_reg fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], brw_reg_type type = BRW_TYPE_F, unsigned n = 1); - fs_reg + brw_reg fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]); - inline fs_reg + inline brw_reg dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data) { return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD); @@ -621,8 +621,8 @@ namespace brw { } void shuffle_from_32bit_read(const brw::fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, + const brw_reg &dst, + const brw_reg &src, uint32_t first_component, uint32_t components); @@ -636,7 +636,7 @@ void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data); bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width); -fs_reg brw_sample_mask_reg(const brw::fs_builder &bld); +brw_reg brw_sample_mask_reg(const brw::fs_builder &bld); void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst); int brw_get_subgroup_id_param_index(const intel_device_info *devinfo, diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index 00f38fbf50a..62a8b1bcfe1 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -480,7 +480,7 @@ namespace { * possibly incur bank conflicts. */ bool - is_grf(const fs_reg &r) + is_grf(const brw_reg &r) { return r.file == VGRF || r.file == FIXED_GRF; } @@ -492,7 +492,7 @@ namespace { * allocation or whether it was part of a VGRF allocation. */ unsigned - reg_of(const fs_reg &r) + reg_of(const brw_reg &r) { assert(is_grf(r)); if (r.file == VGRF) @@ -871,8 +871,8 @@ namespace { * Apply the GRF atom permutation given by \p map to register \p r and * return the result. */ - fs_reg - transform(const partitioning &p, const permutation &map, fs_reg r) + brw_reg + transform(const partitioning &p, const permutation &map, brw_reg r) { if (r.file == VGRF) { const unsigned reg = reg_of(r); diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index f2b344afea4..23bacac1b3d 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -184,7 +184,7 @@ namespace brw { * dispatch_width units (which is just enough space for one logical * component in this IR). */ - fs_reg + brw_reg vgrf(enum brw_reg_type type, unsigned n = 1) const { const unsigned unit = reg_unit(shader->devinfo); @@ -202,34 +202,34 @@ namespace brw { /** * Create a null register of floating type. */ - fs_reg + brw_reg null_reg_f() const { - return fs_reg(retype(brw_null_reg(), BRW_TYPE_F)); + return brw_reg(retype(brw_null_reg(), BRW_TYPE_F)); } - fs_reg + brw_reg null_reg_df() const { - return fs_reg(retype(brw_null_reg(), BRW_TYPE_DF)); + return brw_reg(retype(brw_null_reg(), BRW_TYPE_DF)); } /** * Create a null register of signed integer type. */ - fs_reg + brw_reg null_reg_d() const { - return fs_reg(retype(brw_null_reg(), BRW_TYPE_D)); + return brw_reg(retype(brw_null_reg(), BRW_TYPE_D)); } /** * Create a null register of unsigned integer type. */ - fs_reg + brw_reg null_reg_ud() const { - return fs_reg(retype(brw_null_reg(), BRW_TYPE_UD)); + return brw_reg(retype(brw_null_reg(), BRW_TYPE_UD)); } /** @@ -254,7 +254,7 @@ namespace brw { * Create and insert a nullary instruction into the program. 
*/ fs_inst * - emit(enum opcode opcode, const fs_reg &dst) const + emit(enum opcode opcode, const brw_reg &dst) const { return emit(fs_inst(opcode, dispatch_width(), dst)); } @@ -263,7 +263,7 @@ namespace brw { * Create and insert a unary instruction into the program. */ fs_inst * - emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) const + emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0) const { return emit(fs_inst(opcode, dispatch_width(), dst, src0)); } @@ -272,8 +272,8 @@ namespace brw { * Create and insert a binary instruction into the program. */ fs_inst * - emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1) const + emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0, + const brw_reg &src1) const { return emit(fs_inst(opcode, dispatch_width(), dst, src0, src1)); @@ -283,8 +283,8 @@ namespace brw { * Create and insert a ternary instruction into the program. */ fs_inst * - emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, const fs_reg &src2) const + emit(enum opcode opcode, const brw_reg &dst, const brw_reg &src0, + const brw_reg &src1, const brw_reg &src2) const { switch (opcode) { case BRW_OPCODE_BFE: @@ -307,7 +307,7 @@ namespace brw { * into the program. */ fs_inst * - emit(enum opcode opcode, const fs_reg &dst, const fs_reg srcs[], + emit(enum opcode opcode, const brw_reg &dst, const brw_reg srcs[], unsigned n) const { /* Use the emit() methods for specific operand counts to ensure that @@ -352,8 +352,8 @@ namespace brw { * Generally useful to get the minimum or maximum of two values. */ fs_inst * - emit_minmax(const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, brw_conditional_mod mod) const + emit_minmax(const brw_reg &dst, const brw_reg &src0, + const brw_reg &src1, brw_conditional_mod mod) const { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); @@ -367,8 +367,8 @@ namespace brw { /** * Copy any live channel from \p src to the first channel of the result. */ - fs_reg - emit_uniformize(const fs_reg &src) const + brw_reg + emit_uniformize(const brw_reg &src) const { /* FIXME: We use a vector chan_index and dst to allow constant and * copy propagration to move result all the way into the consuming @@ -378,37 +378,37 @@ namespace brw { * should go back to scalar destinations here. 
*/ const fs_builder ubld = exec_all(); - const fs_reg chan_index = vgrf(BRW_TYPE_UD); - const fs_reg dst = vgrf(src.type); + const brw_reg chan_index = vgrf(BRW_TYPE_UD); + const brw_reg dst = vgrf(src.type); ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); - return fs_reg(component(dst, 0)); + return brw_reg(component(dst, 0)); } - fs_reg - move_to_vgrf(const fs_reg &src, unsigned num_components) const + brw_reg + move_to_vgrf(const brw_reg &src, unsigned num_components) const { - fs_reg *const src_comps = new fs_reg[num_components]; + brw_reg *const src_comps = new brw_reg[num_components]; for (unsigned i = 0; i < num_components; i++) src_comps[i] = offset(src, dispatch_width(), i); - const fs_reg dst = vgrf(src.type, num_components); + const brw_reg dst = vgrf(src.type, num_components); LOAD_PAYLOAD(dst, src_comps, num_components, 0); delete[] src_comps; - return fs_reg(dst); + return brw_reg(dst); } void emit_scan_step(enum opcode opcode, brw_conditional_mod mod, - const fs_reg &tmp, + const brw_reg &tmp, unsigned left_offset, unsigned left_stride, unsigned right_offset, unsigned right_stride) const { - fs_reg left, right; + brw_reg left, right; left = horiz_stride(horiz_offset(tmp, left_offset), left_stride); right = horiz_stride(horiz_offset(tmp, right_offset), right_stride); if ((tmp.type == BRW_TYPE_Q || tmp.type == BRW_TYPE_UQ) && @@ -430,13 +430,13 @@ namespace brw { /* We treat the bottom 32 bits as unsigned regardless of * whether or not the integer as a whole is signed. */ - fs_reg right_low = subscript(right, BRW_TYPE_UD, 0); - fs_reg left_low = subscript(left, BRW_TYPE_UD, 0); + brw_reg right_low = subscript(right, BRW_TYPE_UD, 0); + brw_reg left_low = subscript(left, BRW_TYPE_UD, 0); /* The upper bits get the same sign as the 64-bit type */ brw_reg_type type32 = brw_type_with_size(tmp.type, 32); - fs_reg right_high = subscript(right, type32, 1); - fs_reg left_high = subscript(left, type32, 1); + brw_reg right_high = subscript(right, type32, 1); + brw_reg left_high = subscript(left, type32, 1); /* Build up our comparison: * @@ -468,7 +468,7 @@ namespace brw { } void - emit_scan(enum opcode opcode, const fs_reg &tmp, + emit_scan(enum opcode opcode, const brw_reg &tmp, unsigned cluster_size, brw_conditional_mod mod) const { assert(dispatch_width() >= 8); @@ -479,8 +479,8 @@ namespace brw { if (dispatch_width() * brw_type_size_bytes(tmp.type) > 2 * REG_SIZE) { const unsigned half_width = dispatch_width() / 2; const fs_builder ubld = exec_all().group(half_width, 0); - fs_reg left = tmp; - fs_reg right = horiz_offset(tmp, half_width); + brw_reg left = tmp; + brw_reg right = horiz_offset(tmp, half_width); ubld.emit_scan(opcode, left, cluster_size, mod); ubld.emit_scan(opcode, right, cluster_size, mod); if (cluster_size > half_width) { @@ -547,15 +547,15 @@ namespace brw { */ #define _ALU1(prefix, op) \ fs_inst * \ - op(const fs_reg &dst, const fs_reg &src0) const \ + op(const brw_reg &dst, const brw_reg &src0) const \ { \ assert(_dispatch_width == 1 || \ (dst.file >= VGRF && dst.stride != 0) || \ (dst.file < VGRF && dst.hstride != 0)); \ return emit(prefix##op, dst, src0); \ } \ - fs_reg \ - op(const fs_reg &src0, fs_inst **out = NULL) const \ + brw_reg \ + op(const brw_reg &src0, fs_inst **out = NULL) const \ { \ fs_inst *inst = op(vgrf(src0.type), src0); \ if (out) *out = inst; \ @@ -565,12 +565,12 @@ namespace brw { #define VIRT1(op) _ALU1(SHADER_OPCODE_, op) fs_inst * - alu2(opcode op, const fs_reg 
&dst, const fs_reg &src0, const fs_reg &src1) const + alu2(opcode op, const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const { return emit(op, dst, src0, src1); } - fs_reg - alu2(opcode op, const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const + brw_reg + alu2(opcode op, const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const { enum brw_reg_type inferred_dst_type = brw_type_larger_of(src0.type, src1.type); @@ -581,12 +581,12 @@ namespace brw { #define _ALU2(prefix, op) \ fs_inst * \ - op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const \ + op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const \ { \ return alu2(prefix##op, dst, src0, src1); \ } \ - fs_reg \ - op(const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const \ + brw_reg \ + op(const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const \ { \ return alu2(prefix##op, src0, src1, out); \ } @@ -595,7 +595,7 @@ namespace brw { #define ALU2_ACC(op) \ fs_inst * \ - op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const \ + op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const \ { \ fs_inst *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \ inst->writes_accumulator = true; \ @@ -604,8 +604,8 @@ namespace brw { #define ALU3(op) \ fs_inst * \ - op(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, \ - const fs_reg &src2) const \ + op(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, \ + const brw_reg &src2) const \ { \ return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ } @@ -672,13 +672,13 @@ namespace brw { /** @} */ fs_inst * - ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const + ADD(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1) const { return alu2(BRW_OPCODE_ADD, dst, src0, src1); } - fs_reg - ADD(const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const + brw_reg + ADD(const brw_reg &src0, const brw_reg &src1, fs_inst **out = NULL) const { if (src1.file == IMM && src1.ud == 0 && !out) return src0; @@ -692,7 +692,7 @@ namespace brw { * the flag register with the packed 16 bits of the result. */ fs_inst * - CMP(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, + CMP(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, brw_conditional_mod condition) const { /* Take the instruction: @@ -718,7 +718,7 @@ namespace brw { * CMPN: Behaves like CMP, but produces true if src1 is NaN. */ fs_inst * - CMPN(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, + CMPN(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, brw_conditional_mod condition) const { /* Take the instruction: @@ -753,8 +753,8 @@ namespace brw { * CSEL: dst = src2 0.0f ? src0 : src1 */ fs_inst * - CSEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, - const fs_reg &src2, brw_conditional_mod condition) const + CSEL(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, + const brw_reg &src2, brw_conditional_mod condition) const { return set_condmod(condition, emit(BRW_OPCODE_CSEL, @@ -768,8 +768,8 @@ namespace brw { * Emit a linear interpolation instruction. */ fs_inst * - LRP(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a) const + LRP(const brw_reg &dst, const brw_reg &x, const brw_reg &y, + const brw_reg &a) const { if (shader->devinfo->ver <= 10) { /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so @@ -779,14 +779,14 @@ namespace brw { } else { /* We can't use the LRP instruction. 
Emit x*(1-a) + y*a. */ - const fs_reg y_times_a = vgrf(dst.type); - const fs_reg one_minus_a = vgrf(dst.type); - const fs_reg x_times_one_minus_a = vgrf(dst.type); + const brw_reg y_times_a = vgrf(dst.type); + const brw_reg one_minus_a = vgrf(dst.type); + const brw_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); - MUL(x_times_one_minus_a, x, fs_reg(one_minus_a)); - return ADD(dst, fs_reg(x_times_one_minus_a), fs_reg(y_times_a)); + MUL(x_times_one_minus_a, x, brw_reg(one_minus_a)); + return ADD(dst, brw_reg(x_times_one_minus_a), brw_reg(y_times_a)); } } @@ -794,7 +794,7 @@ namespace brw { * Collect a number of registers in a contiguous range of registers. */ fs_inst * - LOAD_PAYLOAD(const fs_reg &dst, const fs_reg *src, + LOAD_PAYLOAD(const brw_reg &dst, const brw_reg *src, unsigned sources, unsigned header_size) const { fs_inst *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources); @@ -809,7 +809,7 @@ namespace brw { } fs_inst * - VEC(const fs_reg &dst, const fs_reg *src, unsigned sources) const + VEC(const brw_reg &dst, const brw_reg *src, unsigned sources) const { return sources == 1 ? MOV(dst, src[0]) : LOAD_PAYLOAD(dst, src, sources, 0); @@ -822,7 +822,7 @@ namespace brw { } fs_inst * - UNDEF(const fs_reg &dst) const + UNDEF(const brw_reg &dst) const { assert(dst.file == VGRF); assert(dst.offset % REG_SIZE == 0); @@ -834,7 +834,7 @@ namespace brw { } fs_inst * - DPAS(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, const fs_reg &src2, + DPAS(const brw_reg &dst, const brw_reg &src0, const brw_reg &src1, const brw_reg &src2, unsigned sdepth, unsigned rcount) const { assert(_dispatch_width == 8 * reg_unit(shader->devinfo)); @@ -868,14 +868,14 @@ namespace brw { * Workaround for negation of UD registers. See comment in * fs_generator::generate_code() for more details. */ - fs_reg - fix_unsigned_negate(const fs_reg &src) const + brw_reg + fix_unsigned_negate(const brw_reg &src) const { if (src.type == BRW_TYPE_UD && src.negate) { - fs_reg temp = vgrf(BRW_TYPE_UD); + brw_reg temp = vgrf(BRW_TYPE_UD); MOV(temp, src); - return fs_reg(temp); + return brw_reg(temp); } else { return src; } @@ -885,8 +885,8 @@ namespace brw { * Workaround for source register modes not supported by the ternary * instruction encoding. 
*/ - fs_reg - fix_3src_operand(const fs_reg &src) const + brw_reg + fix_3src_operand(const brw_reg &src) const { switch (src.file) { case FIXED_GRF: @@ -905,7 +905,7 @@ namespace brw { break; } - fs_reg expanded = vgrf(src.type); + brw_reg expanded = vgrf(src.type); MOV(expanded, src); return expanded; } @@ -925,8 +925,8 @@ namespace brw { }; } -static inline fs_reg -offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta) +static inline brw_reg +offset(const brw_reg ®, const brw::fs_builder &bld, unsigned delta) { return offset(reg, bld.dispatch_width(), delta); } diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index debd25acc16..1c3cb041717 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -1150,7 +1150,7 @@ struct register_allocation { uint16_t avail; }; -static fs_reg +static brw_reg allocate_slots(struct register_allocation *regs, unsigned num_regs, unsigned bytes, unsigned align_bytes, brw::simple_allocator &alloc) @@ -1172,7 +1172,7 @@ allocate_slots(struct register_allocation *regs, unsigned num_regs, regs[i].avail &= ~(mask << j); - fs_reg reg = brw_vgrf(regs[i].nr, BRW_TYPE_F); + brw_reg reg = brw_vgrf(regs[i].nr, BRW_TYPE_F); reg.offset = j * 2; return reg; @@ -1243,7 +1243,7 @@ parcel_out_registers(struct imm *imm, unsigned len, const bblock_t *cur_block, */ const unsigned width = ver == 8 && imm[i].is_half_float ? 2 : 1; - const fs_reg reg = allocate_slots(regs, num_regs, + const brw_reg reg = allocate_slots(regs, num_regs, imm[i].size * width, get_alignment_for_imm(&imm[i]), alloc); @@ -1569,7 +1569,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) const uint32_t width = 1; const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all(); - fs_reg reg = brw_vgrf(imm->nr, BRW_TYPE_F); + brw_reg reg = brw_vgrf(imm->nr, BRW_TYPE_F); reg.offset = imm->subreg_offset; reg.stride = 0; @@ -1591,7 +1591,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) /* Rewrite the immediate sources to refer to the new GRFs. */ for (int i = 0; i < table.len; i++) { foreach_list_typed(reg_link, link, link, table.imm[i].uses) { - fs_reg *reg = &link->inst->src[link->src]; + brw_reg *reg = &link->inst->src[link->src]; if (link->inst->opcode == BRW_OPCODE_SEL) { if (link->type == either_type) { @@ -1709,7 +1709,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) inst->conditional_mod == BRW_CONDITIONAL_GE || inst->conditional_mod == BRW_CONDITIONAL_L); - fs_reg temp = inst->src[0]; + brw_reg temp = inst->src[0]; inst->src[0] = inst->src[1]; inst->src[1] = temp; diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 3f82fd49b16..6c97e18c171 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -46,8 +46,8 @@ namespace { /* avoid conflict with opt_copy_propagation_elements */ struct acp_entry { struct rb_node by_dst; struct rb_node by_src; - fs_reg dst; - fs_reg src; + brw_reg dst; + brw_reg src; unsigned global_idx; unsigned size_written; unsigned size_read; @@ -338,7 +338,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(linear_ctx *lin_ctx, cfg_t *cfg, * Like reg_offset, but register must be VGRF or FIXED_GRF. */ static inline unsigned -grf_reg_offset(const fs_reg &r) +grf_reg_offset(const brw_reg &r) { return (r.file == VGRF ? 
0 : r.nr) * REG_SIZE + r.offset + @@ -349,7 +349,7 @@ grf_reg_offset(const fs_reg &r) * Like regions_overlap, but register must be VGRF or FIXED_GRF. */ static inline bool -grf_regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) +grf_regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds) { return reg_space(r) == reg_space(s) && !(grf_reg_offset(r) + dr <= grf_reg_offset(s) || @@ -793,7 +793,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, * regioning restrictions that apply to integer types smaller than a dword. * See BSpec #56640 for details. */ - const fs_reg tmp = horiz_stride(entry->src, inst->src[arg].stride); + const brw_reg tmp = horiz_stride(entry->src, inst->src[arg].stride); if (has_subdword_integer_region_restriction(devinfo, inst, &tmp, 1)) return false; @@ -936,7 +936,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, } static bool -try_constant_propagate_value(fs_reg val, brw_reg_type dst_type, +try_constant_propagate_value(brw_reg val, brw_reg_type dst_type, fs_inst *inst, int arg) { bool progress = false; @@ -1394,7 +1394,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, inst->src[i].is_contiguous())) { const brw_reg_type t = i < inst->header_size ? BRW_TYPE_UD : inst->src[i].type; - fs_reg dst = byte_offset(retype(inst->dst, t), offset); + brw_reg dst = byte_offset(retype(inst->dst, t), offset); if (!dst.equals(inst->src[i])) { acp_entry *entry = linear_zalloc(lin_ctx, acp_entry); entry->dst = dst; @@ -1486,7 +1486,7 @@ brw_fs_opt_copy_propagation(fs_visitor &s) static bool try_copy_propagate_def(const brw_compiler *compiler, const brw::simple_allocator &alloc, - fs_inst *def, const fs_reg &val, + fs_inst *def, const brw_reg &val, fs_inst *inst, int arg, uint8_t max_polygons) { @@ -1716,7 +1716,7 @@ try_copy_propagate_def(const brw_compiler *compiler, } static bool -try_constant_propagate_def(fs_inst *def, fs_reg val, fs_inst *inst, int arg) +try_constant_propagate_def(fs_inst *def, brw_reg val, fs_inst *inst, int arg) { /* Bail if inst is reading more than a single vector component of entry */ if (inst->size_read(arg) > def->dst.component_size(inst->exec_size)) @@ -1728,8 +1728,8 @@ try_constant_propagate_def(fs_inst *def, fs_reg val, fs_inst *inst, int arg) /** * Handle cases like UW subreads of a UD immediate, with an offset. 
*/ -static fs_reg -extract_imm(fs_reg val, brw_reg_type type, unsigned offset) +static brw_reg +extract_imm(brw_reg val, brw_reg_type type, unsigned offset) { assert(val.file == IMM); @@ -1748,16 +1748,16 @@ extract_imm(fs_reg val, brw_reg_type type, unsigned offset) val.d = (val.d << (bitsize * (32/bitsize - 1 - offset))) >> ((32/bitsize - 1) * bitsize); break; default: - return fs_reg(); + return brw_reg(); } return val; } -static fs_reg -find_value_for_offset(fs_inst *def, const fs_reg &src, unsigned src_size) +static brw_reg +find_value_for_offset(fs_inst *def, const brw_reg &src, unsigned src_size) { - fs_reg val; + brw_reg val; switch (def->opcode) { case BRW_OPCODE_MOV: @@ -1837,7 +1837,7 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s) } } - fs_reg val = + brw_reg val = find_value_for_offset(def, inst->src[i], inst->size_read(i)); if (val.file == IMM) { diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 7bc8ca6dd5b..42f82cf994e 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -162,8 +162,8 @@ local_only(const fs_inst *inst) static bool operands_match(const fs_inst *a, const fs_inst *b, bool *negate) { - fs_reg *xs = a->src; - fs_reg *ys = b->src; + brw_reg *xs = a->src; + brw_reg *ys = b->src; if (a->opcode == BRW_OPCODE_MAD) { return xs[0].equals(ys[0]) && @@ -251,7 +251,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) #define HASH(hash, data) XXH32(&(data), sizeof(data), hash) uint32_t -hash_reg(uint32_t hash, const fs_reg &r) +hash_reg(uint32_t hash, const brw_reg &r) { struct { uint64_t u64; @@ -314,7 +314,7 @@ hash_inst(const void *v) /* Canonicalize negations on either source (or both) and commutatively * combine the hashes for both sources. */ - fs_reg src[2] = { inst->src[0], inst->src[1] }; + brw_reg src[2] = { inst->src[0], inst->src[1] }; uint32_t src_hash[2]; for (int i = 0; i < 2; i++) { @@ -383,8 +383,8 @@ remap_sources(fs_visitor &s, const brw::def_analysis &defs, if (def_block->end_ip_delta) s.cfg->adjust_block_ips(); - fs_reg neg = brw_vgrf(new_nr, BRW_TYPE_F); - fs_reg tmp = dbld.MOV(negate(neg)); + brw_reg neg = brw_vgrf(new_nr, BRW_TYPE_F); + brw_reg tmp = dbld.MOV(negate(neg)); inst->src[i].nr = tmp.nr; remap_table[old_nr] = tmp.nr; } else { diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp index 53db51bfbc2..d0e9f0d0ee4 100644 --- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp @@ -100,8 +100,8 @@ brw_fs_opt_dead_code_eliminate(fs_visitor &s) if (!result_live && (can_omit_write(inst) || can_eliminate(devinfo, inst, flag_live))) { - inst->dst = fs_reg(spread(retype(brw_null_reg(), inst->dst.type), - inst->dst.stride)); + inst->dst = brw_reg(spread(retype(brw_null_reg(), inst->dst.type), + inst->dst.stride)); progress = true; } } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index a6d03286ec1..a72ec8204e3 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -66,7 +66,6 @@ brw_math_function(enum opcode op) static struct brw_reg normalize_brw_reg_for_encoding(brw_reg *reg) - { struct brw_reg brw_reg; diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp index 08bbecce81d..a2559403ca6 100644 --- a/src/intel/compiler/brw_fs_live_variables.cpp +++ b/src/intel/compiler/brw_fs_live_variables.cpp @@ -54,7 
+54,7 @@ using namespace brw; void fs_live_variables::setup_one_read(struct block_data *bd, - int ip, const fs_reg ®) + int ip, const brw_reg ®) { int var = var_from_reg(reg); assert(var < num_vars); @@ -72,7 +72,7 @@ fs_live_variables::setup_one_read(struct block_data *bd, void fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst, - int ip, const fs_reg ®) + int ip, const brw_reg ®) { int var = var_from_reg(reg); assert(var < num_vars); @@ -115,7 +115,7 @@ fs_live_variables::setup_def_use() foreach_inst_in_block(fs_inst, inst, block) { /* Set use[] for this instruction */ for (unsigned int i = 0; i < inst->sources; i++) { - fs_reg reg = inst->src[i]; + brw_reg reg = inst->src[i]; if (reg.file != VGRF) continue; @@ -130,7 +130,7 @@ fs_live_variables::setup_def_use() /* Set def[] for this instruction */ if (inst->dst.file == VGRF) { - fs_reg reg = inst->dst; + brw_reg reg = inst->dst; for (unsigned j = 0; j < regs_written(inst); j++) { setup_one_write(bd, inst, ip, reg); reg.offset += REG_SIZE; @@ -317,7 +317,7 @@ fs_live_variables::~fs_live_variables() static bool check_register_live_range(const fs_live_variables *live, int ip, - const fs_reg ®, unsigned n) + const brw_reg ®, unsigned n) { const unsigned var = live->var_from_reg(reg); diff --git a/src/intel/compiler/brw_fs_live_variables.h b/src/intel/compiler/brw_fs_live_variables.h index d78653d13b6..1d9d199fac0 100644 --- a/src/intel/compiler/brw_fs_live_variables.h +++ b/src/intel/compiler/brw_fs_live_variables.h @@ -92,7 +92,7 @@ public: bool vars_interfere(int a, int b) const; bool vgrfs_interfere(int a, int b) const; - int var_from_reg(const fs_reg ®) const + int var_from_reg(const brw_reg ®) const { return var_from_vgrf[reg.nr] + reg.offset / REG_SIZE; } @@ -132,9 +132,9 @@ public: protected: void setup_def_use(); - void setup_one_read(struct block_data *bd, int ip, const fs_reg ®); + void setup_one_read(struct block_data *bd, int ip, const brw_reg ®); void setup_one_write(struct block_data *bd, fs_inst *inst, int ip, - const fs_reg ®); + const brw_reg ®); void compute_live_variables(); void compute_start_end(); diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 6f6763743fb..77f773817d5 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -37,10 +37,10 @@ brw_fs_lower_constant_loads(fs_visitor &s) const unsigned block_sz = 64; /* Fetch one cacheline at a time. 
*/ const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0); - const fs_reg dst = ubld.vgrf(BRW_TYPE_UD); + const brw_reg dst = ubld.vgrf(BRW_TYPE_UD); const unsigned base = pull_index * 4; - fs_reg srcs[PULL_UNIFORM_CONSTANT_SRCS]; + brw_reg srcs[PULL_UNIFORM_CONSTANT_SRCS]; srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = brw_imm_ud(index); srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1)); srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz); @@ -66,7 +66,7 @@ brw_fs_lower_constant_loads(fs_visitor &s) s.VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, brw_imm_ud(index), - fs_reg() /* surface_handle */, + brw_reg() /* surface_handle */, inst->src[1], pull_index * 4, 4, 1); inst->remove(block); @@ -90,7 +90,7 @@ brw_fs_lower_load_payload(fs_visitor &s) assert(inst->dst.file == VGRF); assert(inst->saturate == false); - fs_reg dst = inst->dst; + brw_reg dst = inst->dst; const fs_builder ibld(&s, block, inst); const fs_builder ubld = ibld.exec_all(); @@ -188,7 +188,7 @@ brw_fs_lower_csel(fs_visitor &s) const fs_builder ibld(&s, block, inst); /* CSEL: dst = src2 0 ? src0 : src1 */ - fs_reg zero = brw_imm_reg(orig_type); + brw_reg zero = brw_imm_reg(orig_type); ibld.CMP(retype(brw_null_reg(), orig_type), inst->src[2], zero, inst->conditional_mod); @@ -250,7 +250,7 @@ brw_fs_lower_sub_sat(fs_visitor &s) */ if (inst->exec_size == 8 && inst->src[0].type != BRW_TYPE_Q && inst->src[0].type != BRW_TYPE_UQ) { - fs_reg acc = retype(brw_acc_reg(inst->exec_size), + brw_reg acc = retype(brw_acc_reg(inst->exec_size), inst->src[1].type); ibld.MOV(acc, inst->src[1]); @@ -263,11 +263,11 @@ brw_fs_lower_sub_sat(fs_visitor &s) */ fs_inst *add; - fs_reg tmp = ibld.vgrf(inst->src[0].type); + brw_reg tmp = ibld.vgrf(inst->src[0].type); ibld.SHR(tmp, inst->src[1], brw_imm_d(1)); - fs_reg s1_sub_t = ibld.ADD(inst->src[1], negate(tmp)); - fs_reg sat_s0_sub_t = ibld.ADD(inst->src[0], negate(tmp), &add); + brw_reg s1_sub_t = ibld.ADD(inst->src[1], negate(tmp)); + brw_reg sat_s0_sub_t = ibld.ADD(inst->src[0], negate(tmp), &add); add->saturate = true; add = ibld.ADD(inst->dst, sat_s0_sub_t, negate(s1_sub_t)); @@ -331,8 +331,8 @@ brw_fs_lower_barycentrics(fs_visitor &s) switch (inst->opcode) { case BRW_OPCODE_PLN: { assert(inst->exec_size == 16); - const fs_reg tmp = ibld.vgrf(inst->src[1].type, 2); - fs_reg srcs[4]; + const brw_reg tmp = ibld.vgrf(inst->src[1].type, 2); + brw_reg srcs[4]; for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) srcs[i] = horiz_offset(offset(inst->src[1], ibld, i % 2), @@ -348,7 +348,7 @@ brw_fs_lower_barycentrics(fs_visitor &s) case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: { assert(inst->exec_size == 16); - const fs_reg tmp = ibld.vgrf(inst->dst.type, 2); + const brw_reg tmp = ibld.vgrf(inst->dst.type, 2); for (unsigned i = 0; i < 2; i++) { for (unsigned g = 0; g < inst->exec_size / 8; g++) { @@ -386,8 +386,8 @@ lower_derivative(fs_visitor &s, bblock_t *block, fs_inst *inst, unsigned swz0, unsigned swz1) { const fs_builder ubld = fs_builder(&s, block, inst).exec_all(); - const fs_reg tmp0 = ubld.vgrf(inst->src[0].type); - const fs_reg tmp1 = ubld.vgrf(inst->src[0].type); + const brw_reg tmp0 = ubld.vgrf(inst->src[0].type); + const brw_reg tmp1 = ubld.vgrf(inst->src[0].type); ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], brw_imm_ud(swz0)); ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], brw_imm_ud(swz1)); @@ -469,7 +469,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s) const fs_builder ubld = 
fs_builder(&s, block, inst).exec_all().group(1, 0); - fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD); + brw_reg exec_mask = ubld.vgrf(BRW_TYPE_UD); ubld.UNDEF(exec_mask); ubld.emit(SHADER_OPCODE_READ_ARCH_REG, exec_mask, retype(brw_mask_reg(0), @@ -483,7 +483,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s) * will appear at the front of the mask. */ if (!(first && packed_dispatch)) { - fs_reg mask = ubld.vgrf(BRW_TYPE_UD); + brw_reg mask = ubld.vgrf(BRW_TYPE_UD); ubld.UNDEF(mask); ubld.emit(SHADER_OPCODE_READ_ARCH_REG, mask, retype(brw_sr0_reg(vmask ? 3 : 2), @@ -506,7 +506,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s) break; case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: { - fs_reg tmp = ubld.vgrf(BRW_TYPE_UD); + brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); ubld.UNDEF(tmp); ubld.LZD(tmp, exec_mask); ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31)); @@ -553,15 +553,15 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s) const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3; const unsigned len = MIN2(inst->mlen, inst->ex_mlen); - fs_reg tmp = brw_vgrf(s.alloc.allocate(len), + brw_reg tmp = brw_vgrf(s.alloc.allocate(len), BRW_TYPE_UD); /* Sadly, we've lost all notion of channels and bit sizes at this * point. Just WE_all it. */ const fs_builder ibld = fs_builder(&s, block, inst).exec_all().group(16, 0); - fs_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD); - fs_reg copy_dst = tmp; + brw_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD); + brw_reg copy_dst = tmp; for (unsigned i = 0; i < len; i += 2) { if (len == i + 1) { /* Only one register left; do SIMD8 */ @@ -696,7 +696,7 @@ brw_fs_lower_alu_restrictions(fs_visitor &s) static void brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst *inst, - fs_reg *reg, bool compressed) + brw_reg *reg, bool compressed) { if (reg->file != VGRF) return; @@ -801,7 +801,7 @@ brw_fs_lower_load_subgroup_invocation(fs_visitor &s) if (inst->exec_size == 8) { assert(inst->dst.type == BRW_TYPE_UD); - fs_reg uw = retype(inst->dst, BRW_TYPE_UW); + brw_reg uw = retype(inst->dst, BRW_TYPE_UW); ubld8.MOV(uw, brw_imm_v(0x76543210)); ubld8.MOV(inst->dst, uw); } else { @@ -852,12 +852,12 @@ brw_fs_lower_indirect_mov(fs_visitor &s) /* Extract unaligned part */ uint16_t extra_offset = inst->src[0].offset & 0x1; - fs_reg offset = ibld.ADD(inst->src[1], brw_imm_uw(extra_offset)); + brw_reg offset = ibld.ADD(inst->src[1], brw_imm_uw(extra_offset)); /* Check if offset is odd or even so that we can choose either high or * low byte from the result. */ - fs_reg is_odd = ibld.AND(offset, brw_imm_ud(1)); + brw_reg is_odd = ibld.AND(offset, brw_imm_ud(1)); /* Make sure offset is word (2-bytes) aligned */ offset = ibld.AND(offset, brw_imm_uw(~1)); @@ -865,24 +865,24 @@ brw_fs_lower_indirect_mov(fs_visitor &s) /* Indirect addressing(vx1 and vxh) not supported with UB/B datatype for * Src0, so change data type for src0 and dst to UW. */ - fs_reg dst = ibld.vgrf(BRW_TYPE_UW); + brw_reg dst = ibld.vgrf(BRW_TYPE_UW); /* Substract unaligned offset from src0 offset since we already * accounted unaligned part in the indirect byte offset. */ - fs_reg start = retype(inst->src[0], BRW_TYPE_UW); + brw_reg start = retype(inst->src[0], BRW_TYPE_UW); start.offset &= ~extra_offset; /* Adjust length to account extra offset. 
*/ assert(inst->src[2].file == IMM); - fs_reg length = brw_imm_ud(inst->src[2].ud + extra_offset); + brw_reg length = brw_imm_ud(inst->src[2].ud + extra_offset); ibld.emit(SHADER_OPCODE_MOV_INDIRECT, dst, start, offset, length); /* Select high byte if offset is odd otherwise select low byte. */ - fs_reg lo = ibld.AND(dst, brw_imm_uw(0xff)); - fs_reg hi = ibld.SHR(dst, brw_imm_uw(8)); - fs_reg result = ibld.vgrf(BRW_TYPE_UW); + brw_reg lo = ibld.AND(dst, brw_imm_uw(0xff)); + brw_reg hi = ibld.SHR(dst, brw_imm_uw(8)); + brw_reg result = ibld.vgrf(BRW_TYPE_UW); ibld.CSEL(result, hi, lo, is_odd, BRW_CONDITIONAL_NZ); /* Extra MOV needed here to convert back to the corresponding B type */ diff --git a/src/intel/compiler/brw_fs_lower_dpas.cpp b/src/intel/compiler/brw_fs_lower_dpas.cpp index d8e63c6c23c..3a90ee5eeee 100644 --- a/src/intel/compiler/brw_fs_lower_dpas.cpp +++ b/src/intel/compiler/brw_fs_lower_dpas.cpp @@ -24,16 +24,16 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst) const brw_reg_type src1_type = BRW_TYPE_HF; const brw_reg_type src2_type = BRW_TYPE_HF; - const fs_reg dest = inst->dst; - fs_reg src0 = inst->src[0]; - const fs_reg src1 = retype(inst->src[1], src1_type); - const fs_reg src2 = retype(inst->src[2], src2_type); + const brw_reg dest = inst->dst; + brw_reg src0 = inst->src[0]; + const brw_reg src1 = retype(inst->src[1], src1_type); + const brw_reg src2 = retype(inst->src[2], src2_type); const unsigned dest_stride = dest.type == BRW_TYPE_HF ? REG_SIZE / 2 : REG_SIZE; for (unsigned r = 0; r < inst->rcount; r++) { - fs_reg temp = bld.vgrf(BRW_TYPE_HF); + brw_reg temp = bld.vgrf(BRW_TYPE_HF); for (unsigned subword = 0; subword < 2; subword++) { for (unsigned s = 0; s < inst->sdepth; s++) { @@ -50,7 +50,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst) */ if (s == 0 && subword == 0) { const unsigned acc_width = 8; - fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), + brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), inst->group % acc_width); if (bld.shader->devinfo->verx10 >= 125) { @@ -69,7 +69,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst) ->writes_accumulator = true; } else { - fs_reg result; + brw_reg result; /* As mentioned above, the MAC had an optional, explicit * destination register. Various optimization passes are not @@ -96,7 +96,7 @@ f16_using_mac(const fs_builder &bld, fs_inst *inst) if (!src0.is_null()) { if (src0_type != BRW_TYPE_HF) { - fs_reg temp2 = bld.vgrf(src0_type); + brw_reg temp2 = bld.vgrf(src0_type); bld.MOV(temp2, temp); @@ -134,10 +134,10 @@ int8_using_dp4a(const fs_builder &bld, fs_inst *inst) const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB ? BRW_TYPE_UD : BRW_TYPE_D; - fs_reg dest = inst->dst; - fs_reg src0 = inst->src[0]; - const fs_reg src1 = retype(inst->src[1], src1_type); - const fs_reg src2 = retype(inst->src[2], src2_type); + brw_reg dest = inst->dst; + brw_reg src0 = inst->src[0]; + const brw_reg src1 = retype(inst->src[1], src1_type); + const brw_reg src2 = retype(inst->src[2], src2_type); const unsigned dest_stride = reg_unit(bld.shader->devinfo) * REG_SIZE; @@ -183,10 +183,10 @@ int8_using_mul_add(const fs_builder &bld, fs_inst *inst) const brw_reg_type src2_type = inst->src[2].type == BRW_TYPE_UB ? 
BRW_TYPE_UD : BRW_TYPE_D; - fs_reg dest = inst->dst; - fs_reg src0 = inst->src[0]; - const fs_reg src1 = retype(inst->src[1], src1_type); - const fs_reg src2 = retype(inst->src[2], src2_type); + brw_reg dest = inst->dst; + brw_reg src0 = inst->src[0]; + const brw_reg src1 = retype(inst->src[1], src1_type); + const brw_reg src2 = retype(inst->src[2], src2_type); const unsigned dest_stride = REG_SIZE; @@ -199,9 +199,9 @@ int8_using_mul_add(const fs_builder &bld, fs_inst *inst) } for (unsigned s = 0; s < inst->sdepth; s++) { - fs_reg temp1 = bld.vgrf(BRW_TYPE_UD); - fs_reg temp2 = bld.vgrf(BRW_TYPE_UD); - fs_reg temp3 = bld.vgrf(BRW_TYPE_UD, 2); + brw_reg temp1 = bld.vgrf(BRW_TYPE_UD); + brw_reg temp2 = bld.vgrf(BRW_TYPE_UD); + brw_reg temp3 = bld.vgrf(BRW_TYPE_UD, 2); const brw_reg_type temp_type = (inst->src[1].type == BRW_TYPE_B || inst->src[2].type == BRW_TYPE_B) diff --git a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp index 493e4a52aee..a3a39af65d3 100644 --- a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp @@ -207,14 +207,14 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) */ bool needs_mov = false; - fs_reg orig_dst = inst->dst; + brw_reg orig_dst = inst->dst; /* Get a new VGRF for the "low" 32x16-bit multiplication result if * reusing the original destination is impossible due to hardware * restrictions, source/destination overlap, or it being the null * register. */ - fs_reg low = inst->dst; + brw_reg low = inst->dst; if (orig_dst.is_null() || regions_overlap(inst->dst, inst->size_written, inst->src[0], inst->size_read(0)) || @@ -227,7 +227,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) } /* Get a new VGRF but keep the same stride as inst->dst */ - fs_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type); + brw_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type); high.stride = inst->dst.stride; high.offset = inst->dst.offset % REG_SIZE; @@ -319,19 +319,19 @@ brw_fs_lower_mul_qword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) unsigned int q_regs = regs_written(inst); unsigned int d_regs = (q_regs + 1) / 2; - fs_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ); - fs_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - fs_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ); + brw_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); /* Here we need the full 64 bit result for 32b * 32b. */ if (devinfo->has_integer_dword_mul) { ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0), subscript(inst->src[1], BRW_TYPE_UD, 0)); } else { - fs_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - fs_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); const unsigned acc_width = reg_unit(devinfo) * 8; - fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), + brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), inst->group % acc_width); fs_inst *mul = ibld.MUL(acc, @@ -390,7 +390,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) /* Should have been lowered to 8-wide. 
*/ assert(inst->exec_size <= brw_fs_get_lowered_simd_width(&s, inst)); const unsigned acc_width = reg_unit(devinfo) * 8; - const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type), + const brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type), inst->group % acc_width); fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]); ibld.MACH(inst->dst, inst->src[0], inst->src[1]); diff --git a/src/intel/compiler/brw_fs_lower_pack.cpp b/src/intel/compiler/brw_fs_lower_pack.cpp index f10c9ad721a..b793910af38 100644 --- a/src/intel/compiler/brw_fs_lower_pack.cpp +++ b/src/intel/compiler/brw_fs_lower_pack.cpp @@ -40,7 +40,7 @@ brw_fs_lower_pack(fs_visitor &s) assert(inst->dst.file == VGRF); assert(inst->saturate == false); - fs_reg dst = inst->dst; + brw_reg dst = inst->dst; const fs_builder ibld(&s, block, inst); /* The lowering generates 2 instructions for what was previously 1. This diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index 019ac03f377..93991a48c92 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -464,7 +464,7 @@ namespace brw { brw_type_size_bytes(inst->src[i].type) == get_exec_type_size(inst)); const fs_builder ibld(v, block, inst); - const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + const brw_reg tmp = ibld.vgrf(get_exec_type(inst)); lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); inst->src[i] = tmp; @@ -495,7 +495,7 @@ namespace { const unsigned stride = brw_type_size_bytes(inst->dst.type) * inst->dst.stride <= brw_type_size_bytes(type) ? 1 : brw_type_size_bytes(inst->dst.type) * inst->dst.stride / brw_type_size_bytes(type); - fs_reg tmp = ibld.vgrf(type, stride); + brw_reg tmp = ibld.vgrf(type, stride); ibld.UNDEF(tmp); tmp = horiz_stride(tmp, stride); @@ -549,7 +549,7 @@ namespace { inst->exec_size * stride * brw_type_size_bytes(inst->src[i].type), reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); - fs_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type); + brw_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type); ibld.UNDEF(tmp); tmp = byte_offset(horiz_stride(tmp, stride), required_src_byte_offset(devinfo, inst, i)); @@ -560,7 +560,7 @@ namespace { const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4), false); const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type); - fs_reg raw_src = inst->src[i]; + brw_reg raw_src = inst->src[i]; raw_src.negate = false; raw_src.abs = false; @@ -578,7 +578,7 @@ namespace { /* Point the original instruction at the temporary, making sure to keep * any source modifiers in the instruction. 
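 *
 * (The copy just emitted went through a raw integer type with negate/abs
 * cleared, so the temporary holds the unmodified bits; reattaching the
 * modifiers below preserves the original semantics. E.g. if the source
 * was -|x|, the temporary contains the plain bits of x and the lowered
 * instruction reads it back as -|tmp|.)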
*/ - fs_reg lower_src = tmp; + brw_reg lower_src = tmp; lower_src.negate = inst->src[i].negate; lower_src.abs = inst->src[i].abs; inst->src[i] = lower_src; @@ -607,7 +607,7 @@ namespace { const unsigned stride = required_dst_byte_stride(inst) / brw_type_size_bytes(inst->dst.type); assert(stride > 0); - fs_reg tmp = ibld.vgrf(inst->dst.type, stride); + brw_reg tmp = ibld.vgrf(inst->dst.type, stride); ibld.UNDEF(tmp); tmp = horiz_stride(tmp, stride); @@ -665,7 +665,7 @@ namespace { const unsigned n = get_exec_type_size(inst) / brw_type_size_bytes(raw_type); const fs_builder ibld(v, block, inst); - fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride); + brw_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride); ibld.UNDEF(tmp); tmp = horiz_stride(tmp, inst->dst.stride); diff --git a/src/intel/compiler/brw_fs_lower_simd_width.cpp b/src/intel/compiler/brw_fs_lower_simd_width.cpp index 63fb26a7a44..7f73f44812b 100644 --- a/src/intel/compiler/brw_fs_lower_simd_width.cpp +++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp @@ -500,19 +500,19 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i) * lbld.group() from the i-th source region of instruction \p inst and return * it as result in packed form. */ -static fs_reg +static brw_reg emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i) { assert(lbld.group() >= inst->group); /* Specified channel group from the source region. */ - const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group); + const brw_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group); if (needs_src_copy(lbld, inst, i)) { const unsigned num_components = inst->components_read(i); - const fs_reg tmp = lbld.vgrf(inst->src[i].type, num_components); + const brw_reg tmp = lbld.vgrf(inst->src[i].type, num_components); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned k = 0; k < num_components; ++k) comps[k] = offset(src, inst->exec_size, k); lbld.VEC(tmp, comps, num_components); @@ -585,7 +585,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst) * inserted using \p lbld_before and any copy instructions required for * zipping up the destination of \p inst will be inserted using \p lbld_after. */ -static fs_reg +static brw_reg emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, fs_inst *inst) { @@ -596,7 +596,7 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, const struct intel_device_info *devinfo = lbld_before.shader->devinfo; /* Specified channel group from the destination region. */ - const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group); + const brw_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group); if (!needs_dst_copy(lbld_after, inst)) { /* No need to allocate a temporary for the lowered instruction, just @@ -611,7 +611,7 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, const unsigned dst_size = (inst->size_written - residency_size) / inst->dst.component_size(inst->exec_size); - const fs_reg tmp = lbld_after.vgrf(inst->dst.type, + const brw_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size + inst->has_sampler_residency()); if (inst->predicate) { @@ -639,9 +639,9 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, * SIMD16 16 bit values. 
*/ const fs_builder rbld = lbld_after.exec_all().group(1, 0); - fs_reg local_res_reg = component( + brw_reg local_res_reg = component( retype(offset(tmp, lbld_before, dst_size), BRW_TYPE_UW), 0); - fs_reg final_res_reg = + brw_reg final_res_reg = retype(byte_offset(inst->dst, inst->size_written - residency_size + lbld_after.group() / 8), BRW_TYPE_UW); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 5195f70c8f8..0f4042931f5 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -54,22 +54,22 @@ struct nir_to_brw_state { */ fs_builder bld; - fs_reg *ssa_values; + brw_reg *ssa_values; fs_inst **resource_insts; struct brw_fs_bind_info *ssa_bind_infos; - fs_reg *uniform_values; - fs_reg *system_values; + brw_reg *uniform_values; + brw_reg *system_values; }; -static fs_reg get_nir_src(nir_to_brw_state &ntb, const nir_src &src); -static fs_reg get_nir_def(nir_to_brw_state &ntb, const nir_def &def); +static brw_reg get_nir_src(nir_to_brw_state &ntb, const nir_src &src); +static brw_reg get_nir_def(nir_to_brw_state &ntb, const nir_def &def); static nir_component_mask_t get_nir_write_mask(const nir_def &def); static void fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr); -static fs_reg emit_samplepos_setup(nir_to_brw_state &ntb); -static fs_reg emit_sampleid_setup(nir_to_brw_state &ntb); -static fs_reg emit_samplemaskin_setup(nir_to_brw_state &ntb); -static fs_reg emit_shading_rate_setup(nir_to_brw_state &ntb); +static brw_reg emit_samplepos_setup(nir_to_brw_state &ntb); +static brw_reg emit_sampleid_setup(nir_to_brw_state &ntb); +static brw_reg emit_samplemaskin_setup(nir_to_brw_state &ntb); +static brw_reg emit_shading_rate_setup(nir_to_brw_state &ntb); static void fs_nir_emit_impl(nir_to_brw_state &ntb, nir_function_impl *impl); static void fs_nir_emit_cf_list(nir_to_brw_state &ntb, exec_list *list); @@ -81,7 +81,7 @@ static void fs_nir_emit_instr(nir_to_brw_state &ntb, nir_instr *instr); static void fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr, - fs_reg surface, + brw_reg surface, bool bindless); static void fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, @@ -119,10 +119,10 @@ brw_texture_offset(const nir_tex_instr *tex, unsigned src, return true; } -static fs_reg +static brw_reg setup_imm_b(const fs_builder &bld, int8_t v) { - const fs_reg tmp = bld.vgrf(BRW_TYPE_B); + const brw_reg tmp = bld.vgrf(BRW_TYPE_B); bld.MOV(tmp, brw_imm_w(v)); return tmp; } @@ -167,7 +167,7 @@ fs_nir_setup_outputs(nir_to_brw_state &ntb) reg_size = MAX2(vec4s[i + loc] + i, reg_size); } - fs_reg reg = ntb.bld.vgrf(BRW_TYPE_F, 4 * reg_size); + brw_reg reg = ntb.bld.vgrf(BRW_TYPE_F, 4 * reg_size); for (unsigned i = 0; i < reg_size; i++) { assert(loc + i < ARRAY_SIZE(s.outputs)); s.outputs[loc + i] = offset(reg, ntb.bld, 4 * i); @@ -202,7 +202,7 @@ fs_nir_setup_uniforms(fs_visitor &s) } } -static fs_reg +static brw_reg emit_work_group_id_setup(nir_to_brw_state &ntb) { fs_visitor &s = ntb.s; @@ -210,7 +210,7 @@ emit_work_group_id_setup(nir_to_brw_state &ntb) assert(gl_shader_stage_is_compute(s.stage)); - fs_reg id = bld.vgrf(BRW_TYPE_UD, 3); + brw_reg id = bld.vgrf(BRW_TYPE_UD, 3); struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_TYPE_UD)); bld.MOV(id, r0_1); @@ -227,7 +227,7 @@ static bool emit_system_values_block(nir_to_brw_state &ntb, nir_block *block) { fs_visitor &s = ntb.s; - fs_reg *reg; + brw_reg *reg; 
nir_foreach_instr(instr, block) { if (instr->type != nir_instr_type_intrinsic) @@ -314,7 +314,7 @@ emit_system_values_block(nir_to_brw_state &ntb, nir_block *block) * subspans 0 and 1) in SIMD8 and an additional byte (the pixel * masks for 2 and 3) in SIMD16. */ - fs_reg shifted = abld.vgrf(BRW_TYPE_UW); + brw_reg shifted = abld.vgrf(BRW_TYPE_UW); for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const fs_builder hbld = abld.group(MIN2(16, s.dispatch_width), i); @@ -338,12 +338,12 @@ emit_system_values_block(nir_to_brw_state &ntb, nir_block *block) * performs 1's complement negation, so we can use that instead of * a NOT instruction. */ - fs_reg inverted = negate(shifted); + brw_reg inverted = negate(shifted); /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing * with 1 and negating. */ - fs_reg anded = abld.vgrf(BRW_TYPE_UD); + brw_reg anded = abld.vgrf(BRW_TYPE_UD); abld.AND(anded, inverted, brw_imm_uw(1)); *reg = abld.MOV(negate(retype(anded, BRW_TYPE_D))); @@ -370,16 +370,16 @@ fs_nir_emit_system_values(nir_to_brw_state &ntb) const fs_builder &bld = ntb.bld; fs_visitor &s = ntb.s; - ntb.system_values = ralloc_array(ntb.mem_ctx, fs_reg, SYSTEM_VALUE_MAX); + ntb.system_values = ralloc_array(ntb.mem_ctx, brw_reg, SYSTEM_VALUE_MAX); for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) { - ntb.system_values[i] = fs_reg(); + ntb.system_values[i] = brw_reg(); } /* Always emit SUBGROUP_INVOCATION. Dead code will clean it up if we * never end up using it. */ { - fs_reg &reg = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; + brw_reg &reg = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; reg = bld.vgrf(s.dispatch_width < 16 ? BRW_TYPE_UD : BRW_TYPE_UW); bld.emit(SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION, reg); } @@ -392,10 +392,10 @@ fs_nir_emit_system_values(nir_to_brw_state &ntb) static void fs_nir_emit_impl(nir_to_brw_state &ntb, nir_function_impl *impl) { - ntb.ssa_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc); + ntb.ssa_values = rzalloc_array(ntb.mem_ctx, brw_reg, impl->ssa_alloc); ntb.resource_insts = rzalloc_array(ntb.mem_ctx, fs_inst *, impl->ssa_alloc); ntb.ssa_bind_infos = rzalloc_array(ntb.mem_ctx, struct brw_fs_bind_info, impl->ssa_alloc); - ntb.uniform_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc); + ntb.uniform_values = rzalloc_array(ntb.mem_ctx, brw_reg, impl->ssa_alloc); fs_nir_emit_cf_list(ntb, &impl->body); } @@ -430,7 +430,7 @@ fs_nir_emit_if(nir_to_brw_state &ntb, nir_if *if_stmt) const fs_builder &bld = ntb.bld; bool invert; - fs_reg cond_reg; + brw_reg cond_reg; /* If the condition has the form !other_condition, use other_condition as * the source, but invert the predicate on the if instruction.
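/* A worked trace of the mask-to-boolean resolve above, with bit values
 * chosen for illustration: for a channel whose pixel-mask bit is 1, the
 * 1's-complement negate flips the low bit to 0, the AND with 1 extracts
 * that 0, and negating 0 as a dword still gives 0, i.e. false. For a
 * channel whose mask bit is 0, the flip/AND sequence yields 1, and the
 * final negate as BRW_TYPE_D gives -1 = ~0, i.e. true.
 */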
@@ -493,7 +493,7 @@ fs_nir_emit_block(nir_to_brw_state &ntb, nir_block *block) */ static bool optimize_extract_to_float(nir_to_brw_state &ntb, nir_alu_instr *instr, - const fs_reg &result) + const brw_reg &result) { const intel_device_info *devinfo = ntb.devinfo; const fs_builder &bld = ntb.bld; @@ -551,7 +551,7 @@ optimize_extract_to_float(nir_to_brw_state &ntb, nir_alu_instr *instr, /* Element type to extract.*/ const brw_reg_type type = brw_int_type(bytes, is_signed); - fs_reg op0 = get_nir_src(ntb, src0->src[0].src); + brw_reg op0 = get_nir_src(ntb, src0->src[0].src); op0.type = brw_type_for_nir_type(devinfo, (nir_alu_type)(nir_op_infos[src0->op].input_types[0] | nir_src_bit_size(src0->src[0].src))); @@ -580,7 +580,7 @@ optimize_extract_to_float(nir_to_brw_state &ntb, nir_alu_instr *instr, static bool optimize_frontfacing_ternary(nir_to_brw_state &ntb, nir_alu_instr *instr, - const fs_reg &result) + const brw_reg &result) { const intel_device_info *devinfo = ntb.devinfo; fs_visitor &s = ntb.s; @@ -601,7 +601,7 @@ optimize_frontfacing_ternary(nir_to_brw_state &ntb, /* nir_opt_algebraic should have gotten rid of bcsel(b, a, a) */ assert(value1 == -value2); - fs_reg tmp = ntb.bld.vgrf(BRW_TYPE_D); + brw_reg tmp = ntb.bld.vgrf(BRW_TYPE_D); if (devinfo->ver >= 20) { /* Gfx20+ has separate back-facing bits for each pair of @@ -610,7 +610,7 @@ optimize_frontfacing_ternary(nir_to_brw_state &ntb, * each channel. Unfortunately they're no longer aligned to the * sign bit of a 16-bit word, so a left shift is necessary. */ - fs_reg ff = ntb.bld.vgrf(BRW_TYPE_UW); + brw_reg ff = ntb.bld.vgrf(BRW_TYPE_UW); for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const fs_builder hbld = ntb.bld.group(16, i); @@ -648,7 +648,7 @@ optimize_frontfacing_ternary(nir_to_brw_state &ntb, } else if (devinfo->ver >= 12) { /* Bit 15 of g1.1 is 0 if the polygon is front facing. */ - fs_reg g1 = fs_reg(retype(brw_vec1_grf(1, 1), BRW_TYPE_W)); + brw_reg g1 = brw_reg(retype(brw_vec1_grf(1, 1), BRW_TYPE_W)); /* For (gl_FrontFacing ? 1.0 : -1.0), emit: * @@ -664,7 +664,7 @@ optimize_frontfacing_ternary(nir_to_brw_state &ntb, g1, brw_imm_uw(0x3f80)); } else { /* Bit 15 of g0.0 is 0 if the polygon is front facing. */ - fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_TYPE_W)); + brw_reg g0 = brw_reg(retype(brw_vec1_grf(0, 0), BRW_TYPE_W)); /* For (gl_FrontFacing ? 1.0 : -1.0), emit: * @@ -711,16 +711,16 @@ brw_rnd_mode_from_execution_mode(unsigned execution_mode) return BRW_RND_MODE_UNSPECIFIED; } -static fs_reg +static brw_reg prepare_alu_destination_and_sources(nir_to_brw_state &ntb, const fs_builder &bld, nir_alu_instr *instr, - fs_reg *op, + brw_reg *op, bool need_dest) { const intel_device_info *devinfo = ntb.devinfo; - fs_reg result = + brw_reg result = need_dest ? get_nir_def(ntb, instr->def) : bld.null_reg_ud(); result.type = brw_type_for_nir_type(devinfo, @@ -775,15 +775,15 @@ prepare_alu_destination_and_sources(nir_to_brw_state &ntb, return result; } -static fs_reg -resolve_source_modifiers(const fs_builder &bld, const fs_reg &src) +static brw_reg +resolve_source_modifiers(const fs_builder &bld, const brw_reg &src) { return (src.abs || src.negate) ? 
bld.MOV(src) : src; } static void resolve_inot_sources(nir_to_brw_state &ntb, const fs_builder &bld, nir_alu_instr *instr, - fs_reg *op) + brw_reg *op) { for (unsigned i = 0; i < 2; i++) { nir_alu_instr *inot_instr = nir_src_as_alu_instr(instr->src[i].src); @@ -802,7 +802,7 @@ resolve_inot_sources(nir_to_brw_state &ntb, const fs_builder &bld, nir_alu_instr static bool try_emit_b2fi_of_inot(nir_to_brw_state &ntb, const fs_builder &bld, - fs_reg result, + brw_reg result, nir_alu_instr *instr) { const intel_device_info *devinfo = bld.shader->devinfo; @@ -828,7 +828,7 @@ try_emit_b2fi_of_inot(nir_to_brw_state &ntb, const fs_builder &bld, /* b2[fi](inot(a)) maps a=0 => 1, a=-1 => 0. Since a can only be 0 or -1, * this is float(1 + a). */ - fs_reg op; + brw_reg op; prepare_alu_destination_and_sources(ntb, bld, inot_instr, &op, false); @@ -857,8 +857,8 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, unsigned execution_mode = bld.shader->nir->info.float_controls_execution_mode; - fs_reg op[NIR_MAX_VEC_COMPONENTS]; - fs_reg result = prepare_alu_destination_and_sources(ntb, bld, instr, op, need_dest); + brw_reg op[NIR_MAX_VEC_COMPONENTS]; + brw_reg result = prepare_alu_destination_and_sources(ntb, bld, instr, op, need_dest); #ifndef NDEBUG /* Everything except raw moves, some type conversions, iabs, and ineg @@ -900,7 +900,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_vec4: case nir_op_vec8: case nir_op_vec16: { - fs_reg temp = result; + brw_reg temp = result; bool need_extra_copy = false; nir_intrinsic_instr *store_reg = @@ -924,7 +924,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, nir_component_mask_t write_mask = get_nir_write_mask(instr->def); unsigned last_bit = util_last_bit(write_mask); - fs_reg comps[last_bit]; + brw_reg comps[last_bit]; for (unsigned i = 0; i < last_bit; i++) { if (instr->op == nir_op_mov) @@ -1177,7 +1177,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, op[1] = resolve_source_modifiers(bld, op[1]); /* AVG(x, y) - ((x ^ y) & 1) */ - fs_reg one = retype(brw_imm_ud(1), result.type); + brw_reg one = retype(brw_imm_ud(1), result.type); bld.ADD(result, bld.AVG(op[0], op[1]), negate(bld.AND(bld.XOR(op[0], op[1]), one))); break; @@ -1228,7 +1228,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, if (instr->def.bit_size == 32) { bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]); } else { - fs_reg tmp = bld.vgrf(brw_type_with_size(op[0].type, 32)); + brw_reg tmp = bld.vgrf(brw_type_with_size(op[0].type, 32)); bld.MUL(tmp, op[0], op[1]); bld.MOV(result, subscript(tmp, result.type, 1)); } @@ -1290,7 +1290,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_fge32: case nir_op_feq32: case nir_op_fneu32: { - fs_reg dest = result; + brw_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size != 32) { @@ -1320,7 +1320,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_uge32: case nir_op_ieq32: case nir_op_ine32: { - fs_reg dest = result; + brw_reg dest = result; const uint32_t bit_size = brw_type_size_bits(op[0].type); if (bit_size != 32) { @@ -1464,19 +1464,19 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, break; case nir_op_fquantize2f16: { - fs_reg tmp16 = bld.vgrf(BRW_TYPE_D); - fs_reg tmp32 = bld.vgrf(BRW_TYPE_F); + brw_reg tmp16 = bld.vgrf(BRW_TYPE_D); + brw_reg tmp32 = bld.vgrf(BRW_TYPE_F); /* The destination stride must be at least as big as the source stride. 
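 *
 * (Here that holds because tmp16 was allocated as one dword per channel:
 * the subscript(..., BRW_TYPE_HF, 0) view below reads it as half-floats
 * spaced two elements apart, so every converted value still lands in its
 * own dword slot.)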
*/ tmp16 = subscript(tmp16, BRW_TYPE_HF, 0); /* Check for denormal */ - fs_reg abs_src0 = op[0]; + brw_reg abs_src0 = op[0]; abs_src0.abs = true; bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), BRW_CONDITIONAL_L); /* Get the appropriately signed zero */ - fs_reg zero = retype(bld.AND(retype(op[0], BRW_TYPE_UD), + brw_reg zero = retype(bld.AND(retype(op[0], BRW_TYPE_UD), brw_imm_ud(0x80000000)), BRW_TYPE_F); /* Do the actual F32 -> F16 -> F32 conversion */ bld.MOV(tmp16, op[0]); @@ -1761,7 +1761,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, if (instr->def.bit_size == 64) { if (instr->op == nir_op_extract_i8) { /* If we need to sign extend, extract to a word first */ - fs_reg w_temp = bld.vgrf(BRW_TYPE_W); + brw_reg w_temp = bld.vgrf(BRW_TYPE_W); bld.MOV(w_temp, subscript(op[0], type, byte)); bld.MOV(result, w_temp); } else if (byte & 1) { @@ -1805,9 +1805,9 @@ fs_nir_emit_load_const(nir_to_brw_state &ntb, const brw_reg_type reg_type = brw_type_with_size(BRW_TYPE_D, instr->def.bit_size); - fs_reg reg = bld.vgrf(reg_type, instr->def.num_components); + brw_reg reg = bld.vgrf(reg_type, instr->def.num_components); - fs_reg comps[instr->def.num_components]; + brw_reg comps[instr->def.num_components]; switch (instr->def.bit_size) { case 8: @@ -1858,20 +1858,20 @@ is_resource_src(nir_src src) nir_instr_as_intrinsic(src.ssa->parent_instr)->intrinsic == nir_intrinsic_resource_intel; } -static fs_reg +static brw_reg get_resource_nir_src(nir_to_brw_state &ntb, const nir_src &src) { if (!is_resource_src(src)) - return fs_reg(); + return brw_reg(); return ntb.uniform_values[src.ssa->index]; } -static fs_reg +static brw_reg get_nir_src(nir_to_brw_state &ntb, const nir_src &src) { nir_intrinsic_instr *load_reg = nir_load_reg_for_def(src.ssa); - fs_reg reg; + brw_reg reg; if (!load_reg) { if (nir_src_is_undef(src)) { const brw_reg_type reg_type = @@ -1906,15 +1906,15 @@ get_nir_src(nir_to_brw_state &ntb, const nir_src &src) * enough restrictions in 64-bit immediates that you can't take the return * value and treat it the same as the result of get_nir_src(). */ -static fs_reg +static brw_reg get_nir_src_imm(nir_to_brw_state &ntb, const nir_src &src) { assert(nir_src_bit_size(src) == 32); return nir_src_is_const(src) ? - fs_reg(brw_imm_d(nir_src_as_int(src))) : get_nir_src(ntb, src); + brw_reg(brw_imm_d(nir_src_as_int(src))) : get_nir_src(ntb, src); } -static fs_reg +static brw_reg get_nir_def(nir_to_brw_state &ntb, const nir_def &def) { const fs_builder &bld = ntb.bld; @@ -1955,16 +1955,16 @@ get_nir_write_mask(const nir_def &def) static fs_inst * emit_pixel_interpolater_send(const fs_builder &bld, enum opcode opcode, - const fs_reg &dst, - const fs_reg &src, - const fs_reg &desc, - const fs_reg &flag_reg, + const brw_reg &dst, + const brw_reg &src, + const brw_reg &desc, + const brw_reg &flag_reg, glsl_interp_mode interpolation) { struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(bld.shader->prog_data); - fs_reg srcs[INTERP_NUM_SRCS]; + brw_reg srcs[INTERP_NUM_SRCS]; srcs[INTERP_SRC_OFFSET] = src; srcs[INTERP_SRC_MSG_DESC] = desc; srcs[INTERP_SRC_DYNAMIC_MODE] = flag_reg; @@ -2000,9 +2000,9 @@ emit_pixel_interpolater_send(const fs_builder &bld, * This can be used to access the value of a "Source Depth and/or W * Attribute Vertex Deltas", "Perspective Bary Planes" or * "Non-Perspective Bary Planes" payload field conveniently for - * multiple polygons as a single fs_reg. + * multiple polygons as a single brw_reg. 
*/ -static fs_reg +static brw_reg fetch_polygon_reg(const fs_builder &bld, unsigned reg, unsigned subreg) { const fs_visitor *shader = bld.shader; @@ -2035,8 +2035,8 @@ fetch_polygon_reg(const fs_builder &bld, unsigned reg, unsigned subreg) */ static void emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, - const fs_reg &dst, - const fs_reg &offs, + const brw_reg &dst, + const brw_reg &offs, glsl_interp_mode interpolation) { const fs_visitor *shader = bld.shader; @@ -2059,10 +2059,10 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, } /* Account for half-pixel X/Y coordinate offset. */ - const fs_reg off_x = bld.vgrf(BRW_TYPE_F); + const brw_reg off_x = bld.vgrf(BRW_TYPE_F); bld.ADD(off_x, offs, brw_imm_f(0.5)); - const fs_reg off_y = bld.vgrf(BRW_TYPE_F); + const brw_reg off_y = bld.vgrf(BRW_TYPE_F); bld.ADD(off_y, offset(offs, bld, 1), brw_imm_f(0.5)); /* Process no more than two polygons at a time to avoid hitting @@ -2076,41 +2076,41 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, /* Fetch needed parameters from the thread payload. */ const unsigned bary_coef_reg = interpolation == INTERP_MODE_NOPERSPECTIVE ? payload.npc_bary_coef_reg : payload.pc_bary_coef_reg; - const fs_reg start_x = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 1) : + const brw_reg start_x = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 1) : fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 6 : 2); - const fs_reg start_y = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 6) : + const brw_reg start_y = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 6) : fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 7 : 6); - const fs_reg bary1_c0 = fetch_polygon_reg(ibld, bary_coef_reg, + const brw_reg bary1_c0 = fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 2 : 3); - const fs_reg bary1_cx = fetch_polygon_reg(ibld, bary_coef_reg, 1); - const fs_reg bary1_cy = fetch_polygon_reg(ibld, bary_coef_reg, 0); + const brw_reg bary1_cx = fetch_polygon_reg(ibld, bary_coef_reg, 1); + const brw_reg bary1_cy = fetch_polygon_reg(ibld, bary_coef_reg, 0); - const fs_reg bary2_c0 = fetch_polygon_reg(ibld, bary_coef_reg, + const brw_reg bary2_c0 = fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 5 : 7); - const fs_reg bary2_cx = fetch_polygon_reg(ibld, bary_coef_reg, + const brw_reg bary2_cx = fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 4 : 5); - const fs_reg bary2_cy = fetch_polygon_reg(ibld, bary_coef_reg, + const brw_reg bary2_cy = fetch_polygon_reg(ibld, bary_coef_reg, devinfo->ver >= 20 ? 3 : 4); - const fs_reg rhw_c0 = devinfo->ver >= 20 ? + const brw_reg rhw_c0 = devinfo->ver >= 20 ? fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 5) : fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 7); - const fs_reg rhw_cx = devinfo->ver >= 20 ? + const brw_reg rhw_cx = devinfo->ver >= 20 ? fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 4) : fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 5); - const fs_reg rhw_cy = devinfo->ver >= 20 ? + const brw_reg rhw_cy = devinfo->ver >= 20 ? fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 3) : fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 4); /* Compute X/Y coordinate deltas relative to the origin of the polygon. 
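 *
 * Each coefficient set is then evaluated with the usual affine plane
 * equation, v(x, y) = c0 + cx * delta_x + cy * delta_y, via the
 * accumulating multiply-add sequence below: once per barycentric
 * coordinate, plus the 1/W plane when perspective correction is needed.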
*/ - const fs_reg delta_x = ibld.vgrf(BRW_TYPE_F); + const brw_reg delta_x = ibld.vgrf(BRW_TYPE_F); ibld.ADD(delta_x, offset(shader->pixel_x, ibld, i), negate(start_x)); ibld.ADD(delta_x, delta_x, offset(off_x, ibld, i)); - const fs_reg delta_y = ibld.vgrf(BRW_TYPE_F); + const brw_reg delta_y = ibld.vgrf(BRW_TYPE_F); ibld.ADD(delta_y, offset(shader->pixel_y, ibld, i), negate(start_y)); ibld.ADD(delta_y, delta_y, offset(off_y, ibld, i)); @@ -2120,13 +2120,13 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, * allow the accumulator to be used for linear interpolation. */ const unsigned acc_width = 16 * reg_unit(devinfo); - const fs_reg rhw = ibld.vgrf(BRW_TYPE_F); - const fs_reg bary1 = ibld.vgrf(BRW_TYPE_F); - const fs_reg bary2 = ibld.vgrf(BRW_TYPE_F); + const brw_reg rhw = ibld.vgrf(BRW_TYPE_F); + const brw_reg bary1 = ibld.vgrf(BRW_TYPE_F); + const brw_reg bary2 = ibld.vgrf(BRW_TYPE_F); for (unsigned j = 0; j < DIV_ROUND_UP(ibld.dispatch_width(), acc_width); j++) { const fs_builder jbld = ibld.group(MIN2(ibld.dispatch_width(), acc_width), j); - const fs_reg acc = suboffset(brw_acc_reg(16), jbld.group() % acc_width); + const brw_reg acc = suboffset(brw_acc_reg(16), jbld.group() % acc_width); if (interpolation != INTERP_MODE_NOPERSPECTIVE) { jbld.MAD(acc, horiz_offset(rhw_c0, acc_width * j), @@ -2153,7 +2153,7 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, ibld.MOV(offset(dst, ibld, i), bary1); ibld.MOV(offset(offset(dst, bld, 1), ibld, i), bary2); } else { - const fs_reg w = ibld.vgrf(BRW_TYPE_F); + const brw_reg w = ibld.vgrf(BRW_TYPE_F); ibld.emit(SHADER_OPCODE_RCP, w, rhw); ibld.MUL(offset(dst, ibld, i), bary1, w); ibld.MUL(offset(offset(dst, bld, 1), ibld, i), bary2, w); @@ -2171,15 +2171,15 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld, */ static void emit_pixel_interpolater_alu_at_sample(const fs_builder &bld, - const fs_reg &dst, - const fs_reg &idx, + const brw_reg &dst, + const brw_reg &idx, glsl_interp_mode interpolation) { const fs_thread_payload &payload = bld.shader->fs_payload(); const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(bld.shader->prog_data); const fs_builder ubld = bld.exec_all().group(16, 0); - const fs_reg sample_offs_xy = ubld.vgrf(BRW_TYPE_UD); + const brw_reg sample_offs_xy = ubld.vgrf(BRW_TYPE_UD); assert(wm_prog_data->uses_sample_offsets); /* Interleave the X/Y coordinates of each sample in order to allow @@ -2187,7 +2187,7 @@ emit_pixel_interpolater_alu_at_sample(const fs_builder &bld, * coordinates, then another MOV for the 16 Y coordinates. */ for (unsigned i = 0; i < 2; i++) { - const fs_reg reg = retype(brw_vec16_grf(payload.sample_offsets_reg, 4 * i), + const brw_reg reg = retype(brw_vec16_grf(payload.sample_offsets_reg, 4 * i), BRW_TYPE_UB); ubld.MOV(subscript(sample_offs_xy, BRW_TYPE_UW, i), reg); } @@ -2195,20 +2195,20 @@ emit_pixel_interpolater_alu_at_sample(const fs_builder &bld, /* Use indirect addressing to fetch the X/Y offsets of the sample * index provided for each channel. */ - const fs_reg idx_b = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx_b = bld.vgrf(BRW_TYPE_UD); bld.MUL(idx_b, idx, brw_imm_ud(brw_type_size_bytes(BRW_TYPE_UD))); - const fs_reg off_xy = bld.vgrf(BRW_TYPE_UD); + const brw_reg off_xy = bld.vgrf(BRW_TYPE_UD); bld.emit(SHADER_OPCODE_MOV_INDIRECT, off_xy, component(sample_offs_xy, 0), idx_b, brw_imm_ud(16 * brw_type_size_bytes(BRW_TYPE_UD))); /* Convert the selected fixed-point offsets to floating-point * offsets. 
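 *
 * The packed offsets count 1/16ths of a pixel, so the MUL by 0.0625
 * below rescales them to pixels and the ADD of -0.5 recenters them on
 * the pixel center; e.g. a raw offset of 12 becomes 12/16 - 0.5 = 0.25.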
*/ - const fs_reg offs = bld.vgrf(BRW_TYPE_F, 2); + const brw_reg offs = bld.vgrf(BRW_TYPE_F, 2); for (unsigned i = 0; i < 2; i++) { - const fs_reg tmp = bld.vgrf(BRW_TYPE_F); + const brw_reg tmp = bld.vgrf(BRW_TYPE_F); bld.MOV(tmp, subscript(off_xy, BRW_TYPE_UW, i)); bld.MUL(tmp, tmp, brw_imm_f(0.0625)); bld.ADD(offset(offs, bld, i), tmp, brw_imm_f(-0.5)); @@ -2221,8 +2221,8 @@ emit_pixel_interpolater_alu_at_sample(const fs_builder &bld, /** * Computes 1 << x, given a D/UD register containing some value x. */ -static fs_reg -intexp2(const fs_builder &bld, const fs_reg &x) +static brw_reg +intexp2(const fs_builder &bld, const brw_reg &x) { assert(x.type == BRW_TYPE_UD || x.type == BRW_TYPE_D); @@ -2252,7 +2252,7 @@ emit_gs_end_primitive(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src /* Cut bits use one bit per vertex. */ assert(s.gs_compile->control_data_bits_per_vertex == 1); - fs_reg vertex_count = get_nir_src(ntb, vertex_count_nir_src); + brw_reg vertex_count = get_nir_src(ntb, vertex_count_nir_src); vertex_count.type = BRW_TYPE_UD; /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting @@ -2279,8 +2279,8 @@ emit_gs_end_primitive(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src const fs_builder abld = ntb.bld.annotate("end primitive"); /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ - fs_reg prev_count = abld.ADD(vertex_count, brw_imm_ud(0xffffffffu)); - fs_reg mask = intexp2(abld, prev_count); + brw_reg prev_count = abld.ADD(vertex_count, brw_imm_ud(0xffffffffu)); + brw_reg mask = intexp2(abld, prev_count); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this * architecture, 1 << (vertex_count - 1) is equivalent to 1 << @@ -2289,8 +2289,8 @@ emit_gs_end_primitive(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src abld.OR(s.control_data_bits, s.control_data_bits, mask); } -fs_reg -fs_visitor::gs_urb_per_slot_dword_index(const fs_reg &vertex_count) +brw_reg +fs_visitor::gs_urb_per_slot_dword_index(const brw_reg &vertex_count) { /* We use a single UD register to accumulate control data bits (32 bits * for each of the SIMD8 channels). So we need to write a DWord (32 bits) @@ -2330,16 +2330,16 @@ fs_visitor::gs_urb_per_slot_dword_index(const fs_reg &vertex_count) * * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) */ - fs_reg prev_count = abld.ADD(vertex_count, brw_imm_ud(0xffffffffu)); + brw_reg prev_count = abld.ADD(vertex_count, brw_imm_ud(0xffffffffu)); unsigned log2_bits_per_vertex = util_last_bit(gs_compile->control_data_bits_per_vertex); return abld.SHR(prev_count, brw_imm_ud(6u - log2_bits_per_vertex)); } -fs_reg -fs_visitor::gs_urb_channel_mask(const fs_reg &dword_index) +brw_reg +fs_visitor::gs_urb_channel_mask(const brw_reg &dword_index) { - fs_reg channel_mask; + brw_reg channel_mask; /* Xe2+ can do URB loads with a byte offset, so we don't need to * construct a channel mask. @@ -2369,13 +2369,13 @@ fs_visitor::gs_urb_channel_mask(const fs_reg &dword_index) /* Set the channel masks to 1 << (dword_index % 4), so that we'll * write to the appropriate DWORD within the OWORD. */ - fs_reg channel = ubld.AND(dword_index, brw_imm_ud(3u)); + brw_reg channel = ubld.AND(dword_index, brw_imm_ud(3u)); /* Then the channel masks need to be in bits 23:16. 
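 *
 * For example, dword_index = 6 gives channel = 6 & 3 = 2, intexp2()
 * turns that into 1 << 2 = 4, and the SHL by 16 below produces
 * 0x00040000, enabling only DWord 2 of the OWord being written.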
*/ return ubld.SHL(intexp2(ubld, channel), brw_imm_ud(16u)); } void -fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) +fs_visitor::emit_gs_control_data_bits(const brw_reg &vertex_count) { assert(stage == MESA_SHADER_GEOMETRY); assert(gs_compile->control_data_bits_per_vertex != 0); @@ -2385,9 +2385,9 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) const fs_builder bld = fs_builder(this).at_end(); const fs_builder abld = bld.annotate("emit control data bits"); - fs_reg dword_index = gs_urb_per_slot_dword_index(vertex_count); - fs_reg channel_mask = gs_urb_channel_mask(dword_index); - fs_reg per_slot_offset; + brw_reg dword_index = gs_urb_per_slot_dword_index(vertex_count); + brw_reg channel_mask = gs_urb_channel_mask(dword_index); + brw_reg per_slot_offset; const unsigned max_control_data_header_size_bits = devinfo->ver >= 20 ? 32 : 128; @@ -2408,12 +2408,12 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) /* If there are channel masks, add 3 extra copies of the data. */ const unsigned length = 1 + 3 * unsigned(channel_mask.file != BAD_FILE); - fs_reg sources[length]; + brw_reg sources[length]; for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) sources[i] = this->control_data_bits; - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask; @@ -2434,7 +2434,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) } static void -set_gs_stream_control_data_bits(nir_to_brw_state &ntb, const fs_reg &vertex_count, +set_gs_stream_control_data_bits(nir_to_brw_state &ntb, const brw_reg &vertex_count, unsigned stream_id) { fs_visitor &s = ntb.s; @@ -2460,17 +2460,17 @@ set_gs_stream_control_data_bits(nir_to_brw_state &ntb, const fs_reg &vertex_coun const fs_builder abld = ntb.bld.annotate("set stream control data bits", NULL); /* reg::sid = stream_id */ - fs_reg sid = abld.MOV(brw_imm_ud(stream_id)); + brw_reg sid = abld.MOV(brw_imm_ud(stream_id)); /* reg:shift_count = 2 * (vertex_count - 1) */ - fs_reg shift_count = abld.SHL(vertex_count, brw_imm_ud(1u)); + brw_reg shift_count = abld.SHL(vertex_count, brw_imm_ud(1u)); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to * stream_id << ((2 * (vertex_count - 1)) % 32). 
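 *
 * Following that formula (values picked for illustration): with 2
 * control data bits per vertex, vertex_count == 3 on stream 2 gives a
 * shift of 2 * (3 - 1) = 4, so the mask OR'ed in below is 2 << 4 = 0x20,
 * placing the stream ID in bits 5:4 of control_data_bits.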
*/ - fs_reg mask = abld.SHL(sid, shift_count); + brw_reg mask = abld.SHL(sid, shift_count); abld.OR(s.control_data_bits, s.control_data_bits, mask); } @@ -2484,7 +2484,7 @@ emit_gs_vertex(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src, struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s.prog_data); - fs_reg vertex_count = get_nir_src(ntb, vertex_count_nir_src); + brw_reg vertex_count = get_nir_src(ntb, vertex_count_nir_src); vertex_count.type = BRW_TYPE_UD; /* Haswell and later hardware ignores the "Render Stream Select" bits @@ -2572,7 +2572,7 @@ emit_gs_vertex(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src, } static void -emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, +emit_gs_input_load(nir_to_brw_state &ntb, const brw_reg &dst, const nir_src &vertex_src, unsigned base_offset, const nir_src &offset_src, @@ -2595,8 +2595,8 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, int imm_offset = (base_offset + nir_src_as_uint(offset_src)) * 4 + nir_src_as_uint(vertex_src) * push_reg_count; - fs_reg comps[num_components]; - const fs_reg attr = brw_attr_reg(0, dst.type); + brw_reg comps[num_components]; + const brw_reg attr = brw_attr_reg(0, dst.type); for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(attr, bld, imm_offset + i + first_component); } @@ -2607,8 +2607,8 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, /* Resort to the pull model. Ensure the VUE handles are provided. */ assert(gs_prog_data->base.include_vue_handles); - fs_reg start = s.gs_payload().icp_handle_start; - fs_reg icp_handle = ntb.bld.vgrf(BRW_TYPE_UD); + brw_reg start = s.gs_payload().icp_handle_start; + brw_reg icp_handle = ntb.bld.vgrf(BRW_TYPE_UD); if (gs_prog_data->invocations == 1) { if (nir_src_is_const(vertex_src)) { @@ -2626,16 +2626,16 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, * by 32 (shifting by 5), and add the two together. This is * the final indirect byte offset. */ - fs_reg sequence = + brw_reg sequence = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ - fs_reg channel_offsets = bld.SHL(sequence, brw_imm_ud(2u)); + brw_reg channel_offsets = bld.SHL(sequence, brw_imm_ud(2u)); /* Convert vertex_index to bytes (multiply by 32) */ - fs_reg vertex_offset_bytes = + brw_reg vertex_offset_bytes = bld.SHL(retype(get_nir_src(ntb, vertex_src), BRW_TYPE_UD), brw_imm_ud(5u)); - fs_reg icp_offset_bytes = + brw_reg icp_offset_bytes = bld.ADD(vertex_offset_bytes, channel_offsets); /* Use first_icp_handle as the base offset. There is one register @@ -2643,7 +2643,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, * we might read up to nir->info.gs.vertices_in registers. */ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start, - fs_reg(icp_offset_bytes), + brw_reg(icp_offset_bytes), brw_imm_ud(s.nir->info.gs.vertices_in * REG_SIZE)); } } else { @@ -2658,7 +2658,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, * * Convert vertex_index to bytes (multiply by 4) */ - fs_reg icp_offset_bytes = + brw_reg icp_offset_bytes = bld.SHL(retype(get_nir_src(ntb, vertex_src), BRW_TYPE_UD), brw_imm_ud(2u)); @@ -2667,28 +2667,28 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, * we might read up to ceil(nir->info.gs.vertices_in / 8) registers. 
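 *
 * E.g. a geometry shader with triangle input (vertices_in == 3) packs
 * all three handle dwords into a single register, so the MOV_INDIRECT
 * below may read at most DIV_ROUND_UP(3, 8) * REG_SIZE = one GRF.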
*/ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start, - fs_reg(icp_offset_bytes), + brw_reg(icp_offset_bytes), brw_imm_ud(DIV_ROUND_UP(s.nir->info.gs.vertices_in, 8) * REG_SIZE)); } } fs_inst *inst; - fs_reg indirect_offset = get_nir_src(ntb, offset_src); + brw_reg indirect_offset = get_nir_src(ntb, offset_src); if (nir_src_is_const(offset_src)) { - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * tmp.component_size(inst->exec_size); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -2703,13 +2703,13 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, } else { /* Indirect indexing - use per-slot offsets as well. */ unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); /* Convert oword offset to bytes on Xe2+ */ if (devinfo->ver >= 20) indirect_offset = bld.SHL(indirect_offset, brw_imm_ud(4u)); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; @@ -2718,7 +2718,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * tmp.component_size(inst->exec_size); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -2733,7 +2733,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, } } -static fs_reg +static brw_reg get_indirect_offset(nir_to_brw_state &ntb, nir_intrinsic_instr *instr) { const intel_device_info *devinfo = ntb.devinfo; @@ -2745,10 +2745,10 @@ get_indirect_offset(nir_to_brw_state &ntb, nir_intrinsic_instr *instr) * into the "base" index. 
*/ assert(nir_src_as_uint(*offset_src) == 0); - return fs_reg(); + return brw_reg(); } - fs_reg offset = get_nir_src(ntb, *offset_src); + brw_reg offset = get_nir_src(ntb, *offset_src); if (devinfo->ver < 20) return offset; @@ -2765,7 +2765,7 @@ fs_nir_emit_vs_intrinsic(nir_to_brw_state &ntb, fs_visitor &s = ntb.s; assert(s.stage == MESA_SHADER_VERTEX); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -2776,12 +2776,12 @@ fs_nir_emit_vs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_input: { assert(instr->def.bit_size == 32); - const fs_reg src = offset(brw_attr_reg(0, dest.type), bld, + const brw_reg src = offset(brw_attr_reg(0, dest.type), bld, nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr) + nir_src_as_uint(instr->src[0])); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(src, bld, i); } @@ -2803,7 +2803,7 @@ fs_nir_emit_vs_intrinsic(nir_to_brw_state &ntb, } } -static fs_reg +static brw_reg get_tcs_single_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -2813,9 +2813,9 @@ get_tcs_single_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, const nir_src &vertex_src = instr->src[0]; nir_intrinsic_instr *vertex_intrin = nir_src_as_intrinsic(vertex_src); - const fs_reg start = s.tcs_payload().icp_handle_start; + const brw_reg start = s.tcs_payload().icp_handle_start; - fs_reg icp_handle; + brw_reg icp_handle; if (nir_src_is_const(vertex_src)) { /* Emit a MOV to resolve <0,1,0> regioning. */ @@ -2835,7 +2835,7 @@ get_tcs_single_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, icp_handle = bld.vgrf(BRW_TYPE_UD); /* Each ICP handle is a single DWord (4 bytes) */ - fs_reg vertex_offset_bytes = + brw_reg vertex_offset_bytes = bld.SHL(retype(get_nir_src(ntb, vertex_src), BRW_TYPE_UD), brw_imm_ud(2u)); @@ -2848,7 +2848,7 @@ get_tcs_single_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, return icp_handle; } -static fs_reg +static brw_reg get_tcs_multi_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -2859,7 +2859,7 @@ get_tcs_multi_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, const nir_src &vertex_src = instr->src[0]; const unsigned grf_size_bytes = REG_SIZE * reg_unit(devinfo); - const fs_reg start = s.tcs_payload().icp_handle_start; + const brw_reg start = s.tcs_payload().icp_handle_start; if (nir_src_is_const(vertex_src)) return byte_offset(start, nir_src_as_uint(vertex_src) * grf_size_bytes); @@ -2875,23 +2875,23 @@ get_tcs_multi_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, * by the GRF size (by shifting), and add the two together. This is * the final indirect byte offset. */ - fs_reg sequence = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; + brw_reg sequence = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; /* Offsets will be 0, 4, 8, ... 
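 *
 * i.e. channel n contributes a byte offset of 4 * n. Combined with the
 * vertex term computed below, channel 3 fetching vertex 2 on a part with
 * 32-byte GRFs reads the handle at 2 * 32 + 4 * 3 = 76 bytes from the
 * start of the ICP handle block (numbers picked for illustration).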
*/ - fs_reg channel_offsets = bld.SHL(sequence, brw_imm_ud(2u)); + brw_reg channel_offsets = bld.SHL(sequence, brw_imm_ud(2u)); /* Convert vertex_index to bytes (multiply by 32) */ assert(util_is_power_of_two_nonzero(grf_size_bytes)); /* for ffs() */ - fs_reg vertex_offset_bytes = + brw_reg vertex_offset_bytes = bld.SHL(retype(get_nir_src(ntb, vertex_src), BRW_TYPE_UD), brw_imm_ud(ffs(grf_size_bytes) - 1)); - fs_reg icp_offset_bytes = + brw_reg icp_offset_bytes = bld.ADD(vertex_offset_bytes, channel_offsets); /* Use start of ICP handles as the base offset. There is one register * of URB handles per vertex, so inform the register allocator that * we might read up to nir->info.gs.vertices_in registers. */ - fs_reg icp_handle = bld.vgrf(BRW_TYPE_UD); + brw_reg icp_handle = bld.vgrf(BRW_TYPE_UD); bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start, icp_offset_bytes, brw_imm_ud(brw_tcs_prog_key_input_vertices(tcs_key) * @@ -2902,22 +2902,22 @@ get_tcs_multi_patch_icp_handle(nir_to_brw_state &ntb, const fs_builder &bld, static void setup_barrier_message_payload_gfx125(const fs_builder &bld, - const fs_reg &msg_payload) + const brw_reg &msg_payload) { const fs_builder ubld = bld.exec_all().group(1, 0); const struct intel_device_info *devinfo = bld.shader->devinfo; assert(devinfo->verx10 >= 125); /* From BSpec: 54006, mov r0.2[31:24] into m0.2[31:24] and m0.2[23:16] */ - fs_reg m0_10ub = horiz_offset(retype(msg_payload, BRW_TYPE_UB), 10); - fs_reg r0_11ub = + brw_reg m0_10ub = horiz_offset(retype(msg_payload, BRW_TYPE_UB), 10); + brw_reg r0_11ub = stride(suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UB), 11), 0, 1, 0); ubld.group(2, 0).MOV(m0_10ub, r0_11ub); if (devinfo->ver >= 20) { /* Use an active threads barrier. */ - const fs_reg m0_2ud = component(retype(msg_payload, BRW_TYPE_UD), 2); + const brw_reg m0_2ud = component(retype(msg_payload, BRW_TYPE_UD), 2); ubld.OR(m0_2ud, m0_2ud, brw_imm_ud(1u << 8)); } } @@ -2932,7 +2932,7 @@ emit_barrier(nir_to_brw_state &ntb) /* We are getting the barrier ID from the compute shader header */ assert(gl_shader_stage_uses_workgroup(s.stage)); - fs_reg payload = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); + brw_reg payload = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); /* Clear the message payload */ bld.exec_all().group(8, 0).MOV(payload, brw_imm_ud(0u)); @@ -2957,7 +2957,7 @@ emit_barrier(nir_to_brw_state &ntb) } /* Copy the barrier id from r0.2 to the message payload reg.2 */ - fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)); + brw_reg r0_2 = brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)); bld.exec_all().group(1, 0).AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask)); } @@ -2978,8 +2978,8 @@ emit_tcs_barrier(nir_to_brw_state &ntb) assert(s.stage == MESA_SHADER_TESS_CTRL); struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s.prog_data); - fs_reg m0 = bld.vgrf(BRW_TYPE_UD); - fs_reg m0_2 = component(m0, 2); + brw_reg m0 = bld.vgrf(BRW_TYPE_UD); + brw_reg m0_2 = component(m0, 2); const fs_builder chanbld = bld.exec_all().group(1, 0); @@ -3023,7 +3023,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s.prog_data); struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; - fs_reg dst; + brw_reg dst; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dst = get_nir_def(ntb, instr->def); @@ -3050,14 +3050,14 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_per_vertex_input: { assert(instr->def.bit_size == 32); - fs_reg 
indirect_offset = get_indirect_offset(ntb, instr); + brw_reg indirect_offset = get_indirect_offset(ntb, instr); unsigned imm_offset = nir_intrinsic_base(instr); fs_inst *inst; const bool multi_patch = vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH; - fs_reg icp_handle = multi_patch ? + brw_reg icp_handle = multi_patch ? get_tcs_multi_patch_icp_handle(ntb, bld, instr) : get_tcs_single_patch_icp_handle(ntb, bld, instr); @@ -3068,17 +3068,17 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; if (indirect_offset.file == BAD_FILE) { /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3094,10 +3094,10 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, if (first_component != 0) { unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3127,7 +3127,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: { assert(instr->def.bit_size == 32); - fs_reg indirect_offset = get_indirect_offset(ntb, instr); + brw_reg indirect_offset = get_indirect_offset(ntb, instr); unsigned imm_offset = nir_intrinsic_base(instr); unsigned first_component = nir_intrinsic_component(instr); @@ -3136,20 +3136,20 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, /* This MOV replicates the output handle to all enabled channels * is SINGLE_PATCH mode. */ - fs_reg patch_handle = bld.MOV(s.tcs_payload().patch_urb_output); + brw_reg patch_handle = bld.MOV(s.tcs_payload().patch_urb_output); { - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = patch_handle; if (first_component != 0) { unsigned read_components = instr->num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE * reg_unit(devinfo); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3163,18 +3163,18 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, } } else { /* Indirect indexing - use per-slot offsets as well. 
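 *
 * The per-slot offset gives every channel its own slot index; it counts
 * OWords before Xe2, while Xe2+ takes a byte offset, which is why
 * get_indirect_offset() above returns the raw value only for
 * devinfo->ver < 20 and converts it otherwise.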
*/ - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; if (first_component != 0) { unsigned read_components = instr->num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + brw_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE * reg_unit(devinfo); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3192,8 +3192,8 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: { assert(nir_src_bit_size(instr->src[0]) == 32); - fs_reg value = get_nir_src(ntb, instr->src[0]); - fs_reg indirect_offset = get_indirect_offset(ntb, instr); + brw_reg value = get_nir_src(ntb, instr->src[0]); + brw_reg indirect_offset = get_indirect_offset(ntb, instr); unsigned imm_offset = nir_intrinsic_base(instr); unsigned mask = nir_intrinsic_write_mask(instr); @@ -3208,11 +3208,11 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, const bool has_urb_lsc = devinfo->ver >= 20; - fs_reg mask_reg; + brw_reg mask_reg; if (mask != WRITEMASK_XYZW) mask_reg = brw_imm_ud(mask << 16); - fs_reg sources[4]; + brw_reg sources[4]; unsigned m = has_urb_lsc ? 0 : first_component; for (unsigned i = 0; i < num_components; i++) { @@ -3226,7 +3226,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, assert(has_urb_lsc || m == (first_component + num_components)); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg; @@ -3257,7 +3257,7 @@ fs_nir_emit_tes_intrinsic(nir_to_brw_state &ntb, assert(s.stage == MESA_SHADER_TESS_EVAL); struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(s.prog_data); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -3274,7 +3274,7 @@ fs_nir_emit_tes_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: { assert(instr->def.bit_size == 32); - fs_reg indirect_offset = get_indirect_offset(ntb, instr); + brw_reg indirect_offset = get_indirect_offset(ntb, instr); unsigned imm_offset = nir_intrinsic_base(instr); unsigned first_component = nir_intrinsic_component(instr); @@ -3285,9 +3285,9 @@ fs_nir_emit_tes_intrinsic(nir_to_brw_state &ntb, */ const unsigned max_push_slots = 32; if (imm_offset < max_push_slots) { - const fs_reg src = horiz_offset(brw_attr_reg(0, dest.type), + const brw_reg src = horiz_offset(brw_attr_reg(0, dest.type), 4 * imm_offset + first_component); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = component(src, i); } @@ -3298,17 +3298,17 @@ fs_nir_emit_tes_intrinsic(nir_to_brw_state &ntb, (imm_offset / 2) + 1); } else { /* Replicate the patch handle to all enabled channels */ - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = s.tes_payload().patch_urb_input; if (first_component != 0) { unsigned read_components = 
instr->num_components + first_component; - fs_reg tmp = bld.vgrf(dest.type, read_components); + brw_reg tmp = bld.vgrf(dest.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE * reg_unit(devinfo); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3329,17 +3329,17 @@ fs_nir_emit_tes_intrinsic(nir_to_brw_state &ntb, */ unsigned num_components = instr->num_components; - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = s.tes_payload().patch_urb_input; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; if (first_component != 0) { unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dest.type, read_components); + brw_reg tmp = bld.vgrf(dest.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(tmp, bld, i + first_component); } @@ -3368,9 +3368,9 @@ fs_nir_emit_gs_intrinsic(nir_to_brw_state &ntb, fs_visitor &s = ntb.s; assert(s.stage == MESA_SHADER_GEOMETRY); - fs_reg indirect_offset; + brw_reg indirect_offset; - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -3410,7 +3410,7 @@ fs_nir_emit_gs_intrinsic(nir_to_brw_state &ntb, break; case nir_intrinsic_load_invocation_id: { - fs_reg val = ntb.system_values[SYSTEM_VALUE_INVOCATION_ID]; + brw_reg val = ntb.system_values[SYSTEM_VALUE_INVOCATION_ID]; assert(val.file != BAD_FILE); dest.type = val.type; bld.MOV(dest, val); @@ -3426,7 +3426,7 @@ fs_nir_emit_gs_intrinsic(nir_to_brw_state &ntb, /** * Fetch the current render target layer index. */ -static fs_reg +static brw_reg fetch_render_target_array_index(const fs_builder &bld) { const fs_visitor *v = bld.shader; @@ -3437,7 +3437,7 @@ fetch_render_target_array_index(const fs_builder &bld) * to use a <1;8,0> region in order to select the correct word * for each channel. */ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); for (unsigned i = 0; i < DIV_ROUND_UP(bld.dispatch_width(), 16); i++) { const fs_builder hbld = bld.group(16, i); @@ -3456,7 +3456,7 @@ fetch_render_target_array_index(const fs_builder &bld) * dispatch mode. */ assert(bld.dispatch_width() == 16); - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); for (unsigned i = 0; i < v->max_polygons; i++) { const fs_builder hbld = bld.group(8, i); @@ -3469,7 +3469,7 @@ fetch_render_target_array_index(const fs_builder &bld) /* The render target array index is provided in the thread payload as * bits 26:16 of r1.1. */ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 3), brw_imm_uw(0x7ff)); return idx; @@ -3477,14 +3477,14 @@ fetch_render_target_array_index(const fs_builder &bld) /* The render target array index is provided in the thread payload as * bits 26:16 of r0.0. 
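 *
 * Viewing the register as words puts bits 31:16 in word 1, so the AND
 * with 0x7ff below keeps the low 11 of those bits, i.e. exactly bits
 * 26:16 of the original dword.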
*/ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1), brw_imm_uw(0x7ff)); return idx; } } -static fs_reg +static brw_reg fetch_viewport_index(const fs_builder &bld) { const fs_visitor *v = bld.shader; @@ -3495,7 +3495,7 @@ fetch_viewport_index(const fs_builder &bld) * to use a <1;8,0> region in order to select the correct word * for each channel. */ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); for (unsigned i = 0; i < DIV_ROUND_UP(bld.dispatch_width(), 16); i++) { const fs_builder hbld = bld.group(16, i); @@ -3515,8 +3515,8 @@ fetch_viewport_index(const fs_builder &bld) * dispatch mode. */ assert(bld.dispatch_width() == 16); - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); - fs_reg vp_idx_per_poly_dw[2] = { + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); + brw_reg vp_idx_per_poly_dw[2] = { brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, 1, 1), /* R1.1 bits 30:27 */ brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, 1, 6), /* R1.6 bits 30:27 */ }; @@ -3531,7 +3531,7 @@ fetch_viewport_index(const fs_builder &bld) /* The viewport index is provided in the thread payload as * bits 30:27 of r1.1. */ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); bld.SHR(idx, bld.AND(brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 3), brw_imm_uw(0x7800)), @@ -3541,7 +3541,7 @@ fetch_viewport_index(const fs_builder &bld) /* The viewport index is provided in the thread payload as * bits 30:27 of r0.0. */ - const fs_reg idx = bld.vgrf(BRW_TYPE_UD); + const brw_reg idx = bld.vgrf(BRW_TYPE_UD); bld.SHR(idx, bld.AND(brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1), brw_imm_uw(0x7800)), @@ -3551,16 +3551,16 @@ fetch_viewport_index(const fs_builder &bld) } /* Sample from the MCS surface attached to this multisample texture. */ -static fs_reg -emit_mcs_fetch(nir_to_brw_state &ntb, const fs_reg &coordinate, unsigned components, - const fs_reg &texture, - const fs_reg &texture_handle) +static brw_reg +emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned components, + const brw_reg &texture, + const brw_reg &texture_handle) { const fs_builder &bld = ntb.bld; - const fs_reg dest = bld.vgrf(BRW_TYPE_UD, 4); + const brw_reg dest = bld.vgrf(BRW_TYPE_UD, 4); - fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate; srcs[TEX_LOGICAL_SRC_SURFACE] = texture; srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); @@ -3585,7 +3585,7 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const fs_reg &coordinate, unsigned compone * framebuffer at the current fragment coordinates and sample index. */ static fs_inst * -emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const fs_reg &dst, +emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const brw_reg &dst, unsigned target) { fs_visitor &s = ntb.s; @@ -3597,7 +3597,7 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const fs assert(!wm_key->coherent_fb_fetch); /* Calculate the fragment coordinates. 
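 * The non-coherent path addresses the framebuffer like a texture, so the
 * texel address is (pixel_x, pixel_y, render target array index), built
 * by the three MOVs below.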
*/ - const fs_reg coords = bld.vgrf(BRW_TYPE_UD, 3); + const brw_reg coords = bld.vgrf(BRW_TYPE_UD, 3); bld.MOV(offset(coords, bld, 0), s.pixel_x); bld.MOV(offset(coords, bld, 1), s.pixel_y); bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld)); @@ -3613,9 +3613,9 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const fs ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE) ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb); - const fs_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]; - const fs_reg mcs = wm_key->multisample_fbo ? - emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), fs_reg()) : fs_reg(); + const brw_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]; + const brw_reg mcs = wm_key->multisample_fbo ? + emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), brw_reg()) : brw_reg(); /* Use either a normal or a CMS texel fetch message depending on whether * the framebuffer is single or multisample. On SKL+ use the wide CMS @@ -3639,7 +3639,7 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const fs } /* Emit the instruction. */ - fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; srcs[TEX_LOGICAL_SRC_COORDINATE] = coords; srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0); srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = sample; @@ -3661,7 +3661,7 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const fs_builder &bld, const fs * read message. Requires SKL+. */ static fs_inst * -emit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target) +emit_coherent_fb_read(const fs_builder &bld, const brw_reg &dst, unsigned target) { fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst); inst->target = target; @@ -3670,14 +3670,14 @@ emit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target) return inst; } -static fs_reg -alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n) +static brw_reg +alloc_temporary(const fs_builder &bld, unsigned size, brw_reg *regs, unsigned n) { if (n && regs[0].file != BAD_FILE) { return regs[0]; } else { - const fs_reg tmp = bld.vgrf(BRW_TYPE_F, size); + const brw_reg tmp = bld.vgrf(BRW_TYPE_F, size); for (unsigned i = 0; i < n; i++) regs[i] = tmp; @@ -3686,7 +3686,7 @@ alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n) } } -static fs_reg +static brw_reg alloc_frag_output(nir_to_brw_state &ntb, unsigned location) { fs_visitor &s = ntb.s; @@ -3723,7 +3723,7 @@ alloc_frag_output(nir_to_brw_state &ntb, unsigned location) } static void -emit_is_helper_invocation(nir_to_brw_state &ntb, fs_reg result) +emit_is_helper_invocation(nir_to_brw_state &ntb, brw_reg result) { const fs_builder &bld = ntb.bld; @@ -3751,14 +3751,14 @@ emit_is_helper_invocation(nir_to_brw_state &ntb, fs_reg result) } } -static fs_reg +static brw_reg emit_frontfacing_interpolation(nir_to_brw_state &ntb) { const intel_device_info *devinfo = ntb.devinfo; const fs_builder &bld = ntb.bld; fs_visitor &s = ntb.s; - fs_reg ff = bld.vgrf(BRW_TYPE_D); + brw_reg ff = bld.vgrf(BRW_TYPE_D); if (devinfo->ver >= 20) { /* Gfx20+ has separate back-facing bits for each pair of @@ -3766,7 +3766,7 @@ emit_frontfacing_interpolation(nir_to_brw_state &ntb) * use a <1;8,0> region in order to select the correct word for * each channel. 
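 *
 * As a region refresher: <1;8,0> is width 8 with horizontal stride 0 and
 * vertical stride 1 element, so channels 0-7 all replicate the first word
 * and channels 8-15 the second, one word per 8-channel group.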
*/ - const fs_reg tmp = bld.vgrf(BRW_TYPE_UW); + const brw_reg tmp = bld.vgrf(BRW_TYPE_UW); for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const fs_builder hbld = bld.group(16, i); @@ -3785,7 +3785,7 @@ emit_frontfacing_interpolation(nir_to_brw_state &ntb) * dispatch mode. */ assert(s.dispatch_width == 16); - fs_reg tmp = bld.vgrf(BRW_TYPE_W); + brw_reg tmp = bld.vgrf(BRW_TYPE_W); for (unsigned i = 0; i < s.max_polygons; i++) { const fs_builder hbld = bld.group(8, i); @@ -3797,9 +3797,9 @@ emit_frontfacing_interpolation(nir_to_brw_state &ntb) bld.NOT(ff, tmp); } else if (devinfo->ver >= 12) { - fs_reg g1 = fs_reg(retype(brw_vec1_grf(1, 1), BRW_TYPE_W)); + brw_reg g1 = brw_reg(retype(brw_vec1_grf(1, 1), BRW_TYPE_W)); - fs_reg tmp = bld.vgrf(BRW_TYPE_W); + brw_reg tmp = bld.vgrf(BRW_TYPE_W); bld.ASR(tmp, g1, brw_imm_d(15)); bld.NOT(ff, tmp); } else { @@ -3814,7 +3814,7 @@ emit_frontfacing_interpolation(nir_to_brw_state &ntb) * * An ASR 15 fills the low word of the destination. */ - fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_TYPE_W)); + brw_reg g0 = brw_reg(retype(brw_vec1_grf(0, 0), BRW_TYPE_W)); bld.ASR(ff, negate(g0), brw_imm_d(15)); } @@ -3822,7 +3822,7 @@ emit_frontfacing_interpolation(nir_to_brw_state &ntb) return ff; } -static fs_reg +static brw_reg emit_samplepos_setup(nir_to_brw_state &ntb) { const fs_builder &bld = ntb.bld; @@ -3832,7 +3832,7 @@ emit_samplepos_setup(nir_to_brw_state &ntb) struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data); const fs_builder abld = bld.annotate("compute sample position"); - fs_reg pos = abld.vgrf(BRW_TYPE_F, 2); + brw_reg pos = abld.vgrf(BRW_TYPE_F, 2); if (wm_prog_data->persample_dispatch == BRW_NEVER) { /* From ARB_sample_shading specification: @@ -3856,14 +3856,14 @@ emit_samplepos_setup(nir_to_brw_state &ntb) * The X, Y sample positions come in as bytes in thread payload. So, read * the positions using vstride=16, width=8, hstride=2. */ - const fs_reg sample_pos_reg = + const brw_reg sample_pos_reg = fetch_payload_reg(abld, s.fs_payload().sample_pos_reg, BRW_TYPE_W); for (unsigned i = 0; i < 2; i++) { - fs_reg tmp_d = bld.vgrf(BRW_TYPE_D); + brw_reg tmp_d = bld.vgrf(BRW_TYPE_D); abld.MOV(tmp_d, subscript(sample_pos_reg, BRW_TYPE_B, i)); /* Convert int_sample_pos to floating point */ - fs_reg tmp_f = bld.vgrf(BRW_TYPE_F); + brw_reg tmp_f = bld.vgrf(BRW_TYPE_F); abld.MOV(tmp_f, tmp_d); /* Scale to the range [0, 1] */ abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f)); @@ -3882,7 +3882,7 @@ emit_samplepos_setup(nir_to_brw_state &ntb) return pos; } -static fs_reg +static brw_reg emit_sampleid_setup(nir_to_brw_state &ntb) { const intel_device_info *devinfo = ntb.devinfo; @@ -3894,7 +3894,7 @@ emit_sampleid_setup(nir_to_brw_state &ntb) struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data); const fs_builder abld = bld.annotate("compute sample id"); - fs_reg sample_id = abld.vgrf(BRW_TYPE_UD); + brw_reg sample_id = abld.vgrf(BRW_TYPE_UD); assert(key->multisample_fbo != BRW_NEVER); @@ -3926,7 +3926,7 @@ emit_sampleid_setup(nir_to_brw_state &ntb) * TODO: These payload bits exist on Gfx7 too, but they appear to always * be zero, so this code fails to work. We should find out why. 
*/ - const fs_reg tmp = abld.vgrf(BRW_TYPE_UW); + const brw_reg tmp = abld.vgrf(BRW_TYPE_UW); for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const fs_builder hbld = abld.group(MIN2(16, s.dispatch_width), i); @@ -3953,7 +3953,7 @@ emit_sampleid_setup(nir_to_brw_state &ntb) return sample_id; } -static fs_reg +static brw_reg emit_samplemaskin_setup(nir_to_brw_state &ntb) { const fs_builder &bld = ntb.bld; @@ -3965,7 +3965,7 @@ emit_samplemaskin_setup(nir_to_brw_state &ntb) /* The HW doesn't provide us with expected values. */ assert(wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS); - fs_reg coverage_mask = + brw_reg coverage_mask = fetch_payload_reg(bld, s.fs_payload().sample_mask_in_reg, BRW_TYPE_UD); if (wm_prog_data->persample_dispatch == BRW_NEVER) @@ -3986,9 +3986,9 @@ emit_samplemaskin_setup(nir_to_brw_state &ntb) if (ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE) ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb); - fs_reg one = abld.MOV(brw_imm_ud(1)); - fs_reg enabled_mask = abld.SHL(one, ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]); - fs_reg mask = abld.AND(enabled_mask, coverage_mask); + brw_reg one = abld.MOV(brw_imm_ud(1)); + brw_reg enabled_mask = abld.SHL(one, ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]); + brw_reg mask = abld.AND(enabled_mask, coverage_mask); if (wm_prog_data->persample_dispatch == BRW_ALWAYS) return mask; @@ -4000,7 +4000,7 @@ emit_samplemaskin_setup(nir_to_brw_state &ntb) return mask; } -static fs_reg +static brw_reg emit_shading_rate_setup(nir_to_brw_state &ntb) { const intel_device_info *devinfo = ntb.devinfo; @@ -4026,14 +4026,14 @@ emit_shading_rate_setup(nir_to_brw_state &ntb) */ /* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */ - fs_reg actual_x = fs_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB)); + brw_reg actual_x = brw_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB)); /* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */ - fs_reg actual_y = byte_offset(actual_x, 1); + brw_reg actual_y = byte_offset(actual_x, 1); - fs_reg int_rate_y = abld.SHR(actual_y, brw_imm_ud(1)); - fs_reg int_rate_x = abld.SHR(actual_x, brw_imm_ud(1)); + brw_reg int_rate_y = abld.SHR(actual_y, brw_imm_ud(1)); + brw_reg int_rate_x = abld.SHR(actual_x, brw_imm_ud(1)); - fs_reg rate = abld.OR(abld.SHL(int_rate_x, brw_imm_ud(2)), int_rate_y); + brw_reg rate = abld.OR(abld.SHL(int_rate_x, brw_imm_ud(2)), int_rate_y); if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS) return rate; @@ -4055,7 +4055,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, assert(s.stage == MESA_SHADER_FRAGMENT); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -4066,7 +4066,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_sample_pos: case nir_intrinsic_load_sample_pos_or_center: { - fs_reg sample_pos = ntb.system_values[SYSTEM_VALUE_SAMPLE_POS]; + brw_reg sample_pos = ntb.system_values[SYSTEM_VALUE_SAMPLE_POS]; assert(sample_pos.file != BAD_FILE); dest.type = sample_pos.type; bld.MOV(dest, sample_pos); @@ -4088,7 +4088,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_sample_id: case nir_intrinsic_load_frag_shading_rate: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); - fs_reg val = ntb.system_values[sv]; + brw_reg val = ntb.system_values[sv]; assert(val.file != BAD_FILE); dest.type = val.type; bld.MOV(dest, val); @@ -4096,14 +4096,14 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, } case 
nir_intrinsic_store_output: { - const fs_reg src = get_nir_src(ntb, instr->src[0]); + const brw_reg src = get_nir_src(ntb, instr->src[0]); const unsigned store_offset = nir_src_as_uint(instr->src[1]); const unsigned location = nir_intrinsic_base(instr) + SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION); - const fs_reg new_dest = retype(alloc_frag_output(ntb, location), + const brw_reg new_dest = retype(alloc_frag_output(ntb, location), src.type); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(src, bld, i); } @@ -4118,14 +4118,14 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, assert(l >= FRAG_RESULT_DATA0); const unsigned load_offset = nir_src_as_uint(instr->src[0]); const unsigned target = l - FRAG_RESULT_DATA0 + load_offset; - const fs_reg tmp = bld.vgrf(dest.type, 4); + const brw_reg tmp = bld.vgrf(dest.type, 4); if (reinterpret_cast<const struct brw_wm_prog_key *>(s.key)->coherent_fb_fetch) emit_coherent_fb_read(bld, tmp, target); else emit_non_coherent_fb_read(ntb, bld, tmp, target); - fs_reg comps[instr->num_components]; + brw_reg comps[instr->num_components]; for (unsigned i = 0; i < instr->num_components; i++) { comps[i] = offset(tmp, bld, i + nir_intrinsic_component(instr)); } @@ -4185,7 +4185,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, brw_imm_d(0), BRW_CONDITIONAL_Z); } } else { - fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_TYPE_UW)); + brw_reg some_reg = brw_reg(retype(brw_vec8_grf(0, 0), BRW_TYPE_UW)); cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ); } @@ -4280,7 +4280,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, /* Use the delta_xy values computed from the payload */ enum brw_barycentric_mode bary = brw_barycentric_mode( reinterpret_cast<const struct brw_wm_prog_key *>(s.key), instr); - const fs_reg srcs[] = { offset(s.delta_xy[bary], bld, 0), + const brw_reg srcs[] = { offset(s.delta_xy[bary], bld, 0), offset(s.delta_xy[bary], bld, 1) }; bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); break; @@ -4297,18 +4297,18 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, interpolation); } else { - fs_reg msg_data; + brw_reg msg_data; if (nir_src_is_const(instr->src[0])) { msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4); } else { - const fs_reg sample_src = retype(get_nir_src(ntb, instr->src[0]), + const brw_reg sample_src = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_UD); - const fs_reg sample_id = bld.emit_uniformize(sample_src); + const brw_reg sample_id = bld.emit_uniformize(sample_src); msg_data = component(bld.group(8, 0).vgrf(BRW_TYPE_UD), 0); bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u)); } - fs_reg flag_reg; + brw_reg flag_reg; struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) s.key; if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) { struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data); @@ -4322,7 +4322,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dest, - fs_reg(), /* src */ + brw_reg(), /* src */ msg_data, flag_reg, interpolation); @@ -4348,26 +4348,26 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dest, - fs_reg(), /* src */ + brw_reg(), /* src */ brw_imm_ud(off_x | (off_y << 4)), - fs_reg(), /* flag_reg */ + brw_reg(), /* flag_reg */ interpolation); } else { - fs_reg src = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_D); + brw_reg src =
retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_D); const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; emit_pixel_interpolater_send(bld, opcode, dest, src, brw_imm_ud(0u), - fs_reg(), /* flag_reg */ + brw_reg(), /* flag_reg */ interpolation); } break; } case nir_intrinsic_load_frag_coord: { - fs_reg comps[4] = { s.pixel_x, s.pixel_y, s.pixel_z, s.wpos_w }; + brw_reg comps[4] = { s.pixel_x, s.pixel_y, s.pixel_z, s.wpos_w }; bld.VEC(dest, comps, 4); break; } @@ -4378,7 +4378,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, nir_intrinsic_instr *bary_intrinsic = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); nir_intrinsic_op bary_intrin = bary_intrinsic->intrinsic; - fs_reg dst_xy; + brw_reg dst_xy; if (bary_intrin == nir_intrinsic_load_barycentric_at_offset || bary_intrin == nir_intrinsic_load_barycentric_at_sample) { @@ -4392,7 +4392,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, } for (unsigned int i = 0; i < instr->num_components; i++) { - fs_reg interp = + brw_reg interp = s.interp_reg(bld, nir_intrinsic_base(instr), nir_intrinsic_component(instr) + i, 0); interp.type = BRW_TYPE_F; @@ -4420,7 +4420,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, assert(gl_shader_stage_uses_workgroup(s.stage)); struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(s.prog_data); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -4459,7 +4459,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, break; case nir_intrinsic_load_workgroup_id: { - fs_reg val = ntb.system_values[SYSTEM_VALUE_WORKGROUP_ID]; + brw_reg val = ntb.system_values[SYSTEM_VALUE_WORKGROUP_ID]; assert(val.file != BAD_FILE); dest.type = val.type; for (unsigned i = 0; i < 3; i++) @@ -4472,7 +4472,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, cs_prog_data->uses_num_work_groups = true; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(0); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(3); /* num components */ @@ -4493,10 +4493,10 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_shared: { const unsigned bit_size = instr->def.bit_size; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX7_BTI_SLM); - fs_reg addr = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_UD); + brw_reg addr = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_UD); unsigned base = nir_intrinsic_base(instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = base ? 
bld.ADD(addr, brw_imm_ud(base)) : addr; @@ -4522,7 +4522,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, assert(instr->def.num_components == 1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); - fs_reg read_result = bld.vgrf(BRW_TYPE_UD); + brw_reg read_result = bld.vgrf(BRW_TYPE_UD); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, read_result, srcs, SURFACE_LOGICAL_NUM_SRCS); bld.MOV(dest, subscript(read_result, dest.type, 0)); @@ -4532,10 +4532,10 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_shared: { const unsigned bit_size = nir_src_bit_size(instr->src[0]); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX7_BTI_SLM); - fs_reg addr = retype(get_nir_src(ntb, instr->src[1]), BRW_TYPE_UD); + brw_reg addr = retype(get_nir_src(ntb, instr->src[1]), BRW_TYPE_UD); unsigned base = nir_intrinsic_base(instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = base ? bld.ADD(addr, brw_imm_ud(base)) : addr; @@ -4546,7 +4546,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, */ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); - fs_reg data = get_nir_src(ntb, instr->src[0]); + brw_reg data = get_nir_src(ntb, instr->src[0]); data.type = brw_type_with_size(BRW_TYPE_UD, bit_size); assert(bit_size <= 32); @@ -4559,7 +4559,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_DATA] = data; srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } else { assert(nir_src_num_components(instr->src[0]) == 1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); @@ -4568,7 +4568,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, bld.MOV(srcs[SURFACE_LOGICAL_SRC_DATA], data); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } break; } @@ -4591,7 +4591,7 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, brw_type_for_nir_type(devinfo, nir_intrinsic_src_type(instr)); dest = retype(dest, dest_type); - fs_reg src0 = retype(get_nir_src(ntb, instr->src[0]), dest_type); + brw_reg src0 = retype(get_nir_src(ntb, instr->src[0]), dest_type); fs_builder bld16 = bld.exec_all().group(16, 0); fs_builder bldn = devinfo->ver >= 20 ? 
bld16 : bld.exec_all().group(8, 0); @@ -4622,7 +4622,7 @@ emit_rt_lsc_fence(const fs_builder &bld, const intel_device_info *devinfo = bld.shader->devinfo; const fs_builder ubld = bld.exec_all().group(8, 0); - fs_reg tmp = ubld.vgrf(BRW_TYPE_UD); + brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, tmp, brw_imm_ud(0) /* desc */, brw_imm_ud(0) /* ex_desc */, @@ -4649,7 +4649,7 @@ fs_nir_emit_bs_intrinsic(nir_to_brw_state &ntb, assert(brw_shader_stage_is_bindless(s.stage)); const bs_thread_payload &payload = s.bs_payload(); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -4672,7 +4672,7 @@ fs_nir_emit_bs_intrinsic(nir_to_brw_state &ntb, } } -static fs_reg +static brw_reg brw_nir_reduction_op_identity(const fs_builder &bld, nir_op op, brw_reg_type type) { @@ -4788,7 +4788,7 @@ add_rebuild_src(nir_src *src, void *state) return true; } -static fs_reg +static brw_reg try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def *resource_def, bool a64 = false) { @@ -4800,7 +4800,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, if (!nir_foreach_src(resource_def->parent_instr, add_rebuild_src, &resources)) - return fs_reg(); + return brw_reg(); resources.array.push_back(resource_def); if (resources.array.size() == 1) { @@ -4816,8 +4816,8 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, case nir_intrinsic_load_uniform: { unsigned base_offset = nir_intrinsic_base(intrin); unsigned load_offset = nir_src_as_uint(intrin->src[0]); - fs_reg src = brw_uniform_reg(base_offset / 4, - brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); + brw_reg src = brw_uniform_reg(base_offset / 4, + brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); src.offset = load_offset + base_offset % 4; return src; } @@ -4826,8 +4826,8 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, assert(ntb.s.stage == MESA_SHADER_MESH || ntb.s.stage == MESA_SHADER_TASK); const task_mesh_thread_payload &payload = ntb.s.task_mesh_payload(); - fs_reg data = offset(payload.inline_parameter, 1, - nir_intrinsic_align_offset(intrin)); + brw_reg data = offset(payload.inline_parameter, 1, + nir_intrinsic_align_offset(intrin)); return retype(data, brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); } @@ -4881,7 +4881,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, if (nir_op_infos[alu->op].num_inputs > 3) break; - fs_reg srcs[3]; + brw_reg srcs[3]; for (unsigned s = 0; s < nir_op_infos[alu->op].num_inputs; s++) { srcs[s] = offset( ntb.resource_insts[alu->src[s].src.ssa->index]->dst, @@ -4896,7 +4896,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, &ntb.resource_insts[def->index]); break; case nir_op_iadd3: { - fs_reg dst = ubld8.vgrf(srcs[0].type); + brw_reg dst = ubld8.vgrf(srcs[0].type); ntb.resource_insts[def->index] = ubld8.ADD3(dst, srcs[1].file == IMM ? 
srcs[1] : srcs[0], @@ -4924,7 +4924,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, case nir_op_ult32: { if (brw_type_size_bits(srcs[0].type) != 32) break; - fs_reg dst = ubld8.vgrf(srcs[0].type); + brw_reg dst = ubld8.vgrf(srcs[0].type); enum brw_reg_type utype = brw_type_with_size(srcs[0].type, brw_type_size_bits(srcs[0].type)); @@ -4948,7 +4948,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, &ntb.resource_insts[def->index]); break; case nir_op_pack_64_2x32_split: { - fs_reg dst = ubld8.vgrf(BRW_TYPE_Q); + brw_reg dst = ubld8.vgrf(BRW_TYPE_Q); ntb.resource_insts[def->index] = ubld8.emit(FS_OPCODE_PACK, dst, srcs[0], srcs[1]); } @@ -4972,8 +4972,8 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, unsigned base_offset = nir_intrinsic_base(intrin); unsigned load_offset = nir_src_as_uint(intrin->src[0]); - fs_reg src = brw_uniform_reg(base_offset / 4, - brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); + brw_reg src = brw_uniform_reg(base_offset / 4, + brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); src.offset = load_offset + base_offset % 4; ubld8.MOV(src, &ntb.resource_insts[def->index]); break; @@ -4983,7 +4983,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, assert(ntb.s.stage == MESA_SHADER_MESH || ntb.s.stage == MESA_SHADER_TASK); const task_mesh_thread_payload &payload = ntb.s.task_mesh_payload(); - fs_reg data = retype( + brw_reg data = retype( offset(payload.inline_parameter, 1, nir_intrinsic_align_offset(intrin)), brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size)); @@ -5009,7 +5009,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, case nir_intrinsic_load_reloc_const_intel: { uint32_t id = nir_intrinsic_param_idx(intrin); - fs_reg dst = ubld8.vgrf(BRW_TYPE_D); + brw_reg dst = ubld8.vgrf(BRW_TYPE_D); ntb.resource_insts[def->index] = ubld8.emit(SHADER_OPCODE_MOV_RELOC_IMM, dst, brw_imm_ud(id), brw_imm_ud(0)); @@ -5020,10 +5020,10 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, case nir_intrinsic_load_ssbo_uniform_block_intel: { enum brw_reg_type type = brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size); - fs_reg src_data = retype(ntb.ssa_values[def->index], type); + brw_reg src_data = retype(ntb.ssa_values[def->index], type); unsigned n_components = ntb.s.alloc.sizes[src_data.nr] / (bld.dispatch_width() / 8); - fs_reg dst_data = ubld8.vgrf(type, n_components); + brw_reg dst_data = ubld8.vgrf(type, n_components); ntb.resource_insts[def->index] = ubld8.MOV(dst_data, src_data); for (unsigned i = 1; i < n_components; i++) { ubld8.MOV(offset(dst_data, ubld8, i), @@ -5056,7 +5056,7 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, fprintf(stderr, "\n"); } #endif - return fs_reg(); + return brw_reg(); } } @@ -5064,23 +5064,23 @@ try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld, return component(ntb.resource_insts[resource_def->index]->dst, 0); } -static fs_reg +static brw_reg get_nir_image_intrinsic_image(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_intrinsic_instr *instr) { if (is_resource_src(instr->src[0])) { - fs_reg surf_index = get_resource_nir_src(ntb, instr->src[0]); + brw_reg surf_index = get_resource_nir_src(ntb, instr->src[0]); if (surf_index.file != BAD_FILE) return surf_index; } - fs_reg image = retype(get_nir_src_imm(ntb, instr->src[0]), BRW_TYPE_UD); - fs_reg surf_index = image; + brw_reg image = retype(get_nir_src_imm(ntb, instr->src[0]), BRW_TYPE_UD); + 
brw_reg surf_index = image; return bld.emit_uniformize(surf_index); } -static fs_reg +static brw_reg get_nir_buffer_intrinsic_index(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_intrinsic_instr *instr, bool *no_mask_handle = NULL) { @@ -5098,7 +5098,7 @@ get_nir_buffer_intrinsic_index(nir_to_brw_state &ntb, const brw::fs_builder &bld *no_mask_handle = true; return brw_imm_ud(nir_src_as_uint(src)); } else if (is_resource_src(src)) { - fs_reg surf_index = get_resource_nir_src(ntb, src); + brw_reg surf_index = get_resource_nir_src(ntb, src); if (surf_index.file != BAD_FILE) { if (no_mask_handle) *no_mask_handle = true; @@ -5131,7 +5131,7 @@ get_nir_buffer_intrinsic_index(nir_to_brw_state &ntb, const brw::fs_builder &bld * at the same logical offset, the scratch read/write instruction acts on * contiguous elements and we get good cache locality. */ -static fs_reg +static brw_reg swizzle_nir_scratch_addr(nir_to_brw_state &ntb, const brw::fs_builder &bld, const nir_src &nir_addr_src, @@ -5139,7 +5139,7 @@ swizzle_nir_scratch_addr(nir_to_brw_state &ntb, { fs_visitor &s = ntb.s; - const fs_reg &chan_index = + const brw_reg &chan_index = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; const unsigned chan_index_bits = ffs(s.dispatch_width) - 1; @@ -5163,7 +5163,7 @@ swizzle_nir_scratch_addr(nir_to_brw_state &ntb, } } - const fs_reg nir_addr = + const brw_reg nir_addr = retype(get_nir_src(ntb, nir_addr_src), BRW_TYPE_UD); if (in_dwords) { @@ -5176,8 +5176,8 @@ swizzle_nir_scratch_addr(nir_to_brw_state &ntb, /* This case is substantially more annoying because we have to pay * attention to those pesky two bottom bits. */ - fs_reg chan_addr = bld.SHL(chan_index, brw_imm_ud(2)); - fs_reg addr_bits = + brw_reg chan_addr = bld.SHL(chan_index, brw_imm_ud(2)); + brw_reg addr_bits = bld.OR(bld.AND(nir_addr, brw_imm_ud(0x3u)), bld.SHL(bld.AND(nir_addr, brw_imm_ud(~0x3u)), brw_imm_ud(chan_index_bits))); @@ -5203,8 +5203,8 @@ choose_oword_block_size_dwords(const struct intel_device_info *devinfo, return block; } -static fs_reg -increment_a64_address(const fs_builder &_bld, fs_reg address, uint32_t v, bool use_no_mask) +static brw_reg +increment_a64_address(const fs_builder &_bld, brw_reg address, uint32_t v, bool use_no_mask) { const fs_builder bld = use_no_mask ? _bld.exec_all().group(8, 0) : _bld; @@ -5213,11 +5213,11 @@ increment_a64_address(const fs_builder &_bld, fs_reg address, uint32_t v, bool u imm.u64 = v; return bld.ADD(address, imm); } else { - fs_reg dst = bld.vgrf(BRW_TYPE_UQ); - fs_reg dst_low = subscript(dst, BRW_TYPE_UD, 0); - fs_reg dst_high = subscript(dst, BRW_TYPE_UD, 1); - fs_reg src_low = subscript(address, BRW_TYPE_UD, 0); - fs_reg src_high = subscript(address, BRW_TYPE_UD, 1); + brw_reg dst = bld.vgrf(BRW_TYPE_UQ); + brw_reg dst_low = subscript(dst, BRW_TYPE_UD, 0); + brw_reg dst_high = subscript(dst, BRW_TYPE_UD, 1); + brw_reg src_low = subscript(address, BRW_TYPE_UD, 0); + brw_reg src_high = subscript(address, BRW_TYPE_UD, 1); /* Add low and if that overflows, add carry to high.
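 * In plain C terms the intent is:
 *
 *    uint32_t lo = src_low + v;           // may wrap around
 *    uint32_t hi = src_high + (lo < v);   // +1 exactly when the add carried
 *
 * with the carry detected via the overflow conditional mod below.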
*/ bld.ADD(dst_low, src_low, brw_imm_ud(v))->conditional_mod = BRW_CONDITIONAL_O; @@ -5226,7 +5226,7 @@ increment_a64_address(const fs_builder &_bld, fs_reg address, uint32_t v, bool u } } -static fs_reg +static brw_reg emit_fence(const fs_builder &bld, enum opcode opcode, uint8_t sfid, uint32_t desc, bool commit_enable, uint8_t bti) @@ -5234,7 +5234,7 @@ emit_fence(const fs_builder &bld, enum opcode opcode, assert(opcode == SHADER_OPCODE_INTERLOCK || opcode == SHADER_OPCODE_MEMORY_FENCE); - fs_reg dst = bld.vgrf(BRW_TYPE_UD); + brw_reg dst = bld.vgrf(BRW_TYPE_UD); fs_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0), brw_imm_ud(commit_enable), brw_imm_ud(bti)); @@ -5280,15 +5280,15 @@ lsc_fence_descriptor_for_intrinsic(const struct intel_device_info *devinfo, /** * Create a MOV to read the timestamp register. */ -static fs_reg +static brw_reg get_timestamp(const fs_builder &bld) { fs_visitor &s = *bld.shader; - fs_reg ts = fs_reg(retype(brw_vec4_reg(BRW_ARCHITECTURE_REGISTER_FILE, + brw_reg ts = brw_reg(retype(brw_vec4_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TIMESTAMP, 0), BRW_TYPE_UD)); - fs_reg dst = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); + brw_reg dst = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. @@ -5309,7 +5309,7 @@ component_from_intrinsic(nir_intrinsic_instr *instr) static void adjust_handle_and_offset(const fs_builder &bld, - fs_reg &urb_handle, + brw_reg &urb_handle, unsigned &urb_global_offset) { /* Make sure that URB global offset is below 2048 (2^11), because @@ -5328,8 +5328,8 @@ adjust_handle_and_offset(const fs_builder &bld, static void emit_urb_direct_vec4_write(const fs_builder &bld, unsigned urb_global_offset, - const fs_reg &src, - fs_reg urb_handle, + const brw_reg &src, + brw_reg urb_handle, unsigned dst_comp_offset, unsigned comps, unsigned mask) @@ -5337,7 +5337,7 @@ emit_urb_direct_vec4_write(const fs_builder &bld, for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { fs_builder bld8 = bld.group(8, q); - fs_reg payload_srcs[8]; + brw_reg payload_srcs[8]; unsigned length = 0; for (unsigned i = 0; i < dst_comp_offset; i++) @@ -5346,7 +5346,7 @@ emit_urb_direct_vec4_write(const fs_builder &bld, for (unsigned c = 0; c < comps; c++) payload_srcs[length++] = quarter(offset(src, bld, c), q); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), @@ -5363,7 +5363,7 @@ emit_urb_direct_vec4_write(const fs_builder &bld, static void emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &src, fs_reg urb_handle) + const brw_reg &src, brw_reg urb_handle) { assert(nir_src_bit_size(instr->src[0]) == 32); @@ -5393,8 +5393,8 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_urb_direct_vec4_write_xe2(const fs_builder &bld, unsigned offset_in_bytes, - const fs_reg &src, - fs_reg urb_handle, + const brw_reg &src, + brw_reg urb_handle, unsigned comps, unsigned mask) { @@ -5410,12 +5410,12 @@ emit_urb_direct_vec4_write_xe2(const fs_builder &bld, for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) { fs_builder hbld = bld.group(write_size, q); - fs_reg payload_srcs[comps]; + brw_reg payload_srcs[comps]; for (unsigned c = 0; c < comps; c++) payload_srcs[c] = horiz_offset(offset(src, bld, c), 
write_size * q); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); int nr = bld.shader->alloc.allocate(comps * runit); @@ -5430,7 +5430,7 @@ emit_urb_direct_vec4_write_xe2(const fs_builder &bld, static void emit_urb_direct_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &src, fs_reg urb_handle) + const brw_reg &src, brw_reg urb_handle) { assert(nir_src_bit_size(instr->src[0]) == 32); @@ -5452,10 +5452,10 @@ emit_urb_direct_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_urb_indirect_vec4_write(const fs_builder &bld, - const fs_reg &offset_src, + const brw_reg &offset_src, unsigned base, - const fs_reg &src, - fs_reg urb_handle, + const brw_reg &src, + brw_reg urb_handle, unsigned dst_comp_offset, unsigned comps, unsigned mask) @@ -5465,10 +5465,10 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, /* offset is always positive, so signedness doesn't matter */ assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD); - fs_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)); - fs_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2)); + brw_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)); + brw_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2)); - fs_reg payload_srcs[8]; + brw_reg payload_srcs[8]; unsigned length = 0; for (unsigned i = 0; i < dst_comp_offset; i++) @@ -5477,7 +5477,7 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, for (unsigned c = 0; c < comps; c++) payload_srcs[length++] = quarter(offset(src, bld, c), q); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); @@ -5494,8 +5494,8 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, static void emit_urb_indirect_writes_mod(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &src, const fs_reg &offset_src, - fs_reg urb_handle, unsigned mod) + const brw_reg &src, const brw_reg &offset_src, + brw_reg urb_handle, unsigned mod) { assert(nir_src_bit_size(instr->src[0]) == 32); @@ -5514,8 +5514,8 @@ emit_urb_indirect_writes_mod(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &src, const fs_reg &offset_src, - fs_reg urb_handle) + const brw_reg &src, const brw_reg &offset_src, + brw_reg urb_handle) { assert(nir_src_bit_size(instr->src[0]) == 32); @@ -5539,17 +5539,17 @@ emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) { fs_builder wbld = bld.group(write_size, q); - fs_reg payload_srcs[comps]; + brw_reg payload_srcs[comps]; for (unsigned c = 0; c < comps; c++) payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q); - fs_reg addr = + brw_reg addr = wbld.ADD(wbld.SHL(retype(horiz_offset(offset_src, write_size * q), BRW_TYPE_UD), brw_imm_ud(2)), urb_handle); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = addr; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); int nr = bld.shader->alloc.allocate(comps * runit); @@ -5564,8 +5564,8 @@ emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr 
*instr, static void emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &src, const fs_reg &offset_src, - fs_reg urb_handle) + const brw_reg &src, const brw_reg &offset_src, + brw_reg urb_handle) { assert(nir_src_bit_size(instr->src[0]) == 32); @@ -5585,7 +5585,7 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr, if (((1 << c) & nir_intrinsic_write_mask(instr)) == 0) continue; - fs_reg src_comp = offset(src, bld, c); + brw_reg src_comp = offset(src, bld, c); for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { fs_builder bld8 = bld.group(8, q); @@ -5594,21 +5594,21 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr, assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD); - fs_reg off = + brw_reg off = bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q), brw_imm_ud(c + base_in_dwords)); - fs_reg m = bld8.AND(off, brw_imm_ud(0x3)); - fs_reg t = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m); - fs_reg mask = bld8.SHL(t, brw_imm_ud(16)); - fs_reg final_offset = bld8.SHR(off, brw_imm_ud(2)); + brw_reg m = bld8.AND(off, brw_imm_ud(0x3)); + brw_reg t = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m); + brw_reg mask = bld8.SHL(t, brw_imm_ud(16)); + brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2)); - fs_reg payload_srcs[4]; + brw_reg payload_srcs[4]; unsigned length = 0; for (unsigned j = 0; j < 4; j++) payload_srcs[length++] = quarter(src_comp, q); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; @@ -5626,7 +5626,7 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &dest, fs_reg urb_handle) + const brw_reg &dest, brw_reg urb_handle) { assert(instr->def.bit_size == 32); @@ -5648,8 +5648,8 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr, const unsigned num_regs = comp_offset + comps; fs_builder ubld8 = bld.group(8, 0).exec_all(); - fs_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs); + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, @@ -5659,15 +5659,15 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr, inst->size_written = num_regs * REG_SIZE; for (unsigned c = 0; c < comps; c++) { - fs_reg dest_comp = offset(dest, bld, c); - fs_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0); + brw_reg dest_comp = offset(dest, bld, c); + brw_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0); bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); } } static void emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &dest, fs_reg urb_handle) + const brw_reg &dest, brw_reg urb_handle) { assert(instr->def.bit_size == 32); @@ -5687,8 +5687,8 @@ emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, if (offset_in_dwords > 0) urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4)); - fs_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; 
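/* In this Xe2 variant the dword offset was already folded into the handle
 * as a byte offset (the ADD of offset_in_dwords * 4 above), so no
 * per-slot offset source needs to be set before the emit.
 */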
fs_inst *inst = ubld16.emit(SHADER_OPCODE_URB_READ_LOGICAL, @@ -5696,15 +5696,15 @@ emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, inst->size_written = 2 * comps * REG_SIZE; for (unsigned c = 0; c < comps; c++) { - fs_reg dest_comp = offset(dest, bld, c); - fs_reg data_comp = horiz_stride(offset(data, ubld16, c), 0); + brw_reg dest_comp = offset(dest, bld, c); + brw_reg data_comp = horiz_stride(offset(data, ubld16, c), 0); bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); } } static void emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &dest, const fs_reg &offset_src, fs_reg urb_handle) + const brw_reg &dest, const brw_reg &offset_src, brw_reg urb_handle) { assert(instr->def.bit_size == 32); @@ -5712,12 +5712,12 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, if (comps == 0) return; - fs_reg seq_ud; + brw_reg seq_ud; { fs_builder ubld8 = bld.group(8, 0).exec_all(); seq_ud = ubld8.vgrf(BRW_TYPE_UD, 1); - fs_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1); - ubld8.MOV(seq_uw, fs_reg(brw_imm_v(0x76543210))); + brw_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1); + ubld8.MOV(seq_uw, brw_reg(brw_imm_v(0x76543210))); ubld8.MOV(seq_ud, seq_uw); seq_ud = ubld8.SHL(seq_ud, brw_imm_ud(2)); } @@ -5732,31 +5732,31 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, /* offset is always positive, so signedness doesn't matter */ assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD); - fs_reg off = + brw_reg off = bld8.ADD(bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)), brw_imm_ud(base_in_dwords + c)); STATIC_ASSERT(IS_POT(REG_SIZE) && REG_SIZE > 1); - fs_reg comp; + brw_reg comp; comp = bld8.AND(off, brw_imm_ud(0x3)); comp = bld8.SHL(comp, brw_imm_ud(ffs(REG_SIZE) - 1)); comp = bld8.ADD(comp, seq_ud); off = bld8.SHR(off, brw_imm_ud(2)); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; - fs_reg data = bld8.vgrf(BRW_TYPE_UD, 4); + brw_reg data = bld8.vgrf(BRW_TYPE_UD, 4); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, srcs, ARRAY_SIZE(srcs)); inst->offset = 0; inst->size_written = 4 * REG_SIZE; - fs_reg dest_comp = offset(dest, bld, c); + brw_reg dest_comp = offset(dest, bld, c); bld8.emit(SHADER_OPCODE_MOV_INDIRECT, retype(quarter(dest_comp, q), BRW_TYPE_UD), data, @@ -5768,8 +5768,8 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &dest, const fs_reg &offset_src, - fs_reg urb_handle) + const brw_reg &dest, const brw_reg &offset_src, + brw_reg urb_handle) { assert(instr->def.bit_size == 32); @@ -5785,16 +5785,16 @@ emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, if (offset_in_dwords > 0) urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4)); - fs_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); + brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) { fs_builder wbld = bld.group(16, q); - fs_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q), - BRW_TYPE_UD), - brw_imm_ud(2)); + brw_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q), + BRW_TYPE_UD), + brw_imm_ud(2)); - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = wbld.ADD(addr, urb_handle); 
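/* Per channel this computes, in scalar terms:
 *
 *    handle[ch] = urb_handle + (offset_src[ch] << 2);   // dwords to bytes
 *
 * so divergent NIR offsets become per-channel URB addresses.
 */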
fs_inst *inst = wbld.emit(SHADER_OPCODE_URB_READ_LOGICAL, @@ -5802,8 +5802,8 @@ emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, inst->size_written = 2 * comps * REG_SIZE; for (unsigned c = 0; c < comps; c++) { - fs_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q); - fs_reg data_comp = offset(data, wbld, c); + brw_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q); + brw_reg data_comp = offset(data, wbld, c); wbld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); } } @@ -5812,9 +5812,9 @@ emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr, static void emit_task_mesh_store(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &urb_handle) + const brw_reg &urb_handle) { - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg src = get_nir_src(ntb, instr->src[0]); nir_src *offset_nir_src = nir_get_io_offset_src(instr); if (nir_src_is_const(*offset_nir_src)) { @@ -5850,9 +5850,9 @@ emit_task_mesh_store(nir_to_brw_state &ntb, static void emit_task_mesh_load(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &urb_handle) + const brw_reg &urb_handle) { - fs_reg dest = get_nir_def(ntb, instr->def); + brw_reg dest = get_nir_def(ntb, instr->def); nir_src *offset_nir_src = nir_get_io_offset_src(instr); /* TODO(mesh): for per_vertex and per_primitive, if we could keep around @@ -5882,13 +5882,13 @@ fs_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const fs_builder &bld, assert(s.stage == MESA_SHADER_MESH || s.stage == MESA_SHADER_TASK); const task_mesh_thread_payload &payload = s.task_mesh_payload(); - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); switch (instr->intrinsic) { case nir_intrinsic_load_mesh_inline_data_intel: { - fs_reg data = offset(payload.inline_parameter, 1, nir_intrinsic_align_offset(instr)); + brw_reg data = offset(payload.inline_parameter, 1, nir_intrinsic_align_offset(instr)); bld.MOV(dest, retype(data, dest.type)); break; } @@ -6007,7 +6007,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, return; } - fs_reg dest; + brw_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_def(ntb, instr->def); @@ -6026,7 +6026,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, if (nir_intrinsic_resource_access_intel(instr) & nir_resource_intel_non_uniform) { - ntb.uniform_values[instr->def.index] = fs_reg(); + ntb.uniform_values[instr->def.index] = brw_reg(); } else { ntb.uniform_values[instr->def.index] = try_rebuild_source(ntb, bld, instr->src[1].ssa); @@ -6051,7 +6051,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, /* Get some metadata from the image intrinsic. 
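 * (nir_intrinsic_infos[] supplies the source count used below as
 * num_srcs, which determines how the SURFACE_LOGICAL source array gets
 * filled in.)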
*/ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; switch (instr->intrinsic) { case nir_intrinsic_image_load: @@ -6088,7 +6088,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(ntb, instr->src[3]); srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } else { unsigned num_srcs = info->num_srcs; enum lsc_opcode op = lsc_aop_for_nir_intrinsic(instr); @@ -6099,12 +6099,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); - fs_reg data; + brw_reg data; if (num_srcs >= 4) data = get_nir_src(ntb, instr->src[3]); if (num_srcs >= 5) { - fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(ntb, instr->src[4]) }; + brw_reg tmp = bld.vgrf(data.type, 2); + brw_reg sources[2] = { data, get_nir_src(ntb, instr->src[4]) }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } @@ -6127,12 +6127,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * Incidentally, this means that we can handle bindless with exactly the * same code. */ - fs_reg image = retype(get_nir_src_imm(ntb, instr->src[0]), BRW_TYPE_UD); + brw_reg image = retype(get_nir_src_imm(ntb, instr->src[0]), BRW_TYPE_UD); image = bld.emit_uniformize(image); assert(nir_src_as_uint(instr->src[1]) == 0); - fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; if (instr->intrinsic == nir_intrinsic_image_size) srcs[TEX_LOGICAL_SRC_SURFACE] = image; else @@ -6147,7 +6147,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ const fs_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); - fs_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4); + brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4); fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL, tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = 4 * REG_SIZE * reg_unit(devinfo); @@ -6160,7 +6160,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_image_load_raw_intel: { - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = get_nir_image_intrinsic_image(ntb, bld, instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(ntb, instr->src[1]); @@ -6176,7 +6176,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_image_store_raw_intel: { - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = get_nir_image_intrinsic_image(ntb, bld, instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(ntb, instr->src[1]); @@ -6186,7 +6186,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); break; } @@ -6277,7 +6277,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } unsigned fence_regs_count = 0; - fs_reg fence_regs[4] = {}; + brw_reg fence_regs[4] = {}; const fs_builder ubld = bld.group(8, 0); @@ -6436,8 +6436,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_shader_clock: { /* We cannot do anything if there is an event, so ignore it for now */ - const fs_reg shader_clock = get_timestamp(bld); - const fs_reg srcs[] = { component(shader_clock, 0), + const brw_reg 
shader_clock = get_timestamp(bld); + const brw_reg srcs[] = { component(shader_clock, 0), component(shader_clock, 1) }; bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); break; @@ -6449,7 +6449,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, /* Emit the reloc in the smallest SIMD size to limit register usage. */ const fs_builder ubld = bld.exec_all().group(1, 0); - fs_reg small_dest = ubld.vgrf(dest.type); + brw_reg small_dest = ubld.vgrf(dest.type); ubld.UNDEF(small_dest); ubld.exec_all().group(1, 0).emit(SHADER_OPCODE_MOV_RELOC_IMM, small_dest, brw_imm_ud(id), brw_imm_ud(base)); @@ -6466,7 +6466,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, unsigned base_offset = nir_intrinsic_base(instr); assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0); - fs_reg src = brw_uniform_reg(base_offset / 4, dest.type); + brw_reg src = brw_uniform_reg(base_offset / 4, dest.type); if (nir_src_is_const(instr->src[0])) { unsigned load_offset = nir_src_as_uint(instr->src[0]); @@ -6481,7 +6481,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, bld.MOV(offset(dest, bld, j), offset(src, bld, j)); } } else { - fs_reg indirect = retype(get_nir_src(ntb, instr->src[0]), + brw_reg indirect = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_UD); /* We need to pass a size to the MOV_INDIRECT but we don't want it to @@ -6525,7 +6525,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_ubo: case nir_intrinsic_load_ubo_uniform_block_intel: { - fs_reg surface, surface_handle; + brw_reg surface, surface_handle; bool no_mask_handle = false; if (get_nir_src_bindless(ntb, instr->src[0])) @@ -6536,7 +6536,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, if (!nir_src_is_const(instr->src[1])) { if (instr->intrinsic == nir_intrinsic_load_ubo) { /* load_ubo with non-uniform offset */ - fs_reg base_offset = retype(get_nir_src(ntb, instr->src[1]), + brw_reg base_offset = retype(get_nir_src(ntb, instr->src[1]), BRW_TYPE_UD); const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 
2 : 4; @@ -6558,14 +6558,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder ubld8 = bld.exec_all().group(8, 0); const fs_builder ubld16 = bld.exec_all().group(16, 0); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = surface; srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = surface_handle; const nir_src load_offset = instr->src[1]; if (nir_src_is_const(load_offset)) { - fs_reg addr = + brw_reg addr = ubld8.MOV(brw_imm_ud(nir_src_as_uint(load_offset))); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = component(addr, 0); } else { @@ -6577,7 +6577,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, ALIGN(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4); unsigned loaded_dwords = 0; - const fs_reg packed_consts = + const brw_reg packed_consts = ubld1.vgrf(BRW_TYPE_UD, total_dwords); while (loaded_dwords < total_dwords) { @@ -6628,7 +6628,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, DIV_ROUND_UP(load_offset + type_size * instr->num_components, 32); /* See if we've selected this as a push constant candidate */ - fs_reg push_reg; + brw_reg push_reg; for (int i = 0; i < 4; i++) { const struct brw_ubo_range *range = &s.prog_data->ubo_ranges[i]; if (range->block == ubo_block && @@ -6660,8 +6660,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const unsigned count = MIN2(instr->num_components - c, (block_sz - base % block_sz) / type_size); - const fs_reg packed_consts = ubld.vgrf(BRW_TYPE_UD); - fs_reg srcs[PULL_UNIFORM_CONSTANT_SRCS]; + const brw_reg packed_consts = ubld.vgrf(BRW_TYPE_UD); + brw_reg srcs[PULL_UNIFORM_CONSTANT_SRCS]; srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = surface; srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle; srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1)); @@ -6670,7 +6670,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, srcs, PULL_UNIFORM_CONSTANT_SRCS); - const fs_reg consts = + const brw_reg consts = retype(byte_offset(packed_consts, base & (block_sz - 1)), dest.type); @@ -6687,9 +6687,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_global_constant: { assert(instr->def.bit_size <= 32); assert(nir_intrinsic_align(instr) > 0); - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[0]); - srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */ + srcs[A64_LOGICAL_SRC] = brw_reg(); /* No source data */ srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(nir_intrinsic_access(instr) & ACCESS_INCLUDE_HELPERS); @@ -6707,7 +6707,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } else { const unsigned bit_size = instr->def.bit_size; assert(instr->def.num_components == 1); - fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + brw_reg tmp = bld.vgrf(BRW_TYPE_UD); srcs[A64_LOGICAL_ARG] = brw_imm_ud(bit_size); @@ -6724,7 +6724,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, (1u << instr->num_components) - 1); assert(nir_intrinsic_align(instr) > 0); - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[1]); srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(nir_intrinsic_access(instr) & ACCESS_INCLUDE_HELPERS); @@ -6736,19 +6736,19 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[A64_LOGICAL_SRC] = get_nir_src(ntb, instr->src[0]); /* Data */ srcs[A64_LOGICAL_ARG] = brw_imm_ud(instr->num_components); - 
bld.emit(SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL, fs_reg(), + bld.emit(SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL, brw_reg(), srcs, A64_LOGICAL_NUM_SRCS); } else { assert(nir_src_num_components(instr->src[0]) == 1); const unsigned bit_size = nir_src_bit_size(instr->src[0]); brw_reg_type data_type = brw_type_with_size(BRW_TYPE_UD, bit_size); - fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + brw_reg tmp = bld.vgrf(BRW_TYPE_UD); bld.MOV(tmp, retype(get_nir_src(ntb, instr->src[0]), data_type)); srcs[A64_LOGICAL_SRC] = tmp; srcs[A64_LOGICAL_ARG] = brw_imm_ud(bit_size); - bld.emit(SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL, fs_reg(), + bld.emit(SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL, brw_reg(), srcs, A64_LOGICAL_NUM_SRCS); } break; @@ -6764,7 +6764,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, assert(instr->num_components == 8 || instr->num_components == 16); const fs_builder ubld = bld.exec_all().group(instr->num_components, 0); - fs_reg load_val; + brw_reg load_val; bool is_pred_const = nir_src_is_const(instr->src[1]); if (is_pred_const && nir_src_as_uint(instr->src[1]) == 0) { @@ -6774,7 +6774,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, load_val = brw_imm_ud(0); } else { /* The uniform process may stomp the flag so do this first */ - fs_reg addr = bld.emit_uniformize(get_nir_src(ntb, instr->src[0])); + brw_reg addr = bld.emit_uniformize(get_nir_src(ntb, instr->src[0])); load_val = ubld.vgrf(BRW_TYPE_UD); @@ -6783,7 +6783,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ if (!is_pred_const) { /* Load the predicate */ - fs_reg pred = bld.emit_uniformize(get_nir_src(ntb, instr->src[1])); + brw_reg pred = bld.emit_uniformize(get_nir_src(ntb, instr->src[1])); fs_inst *mov = ubld.MOV(bld.null_reg_d(), pred); mov->conditional_mod = BRW_CONDITIONAL_NZ; @@ -6793,9 +6793,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, mov->predicate_inverse = true; } - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = addr; - srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */ + srcs[A64_LOGICAL_SRC] = brw_reg(); /* No source data */ srcs[A64_LOGICAL_ARG] = brw_imm_ud(instr->num_components); /* This intrinsic loads memory from a uniform address, sometimes * shared across lanes. We never need to mask it. @@ -6834,12 +6834,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, ntb.uniform_values[instr->src[0].ssa->index] = try_rebuild_source(ntb, bld, instr->src[0].ssa, true); bool no_mask = ntb.uniform_values[instr->src[0].ssa->index].file != BAD_FILE; - fs_reg address = + brw_reg address = ntb.uniform_values[instr->src[0].ssa->index].file != BAD_FILE ? ntb.uniform_values[instr->src[0].ssa->index] : bld.emit_uniformize(get_nir_src(ntb, instr->src[0])); - const fs_reg packed_consts = + const brw_reg packed_consts = ubld1.vgrf(BRW_TYPE_UD, total_dwords); while (loaded_dwords < total_dwords) { @@ -6850,9 +6850,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder &ubld = block <= 8 ? 
ubld8 : ubld16; - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = address; - srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */ + srcs[A64_LOGICAL_SRC] = brw_reg(); /* No source data */ srcs[A64_LOGICAL_ARG] = brw_imm_ud(block); srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0); fs_inst *inst = @@ -6876,7 +6876,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_ssbo: { const unsigned bit_size = instr->def.bit_size; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[get_nir_src_bindless(ntb, instr->src[0]) ? SURFACE_LOGICAL_SRC_SURFACE_HANDLE : SURFACE_LOGICAL_SRC_SURFACE] = @@ -6903,7 +6903,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, assert(instr->def.num_components == 1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); - fs_reg read_result = bld.vgrf(BRW_TYPE_UD); + brw_reg read_result = bld.vgrf(BRW_TYPE_UD); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, read_result, srcs, SURFACE_LOGICAL_NUM_SRCS); bld.MOV(dest, subscript(read_result, dest.type, 0)); @@ -6913,7 +6913,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_ssbo: { const unsigned bit_size = nir_src_bit_size(instr->src[0]); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[get_nir_src_bindless(ntb, instr->src[1]) ? SURFACE_LOGICAL_SRC_SURFACE_HANDLE : SURFACE_LOGICAL_SRC_SURFACE] = @@ -6922,7 +6922,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); - fs_reg data = get_nir_src(ntb, instr->src[0]); + brw_reg data = get_nir_src(ntb, instr->src[0]); data.type = brw_type_with_size(BRW_TYPE_UD, bit_size); assert(bit_size <= 32); @@ -6935,7 +6935,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_DATA] = data; srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } else { assert(nir_src_num_components(instr->src[0]) == 1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); @@ -6944,14 +6944,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, bld.MOV(srcs[SURFACE_LOGICAL_SRC_DATA], data); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } break; } case nir_intrinsic_load_ssbo_uniform_block_intel: case nir_intrinsic_load_shared_uniform_block_intel: { - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; const bool is_ssbo = instr->intrinsic == nir_intrinsic_load_ssbo_uniform_block_intel; @@ -6962,7 +6962,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, SURFACE_LOGICAL_SRC_SURFACE] = get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle); } else { - srcs[SURFACE_LOGICAL_SRC_SURFACE] = fs_reg(brw_imm_ud(GFX7_BTI_SLM)); + srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_reg(brw_imm_ud(GFX7_BTI_SLM)); /* SLM has to use aligned OWord Block Read messages on pre-LSC HW. 
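* An OWord is 16 bytes, so these messages require the access to be at
* least 16-byte aligned; the assert below enforces exactly that for
* hardware without an LSC.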
*/ assert(devinfo->has_lsc || nir_intrinsic_align(instr) >= 16); @@ -6977,13 +6977,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder ubld8 = bld.exec_all().group(8, 0); const fs_builder ubld16 = bld.exec_all().group(16, 0); - const fs_reg packed_consts = + const brw_reg packed_consts = ubld1.vgrf(BRW_TYPE_UD, total_dwords); const nir_src load_offset = is_ssbo ? instr->src[1] : instr->src[0]; if (nir_src_is_const(load_offset)) { const fs_builder &ubld = devinfo->ver >= 20 ? ubld16 : ubld8; - fs_reg addr = ubld.MOV(brw_imm_ud(nir_src_as_uint(load_offset))); + brw_reg addr = ubld.MOV(brw_imm_ud(nir_src_as_uint(load_offset))); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = component(addr, 0); } else { srcs[SURFACE_LOGICAL_SRC_ADDRESS] = @@ -7022,15 +7022,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_output: { assert(nir_src_bit_size(instr->src[0]) == 32); - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg src = get_nir_src(ntb, instr->src[0]); unsigned store_offset = nir_src_as_uint(instr->src[1]); unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); - fs_reg new_dest = retype(offset(s.outputs[instr->const_index[0]], bld, + brw_reg new_dest = retype(offset(s.outputs[instr->const_index[0]], bld, 4 * store_offset), src.type); - fs_reg comps[num_components]; + brw_reg comps[num_components]; for (unsigned i = 0; i < num_components; i++) { comps[i] = offset(src, bld, i); } @@ -7057,12 +7057,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * the dispatch width. */ const fs_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); - fs_reg ret_payload = ubld.vgrf(BRW_TYPE_UD, 4); + brw_reg ret_payload = ubld.vgrf(BRW_TYPE_UD, 4); /* Set LOD = 0 */ - fs_reg src_payload = ubld.MOV(brw_imm_ud(0)); + brw_reg src_payload = ubld.MOV(brw_imm_ud(0)); - fs_reg srcs[GET_BUFFER_SIZE_SRCS]; + brw_reg srcs[GET_BUFFER_SIZE_SRCS]; srcs[get_nir_src_bindless(ntb, instr->src[0]) ? 
GET_BUFFER_SIZE_SRC_SURFACE_HANDLE : GET_BUFFER_SIZE_SRC_SURFACE] = @@ -7093,9 +7093,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * * buffer_size = surface_size & ~3 - surface_size & 3 */ - fs_reg size_padding = ubld.AND(ret_payload, brw_imm_ud(3)); - fs_reg size_aligned4 = ubld.AND(ret_payload, brw_imm_ud(~3)); - fs_reg buffer_size = ubld.ADD(size_aligned4, negate(size_padding)); + brw_reg size_padding = ubld.AND(ret_payload, brw_imm_ud(3)); + brw_reg size_aligned4 = ubld.AND(ret_payload, brw_imm_ud(~3)); + brw_reg buffer_size = ubld.ADD(size_aligned4, negate(size_padding)); bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0)); break; @@ -7104,11 +7104,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_scratch: { assert(instr->def.num_components == 1); const unsigned bit_size = instr->def.bit_size; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; if (devinfo->verx10 >= 125) { const fs_builder ubld = bld.exec_all().group(1, 0); - fs_reg handle = component(ubld.vgrf(BRW_TYPE_UD), 0); + brw_reg handle = component(ubld.vgrf(BRW_TYPE_UD), 0); ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_TYPE_UD), brw_imm_ud(INTEL_MASK(31, 10))); if (devinfo->ver >= 20) @@ -7153,7 +7153,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, dest, srcs, SURFACE_LOGICAL_NUM_SRCS); } } else { - fs_reg read_result = bld.vgrf(BRW_TYPE_UD); + brw_reg read_result = bld.vgrf(BRW_TYPE_UD); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, read_result, srcs, SURFACE_LOGICAL_NUM_SRCS); bld.MOV(dest, read_result); @@ -7166,11 +7166,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_scratch: { assert(nir_src_num_components(instr->src[0]) == 1); const unsigned bit_size = nir_src_bit_size(instr->src[0]); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; if (devinfo->verx10 >= 125) { const fs_builder ubld = bld.exec_all().group(1, 0); - fs_reg handle = component(ubld.vgrf(BRW_TYPE_UD), 0); + brw_reg handle = component(ubld.vgrf(BRW_TYPE_UD), 0); ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_TYPE_UD), brw_imm_ud(INTEL_MASK(31, 10))); if (devinfo->ver >= 20) @@ -7200,7 +7200,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_ADDRESS] = swizzle_nir_scratch_addr(ntb, bld, instr->src[1], addr_in_dwords); - fs_reg data = get_nir_src(ntb, instr->src[0]); + brw_reg data = get_nir_src(ntb, instr->src[0]); data.type = brw_type_with_size(BRW_TYPE_UD, bit_size); assert(nir_src_num_components(instr->src[0]) == 1); @@ -7220,14 +7220,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[SURFACE_LOGICAL_SRC_DATA] = data; bld.emit(SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } } else { srcs[SURFACE_LOGICAL_SRC_DATA] = bld.vgrf(BRW_TYPE_UD); bld.MOV(srcs[SURFACE_LOGICAL_SRC_DATA], data); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } s.shader_stats.spill_count += DIV_ROUND_UP(s.dispatch_width, 16); break; @@ -7259,7 +7259,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, if (s.dispatch_width == 32) flag.type = BRW_TYPE_UD; - fs_reg cond = get_nir_src(ntb, instr->src[0]); + brw_reg cond = get_nir_src(ntb, instr->src[0]); /* Before Xe2, we can use specialized predicates. */ if (devinfo->ver < 20) { @@ -7292,22 +7292,22 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * replicated to each invocation. 
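* (The CMP against zero below sets one flag bit per live lane, and the
* MOV from the flag register then broadcasts that bit pattern to every
* lane as an ordinary UD value.)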
*/ bld.CMP(bld.null_reg_ud(), cond, brw_imm_ud(0u), BRW_CONDITIONAL_NZ); - fs_reg cond_mask = bld.vgrf(BRW_TYPE_UD); + brw_reg cond_mask = bld.vgrf(BRW_TYPE_UD); bld.MOV(cond_mask, flag); /* Mask of invocations in the quad, each invocation will get * all the bits set for their quad, i.e. invocations 0-3 will have * 0b...1111, invocations 4-7 will have 0b...11110000 and so on. */ - fs_reg invoc_ud = bld.vgrf(BRW_TYPE_UD); + brw_reg invoc_ud = bld.vgrf(BRW_TYPE_UD); bld.MOV(invoc_ud, ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]); - fs_reg quad_mask = + brw_reg quad_mask = bld.SHL(brw_imm_ud(0xF), bld.AND(invoc_ud, brw_imm_ud(0xFFFFFFFC))); /* An invocation will have bits set for each quad that passes the * condition. This is uniform among each quad. */ - fs_reg tmp = bld.AND(cond_mask, quad_mask); + brw_reg tmp = bld.AND(cond_mask, quad_mask); if (instr->intrinsic == nir_intrinsic_quad_vote_any) { bld.CMP(retype(dest, BRW_TYPE_UD), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ); @@ -7315,9 +7315,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, assert(instr->intrinsic == nir_intrinsic_quad_vote_all); /* Filter out quad_mask to include only active channels. */ - fs_reg active = bld.vgrf(BRW_TYPE_UD); + brw_reg active = bld.vgrf(BRW_TYPE_UD); bld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, active); - bld.MOV(active, fs_reg(component(active, 0))); + bld.MOV(active, brw_reg(component(active, 0))); bld.AND(quad_mask, quad_mask, active); bld.CMP(retype(dest, BRW_TYPE_UD), tmp, quad_mask, BRW_CONDITIONAL_Z); @@ -7349,7 +7349,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * of 1-wide MOVs and scattering the result. */ const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; - fs_reg res1 = ubld.MOV(brw_imm_d(0)); + brw_reg res1 = ubld.MOV(brw_imm_d(0)); set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ANY : s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H : s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H : @@ -7382,7 +7382,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * of 1-wide MOVs and scattering the result. */ const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; - fs_reg res1 = ubld.MOV(brw_imm_d(0)); + brw_reg res1 = ubld.MOV(brw_imm_d(0)); set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL : s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H : @@ -7394,14 +7394,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_vote_feq: case nir_intrinsic_vote_ieq: { - fs_reg value = get_nir_src(ntb, instr->src[0]); + brw_reg value = get_nir_src(ntb, instr->src[0]); if (instr->intrinsic == nir_intrinsic_vote_feq) { const unsigned bit_size = nir_src_bit_size(instr->src[0]); value.type = bit_size == 8 ? BRW_TYPE_B : brw_type_with_size(BRW_TYPE_F, bit_size); } - fs_reg uniformized = bld.emit_uniformize(value); + brw_reg uniformized = bld.emit_uniformize(value); const fs_builder ubld1 = bld.exec_all().group(1, 0); /* The any/all predicates do not consider channel enables. To prevent @@ -7424,7 +7424,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * of 1-wide MOVs and scattering the result. */ const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; - fs_reg res1 = ubld.MOV(brw_imm_d(0)); + brw_reg res1 = ubld.MOV(brw_imm_d(0)); set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL : s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : s.dispatch_width == 16 ? 
BRW_PREDICATE_ALIGN1_ALL16H : @@ -7445,13 +7445,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, /* Implement a fast-path for ballot(true). */ if (nir_src_is_const(instr->src[0]) && nir_src_as_bool(instr->src[0])) { - fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + brw_reg tmp = bld.vgrf(BRW_TYPE_UD); bld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, tmp); - bld.MOV(dest, fs_reg(component(tmp, 0))); + bld.MOV(dest, brw_reg(component(tmp, 0))); break; } - const fs_reg value = retype(get_nir_src(ntb, instr->src[0]), + const brw_reg value = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_UD); struct brw_reg flag = brw_flag_reg(0, 0); @@ -7465,8 +7465,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_read_invocation: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); - const fs_reg invocation = get_nir_src_imm(ntb, instr->src[1]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg invocation = get_nir_src_imm(ntb, instr->src[1]); if (invocation.file == IMM) { unsigned i = invocation.ud & (bld.dispatch_width() - 1); @@ -7474,13 +7474,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - fs_reg tmp = bld.vgrf(value.type); + brw_reg tmp = bld.vgrf(value.type); /* When for some reason the subgroup_size picked by NIR is larger than * the dispatch size picked by the backend (this could happen in RT, * FS), bound the invocation to the dispatch size. */ - fs_reg bound_invocation = retype(invocation, BRW_TYPE_UD); + brw_reg bound_invocation = retype(invocation, BRW_TYPE_UD); if (s.api_subgroup_size == 0 || bld.dispatch_width() < s.api_subgroup_size) { bound_invocation = @@ -7489,42 +7489,42 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value, bld.emit_uniformize(bound_invocation)); - bld.MOV(retype(dest, value.type), fs_reg(component(tmp, 0))); + bld.MOV(retype(dest, value.type), brw_reg(component(tmp, 0))); break; } case nir_intrinsic_read_first_invocation: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); bld.MOV(retype(dest, value.type), bld.emit_uniformize(value)); break; } case nir_intrinsic_shuffle: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); - const fs_reg index = get_nir_src(ntb, instr->src[1]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg index = get_nir_src(ntb, instr->src[1]); bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, index); break; } case nir_intrinsic_first_invocation: { - fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + brw_reg tmp = bld.vgrf(BRW_TYPE_UD); bld.exec_all().emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, tmp); bld.MOV(retype(dest, BRW_TYPE_UD), - fs_reg(component(tmp, 0))); + brw_reg(component(tmp, 0))); break; } case nir_intrinsic_last_invocation: { - fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + brw_reg tmp = bld.vgrf(BRW_TYPE_UD); bld.exec_all().emit(SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL, tmp); bld.MOV(retype(dest, BRW_TYPE_UD), - fs_reg(component(tmp, 0))); + brw_reg(component(tmp, 0))); break; } case nir_intrinsic_quad_broadcast: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); const unsigned index = nir_src_as_uint(instr->src[1]); bld.emit(SHADER_OPCODE_CLUSTER_BROADCAST, retype(dest, value.type), @@ -7533,15 +7533,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_quad_swap_horizontal: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); - const fs_reg tmp = 
bld.vgrf(value.type); + const brw_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg tmp = bld.vgrf(value.type); const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0); - const fs_reg src_left = horiz_stride(value, 2); - const fs_reg src_right = horiz_stride(horiz_offset(value, 1), 2); - const fs_reg tmp_left = horiz_stride(tmp, 2); - const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); + const brw_reg src_left = horiz_stride(value, 2); + const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2); + const brw_reg tmp_left = horiz_stride(tmp, 2); + const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); ubld.MOV(tmp_left, src_right); ubld.MOV(tmp_right, src_left); @@ -7551,10 +7551,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_quad_swap_vertical: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); if (nir_src_bit_size(instr->src[0]) == 32) { /* For 32-bit, we can use a SIMD4x2 instruction to do this easily */ - const fs_reg tmp = bld.vgrf(value.type); + const brw_reg tmp = bld.vgrf(value.type); const fs_builder ubld = bld.exec_all(); ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, brw_imm_ud(BRW_SWIZZLE4(2,3,0,1))); @@ -7563,7 +7563,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, /* For larger data types, we have to either emit dispatch_width many * MOVs or else fall back to doing indirects. */ - fs_reg idx = bld.vgrf(BRW_TYPE_W); + brw_reg idx = bld.vgrf(BRW_TYPE_W); bld.XOR(idx, ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION], brw_imm_w(0x2)); bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx); @@ -7572,10 +7572,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_quad_swap_diagonal: { - const fs_reg value = get_nir_src(ntb, instr->src[0]); + const brw_reg value = get_nir_src(ntb, instr->src[0]); if (nir_src_bit_size(instr->src[0]) == 32) { /* For 32-bit, we can use a SIMD4x2 instruction to do this easily */ - const fs_reg tmp = bld.vgrf(value.type); + const brw_reg tmp = bld.vgrf(value.type); const fs_builder ubld = bld.exec_all(); ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, brw_imm_ud(BRW_SWIZZLE4(3,2,1,0))); @@ -7584,7 +7584,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, /* For larger data types, we have to either emit dispatch_width many * MOVs or else fall back to doing indirects. 
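* (Here the shuffle index is the subgroup invocation XORed with 3,
* which within each 2x2 quad maps 0<->3 and 1<->2, i.e. the diagonal
* neighbor.)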
*/ - fs_reg idx = bld.vgrf(BRW_TYPE_W); + brw_reg idx = bld.vgrf(BRW_TYPE_W); bld.XOR(idx, ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION], brw_imm_w(0x3)); bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx); @@ -7593,7 +7593,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } case nir_intrinsic_reduce: { - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg src = get_nir_src(ntb, instr->src[0]); nir_op redop = (nir_op)nir_intrinsic_reduction_op(instr); unsigned cluster_size = nir_intrinsic_cluster_size(instr); if (cluster_size == 0 || cluster_size > s.dispatch_width) @@ -7604,14 +7604,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, (nir_alu_type)(nir_op_infos[redop].input_types[0] | nir_src_bit_size(instr->src[0]))); - fs_reg identity = brw_nir_reduction_op_identity(bld, redop, src.type); + brw_reg identity = brw_nir_reduction_op_identity(bld, redop, src.type); opcode brw_op = brw_op_for_nir_reduction_op(redop); brw_conditional_mod cond_mod = brw_cond_mod_for_nir_reduction_op(redop); /* Set up a register for all of our scratching around and initialize it * to reduction operation's identity value. */ - fs_reg scan = bld.vgrf(src.type); + brw_reg scan = bld.vgrf(src.type); bld.exec_all().emit(SHADER_OPCODE_SEL_EXEC, scan, src, identity); bld.emit_scan(brw_op, scan, cluster_size, cond_mod); @@ -7642,7 +7642,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_inclusive_scan: case nir_intrinsic_exclusive_scan: { - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg src = get_nir_src(ntb, instr->src[0]); nir_op redop = (nir_op)nir_intrinsic_reduction_op(instr); /* Figure out the source type */ @@ -7650,14 +7650,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, (nir_alu_type)(nir_op_infos[redop].input_types[0] | nir_src_bit_size(instr->src[0]))); - fs_reg identity = brw_nir_reduction_op_identity(bld, redop, src.type); + brw_reg identity = brw_nir_reduction_op_identity(bld, redop, src.type); opcode brw_op = brw_op_for_nir_reduction_op(redop); brw_conditional_mod cond_mod = brw_cond_mod_for_nir_reduction_op(redop); /* Set up a register for all of our scratching around and initialize it * to reduction operation's identity value. */ - fs_reg scan = bld.vgrf(src.type); + brw_reg scan = bld.vgrf(src.type); const fs_builder allbld = bld.exec_all(); allbld.emit(SHADER_OPCODE_SEL_EXEC, scan, src, identity); @@ -7666,8 +7666,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * shift of the contents before we can begin. To make things worse, * we can't do this with a normal stride; we have to use indirects. */ - fs_reg shifted = bld.vgrf(src.type); - fs_reg idx = bld.vgrf(BRW_TYPE_W); + brw_reg shifted = bld.vgrf(src.type); + brw_reg idx = bld.vgrf(BRW_TYPE_W); allbld.ADD(idx, ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION], brw_imm_w(-1)); allbld.emit(SHADER_OPCODE_SHUFFLE, shifted, scan, idx); @@ -7684,7 +7684,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_global_block_intel: { assert(instr->def.bit_size == 32); - fs_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[0])); + brw_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[0])); const fs_builder ubld1 = bld.exec_all().group(1, 0); const fs_builder ubld8 = bld.exec_all().group(8, 0); @@ -7700,9 +7700,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder &ubld = block == 8 ? 
ubld8 : ubld16; - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = address; - srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */ + srcs[A64_LOGICAL_SRC] = brw_reg(); /* No source data */ srcs[A64_LOGICAL_ARG] = brw_imm_ud(block); srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(1); ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL, @@ -7720,8 +7720,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_store_global_block_intel: { assert(nir_src_bit_size(instr->src[0]) == 32); - fs_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[1])); - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[1])); + brw_reg src = get_nir_src(ntb, instr->src[0]); const fs_builder ubld1 = bld.exec_all().group(1, 0); const fs_builder ubld8 = bld.exec_all().group(8, 0); @@ -7734,7 +7734,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const unsigned block = choose_oword_block_size_dwords(devinfo, total - written); - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = address; srcs[A64_LOGICAL_SRC] = retype(byte_offset(src, written * 4), BRW_TYPE_UD); @@ -7742,7 +7742,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0); const fs_builder &ubld = block == 8 ? ubld8 : ubld16; - ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL, fs_reg(), + ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL, brw_reg(), srcs, A64_LOGICAL_NUM_SRCS); const unsigned block_bytes = block * 4; @@ -7760,15 +7760,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const bool is_ssbo = instr->intrinsic == nir_intrinsic_load_ssbo_block_intel; - fs_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[is_ssbo ? 1 : 0])); + brw_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[is_ssbo ? 1 : 0])); bool no_mask_handle = false; - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; if (is_ssbo) { srcs[SURFACE_LOGICAL_SRC_SURFACE] = get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle); } else { - srcs[SURFACE_LOGICAL_SRC_SURFACE] = fs_reg(brw_imm_ud(GFX7_BTI_SLM)); + srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_reg(brw_imm_ud(GFX7_BTI_SLM)); no_mask_handle = true; } srcs[SURFACE_LOGICAL_SRC_ADDRESS] = address; @@ -7810,13 +7810,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const bool is_ssbo = instr->intrinsic == nir_intrinsic_store_ssbo_block_intel; - fs_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[is_ssbo ? 2 : 1])); - fs_reg src = get_nir_src(ntb, instr->src[0]); + brw_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[is_ssbo ? 2 : 1])); + brw_reg src = get_nir_src(ntb, instr->src[0]); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[SURFACE_LOGICAL_SRC_SURFACE] = is_ssbo ? get_nir_buffer_intrinsic_index(ntb, bld, instr) : - fs_reg(brw_imm_ud(GFX7_BTI_SLM)); + brw_reg(brw_imm_ud(GFX7_BTI_SLM)); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = address; const fs_builder ubld1 = bld.exec_all().group(1, 0); @@ -7836,7 +7836,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, const fs_builder &ubld = block == 8 ? 
ubld8 : ubld16; ubld.emit(SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL, - fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); + brw_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); const unsigned block_bytes = block * 4; srcs[SURFACE_LOGICAL_SRC_ADDRESS] = @@ -7874,7 +7874,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * [6:4] : EUID * [2:0] : Thread ID */ - fs_reg raw_id = bld.vgrf(BRW_TYPE_UD); + brw_reg raw_id = bld.vgrf(BRW_TYPE_UD); bld.UNDEF(raw_id); bld.emit(SHADER_OPCODE_READ_ARCH_REG, raw_id, retype(brw_sr0_reg(0), BRW_TYPE_UD)); @@ -7898,7 +7898,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * * We are using the state register to calculate the DSSID. */ - fs_reg slice_id = + brw_reg slice_id = bld.SHR(bld.AND(raw_id, brw_imm_ud(INTEL_MASK(15, 11))), brw_imm_ud(11)); @@ -7907,7 +7907,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ unsigned slice_stride = devinfo->max_subslices_per_slice; assert(slice_stride >= (1 << 2)); - fs_reg subslice_id = + brw_reg subslice_id = bld.SHR(bld.AND(raw_id, brw_imm_ud(INTEL_MASK(9, 8))), brw_imm_ud(8)); bld.ADD(retype(dest, BRW_TYPE_UD), @@ -7921,8 +7921,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, case BRW_TOPOLOGY_ID_EU_THREAD_SIMD: { s.limit_dispatch_width(16, "Topology helper for Ray queries, " "not supported in SIMD32 mode."); - fs_reg dst = retype(dest, BRW_TYPE_UD); - fs_reg eu; + brw_reg dst = retype(dest, BRW_TYPE_UD); + brw_reg eu; if (devinfo->ver >= 20) { /* Xe2+: Graphics Engine, 3D and GPGPU Programs, Shared Functions @@ -7951,16 +7951,16 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * EU[2] = raw_id[8] (identified as SubSlice ID) * EU[3] = raw_id[7] (identified as EUID[2] or Row ID) */ - fs_reg raw5_4 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(5, 4))); - fs_reg raw7 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(7, 7))); - fs_reg raw8 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(8, 8))); + brw_reg raw5_4 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(5, 4))); + brw_reg raw7 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(7, 7))); + brw_reg raw8 = bld.AND(raw_id, brw_imm_ud(INTEL_MASK(8, 8))); eu = bld.OR(bld.SHL(raw5_4, brw_imm_ud(3)), bld.OR(bld.SHL(raw7, brw_imm_ud(3)), bld.SHL(raw8, brw_imm_ud(1)))); } /* ThreadID[2:0] << 4 (ThreadID comes from raw_id[2:0]) */ - fs_reg tid = + brw_reg tid = bld.SHL(bld.AND(raw_id, brw_imm_ud(INTEL_MASK(2, 0))), brw_imm_ud(4)); @@ -8028,9 +8028,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ emit_rt_lsc_fence(bld, LSC_FENCE_LOCAL, LSC_FLUSH_TYPE_NONE); - fs_reg srcs[RT_LOGICAL_NUM_SRCS]; + brw_reg srcs[RT_LOGICAL_NUM_SRCS]; - fs_reg globals = get_nir_src(ntb, instr->src[0]); + brw_reg globals = get_nir_src(ntb, instr->src[0]); srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals); srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(ntb, instr->src[1]); srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(ntb, instr->src[2]); @@ -8060,11 +8060,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } } -static fs_reg -expand_to_32bit(const fs_builder &bld, const fs_reg &src) +static brw_reg +expand_to_32bit(const fs_builder &bld, const brw_reg &src) { if (brw_type_size_bytes(src.type) == 2) { - fs_reg src32 = bld.vgrf(BRW_TYPE_UD); + brw_reg src32 = bld.vgrf(BRW_TYPE_UD); bld.MOV(src32, retype(src, BRW_TYPE_UW)); return src32; } else { @@ -8075,7 +8075,7 @@ expand_to_32bit(const fs_builder &bld, const fs_reg &src) static void fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr, - fs_reg surface, + brw_reg surface, bool bindless) { const intel_device_info *devinfo = ntb.devinfo; @@ 
-8097,9 +8097,9 @@ fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, (instr->def.bit_size == 16 && (devinfo->has_lsc || lsc_opcode_is_atomic_float(op)))); - fs_reg dest = get_nir_def(ntb, instr->def); + brw_reg dest = get_nir_def(ntb, instr->def); - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; + brw_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; srcs[bindless ? SURFACE_LOGICAL_SRC_SURFACE_HANDLE : SURFACE_LOGICAL_SRC_SURFACE] = surface; @@ -8123,13 +8123,13 @@ fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(ntb, instr->src[1]); } - fs_reg data; + brw_reg data; if (num_data >= 1) data = expand_to_32bit(bld, get_nir_src(ntb, instr->src[shared ? 1 : 2])); if (num_data >= 2) { - fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { + brw_reg tmp = bld.vgrf(data.type, 2); + brw_reg sources[2] = { data, expand_to_32bit(bld, get_nir_src(ntb, instr->src[shared ? 2 : 3])) }; @@ -8143,7 +8143,7 @@ fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, /* Emit the actual atomic operation */ switch (instr->def.bit_size) { case 16: { - fs_reg dest32 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, retype(dest32, dest.type), srcs, SURFACE_LOGICAL_NUM_SRCS); @@ -8173,17 +8173,17 @@ fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, enum lsc_opcode op = lsc_aop_for_nir_intrinsic(instr); int num_data = lsc_op_num_data_values(op); - fs_reg dest = get_nir_def(ntb, instr->def); + brw_reg dest = get_nir_def(ntb, instr->def); - fs_reg addr = get_nir_src(ntb, instr->src[0]); + brw_reg addr = get_nir_src(ntb, instr->src[0]); - fs_reg data; + brw_reg data; if (num_data >= 1) data = expand_to_32bit(bld, get_nir_src(ntb, instr->src[1])); if (num_data >= 2) { - fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { + brw_reg tmp = bld.vgrf(data.type, 2); + brw_reg sources[2] = { data, expand_to_32bit(bld, get_nir_src(ntb, instr->src[2])) }; @@ -8191,7 +8191,7 @@ fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, data = tmp; } - fs_reg srcs[A64_LOGICAL_NUM_SRCS]; + brw_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = addr; srcs[A64_LOGICAL_SRC] = data; srcs[A64_LOGICAL_ARG] = brw_imm_ud(op); @@ -8201,7 +8201,7 @@ fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, unsigned size_written = 0; switch (instr->def.bit_size) { case 16: { - fs_reg dest32 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); inst = bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, retype(dest32, dest.type), srcs, A64_LOGICAL_NUM_SRCS); @@ -8230,7 +8230,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, const intel_device_info *devinfo = ntb.devinfo; const fs_builder &bld = ntb.bld; - fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; /* SKL PRMs: Volume 7: 3D-Media-GPGPU: * @@ -8257,7 +8257,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, uint32_t header_bits = 0; for (unsigned i = 0; i < instr->num_srcs; i++) { nir_src nir_src = instr->src[i].src; - fs_reg src = get_nir_src(ntb, nir_src); + brw_reg src = get_nir_src(ntb, nir_src); switch (instr->src[i].src_type) { case nir_tex_src_bias: assert(!got_lod); @@ -8363,7 +8363,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, case nir_tex_src_texture_handle: assert(nir_tex_instr_src_index(instr, nir_tex_src_texture_offset) == -1); - srcs[TEX_LOGICAL_SRC_SURFACE] = fs_reg(); + srcs[TEX_LOGICAL_SRC_SURFACE] = brw_reg(); 
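/* Bindless case: drop the binding-table surface source and supply the
 * surface through TEX_LOGICAL_SRC_SURFACE_HANDLE below instead.
 */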
if (is_resource_src(nir_src)) srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = get_resource_nir_src(ntb, nir_src); if (srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE].file == BAD_FILE) @@ -8372,7 +8372,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, case nir_tex_src_sampler_handle: assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1); - srcs[TEX_LOGICAL_SRC_SAMPLER] = fs_reg(); + srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_reg(); if (is_resource_src(nir_src)) srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] = get_resource_nir_src(ntb, nir_src); if (srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE].file == BAD_FILE) @@ -8507,7 +8507,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, opcode = SHADER_OPCODE_SAMPLEINFO_LOGICAL; break; case nir_texop_samples_identical: { - fs_reg dst = retype(get_nir_def(ntb, instr->def), BRW_TYPE_D); + brw_reg dst = retype(get_nir_def(ntb, instr->def), BRW_TYPE_D); /* If mcs is an immediate value, it means there is no MCS. In that case * just return false. @@ -8515,7 +8515,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, if (srcs[TEX_LOGICAL_SRC_MCS].file == BRW_IMMEDIATE_VALUE) { bld.MOV(dst, brw_imm_ud(0u)); } else { - fs_reg tmp = + brw_reg tmp = bld.OR(srcs[TEX_LOGICAL_SRC_MCS], offset(srcs[TEX_LOGICAL_SRC_MCS], bld, 1)); bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); @@ -8530,7 +8530,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, header_bits |= instr->component << 16; } - fs_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4 + instr->is_sparse); + brw_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4 + instr->is_sparse); fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); inst->offset = header_bits; @@ -8573,7 +8573,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, inst->keep_payload_trailing_zeros = true; } - fs_reg nir_def_reg = get_nir_def(ntb, instr->def); + brw_reg nir_def_reg = get_nir_def(ntb, instr->def); if (instr->op != nir_texop_query_levels && !instr->is_sparse) { /* In most cases we can write directly to the result. */ @@ -8582,7 +8582,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, /* In other cases, we have to reorganize the sampler message's results * a bit to match the NIR intrinsic's expectations. 
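* (For example, query_levels expects the mip count, which the sampler
* message returns in the .w channel, and sparse fetches expect the
* residency dword to follow the texel components.)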
*/ - fs_reg nir_dest[5]; + brw_reg nir_dest[5]; for (unsigned i = 0; i < read_size; i++) nir_dest[i] = offset(dst, bld, i); @@ -8680,8 +8680,8 @@ fs_nir_emit_jump(nir_to_brw_state &ntb, nir_jump_instr *instr) */ void shuffle_src_to_dst(const fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, + const brw_reg &dst, + const brw_reg &src, uint32_t first_component, uint32_t components) { @@ -8706,7 +8706,7 @@ shuffle_src_to_dst(const fs_builder &bld, brw_reg_type shuffle_type = brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(src.type)); for (unsigned i = 0; i < components; i++) { - fs_reg shuffle_component_i = + brw_reg shuffle_component_i = subscript(offset(dst, bld, i / size_ratio), shuffle_type, i % size_ratio); bld.MOV(shuffle_component_i, @@ -8725,7 +8725,7 @@ shuffle_src_to_dst(const fs_builder &bld, brw_reg_type shuffle_type = brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(dst.type)); for (unsigned i = 0; i < components; i++) { - fs_reg shuffle_component_i = + brw_reg shuffle_component_i = subscript(offset(src, bld, (first_component + i) / size_ratio), shuffle_type, (first_component + i) % size_ratio); bld.MOV(retype(offset(dst, bld, i), shuffle_type), @@ -8736,8 +8736,8 @@ shuffle_src_to_dst(const fs_builder &bld, void shuffle_from_32bit_read(const fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, + const brw_reg &dst, + const brw_reg &src, uint32_t first_component, uint32_t components) { @@ -8920,8 +8920,8 @@ brw_fs_test_dispatch_packing(const fs_builder &bld) shader->max_polygons, shader->prog_data)) { const fs_builder ubld = bld.exec_all().group(1, 0); - const fs_reg tmp = component(bld.vgrf(BRW_TYPE_UD), 0); - const fs_reg mask = uses_vmask ? brw_vmask_reg() : brw_dmask_reg(); + const brw_reg tmp = component(bld.vgrf(BRW_TYPE_UD), 0); + const brw_reg mask = uses_vmask ? 
brw_vmask_reg() : brw_dmask_reg(); ubld.ADD(tmp, mask, brw_imm_ud(1)); ubld.AND(tmp, mask, tmp); diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp index 49e31219145..a45989304da 100644 --- a/src/intel/compiler/brw_fs_opt_algebraic.cpp +++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp @@ -9,7 +9,7 @@ using namespace brw; static uint64_t -src_as_uint(const fs_reg &src) +src_as_uint(const brw_reg &src) { assert(src.file == IMM); @@ -37,7 +37,7 @@ src_as_uint(const fs_reg &src) } } -static fs_reg +static brw_reg brw_imm_for_type(uint64_t value, enum brw_reg_type type) { switch (type) { @@ -394,7 +394,7 @@ brw_fs_opt_algebraic(fs_visitor &s) */ assert(!inst->saturate); - fs_reg result; + brw_reg result; switch (brw_type_size_bytes(inst->src[0].type)) { case 2: @@ -470,7 +470,7 @@ brw_fs_opt_algebraic(fs_visitor &s) */ if (progress && inst->sources == 2 && inst->is_commutative()) { if (inst->src[0].file == IMM) { - fs_reg tmp = inst->src[1]; + brw_reg tmp = inst->src[1]; inst->src[1] = inst->src[0]; inst->src[0] = tmp; } diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 7ed4faa4db2..c895230f7f6 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -38,7 +38,7 @@ using namespace brw; static void assign_reg(const struct intel_device_info *devinfo, - unsigned *reg_hw_locations, fs_reg *reg) + unsigned *reg_hw_locations, brw_reg *reg) { if (reg->file == VGRF) { reg->nr = reg_unit(devinfo) * reg_hw_locations[reg->nr] + reg->offset / REG_SIZE; @@ -291,21 +291,21 @@ private: void build_interference_graph(); void discard_interference_graph(); - fs_reg build_lane_offsets(const fs_builder &bld, + brw_reg build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, int ip); - fs_reg build_single_offset(const fs_builder &bld, + brw_reg build_single_offset(const fs_builder &bld, uint32_t spill_offset, int ip); - fs_reg build_legacy_scratch_header(const fs_builder &bld, - uint32_t spill_offset, int ip); + brw_reg build_legacy_scratch_header(const fs_builder &bld, + uint32_t spill_offset, int ip); void emit_unspill(const fs_builder &bld, struct shader_stats *stats, - fs_reg dst, uint32_t spill_offset, unsigned count, int ip); + brw_reg dst, uint32_t spill_offset, unsigned count, int ip); void emit_spill(const fs_builder &bld, struct shader_stats *stats, - fs_reg src, uint32_t spill_offset, unsigned count, int ip); + brw_reg src, uint32_t spill_offset, unsigned count, int ip); void set_spill_costs(); int choose_spill_reg(); - fs_reg alloc_spill_reg(unsigned size, int ip); + brw_reg alloc_spill_reg(unsigned size, int ip); void spill_reg(unsigned spill_reg); void *mem_ctx; @@ -571,16 +571,16 @@ fs_reg_alloc::discard_interference_graph() have_spill_costs = false; } -fs_reg +brw_reg fs_reg_alloc::build_single_offset(const fs_builder &bld, uint32_t spill_offset, int ip) { - fs_reg offset = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD); + brw_reg offset = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD); fs_inst *inst = bld.MOV(offset, brw_imm_ud(spill_offset)); _mesa_set_add(spill_insts, inst); return offset; } -fs_reg +brw_reg fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, int ip) { /* LSC messages are limited to SIMD16 */ @@ -589,7 +589,7 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i const fs_builder ubld = bld.exec_all(); const unsigned reg_count = ubld.dispatch_width() / 8; - fs_reg 
offset = retype(alloc_spill_reg(reg_count, ip), BRW_TYPE_UD); + brw_reg offset = retype(alloc_spill_reg(reg_count, ip), BRW_TYPE_UD); fs_inst *inst; /* Build an offset per lane in SIMD8 */ @@ -622,7 +622,7 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i /** * Generate a scratch header for pre-LSC platforms. */ -fs_reg +brw_reg fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld, uint32_t spill_offset, int ip) { @@ -630,7 +630,7 @@ fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld, const fs_builder ubld1 = bld.exec_all().group(1, 0); /* Allocate a spill header and make it interfere with g0 */ - fs_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD); + brw_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD); ra_add_node_interference(g, first_vgrf_node + header.nr, first_payload_node); fs_inst *inst = ubld8.emit(SHADER_OPCODE_SCRATCH_HEADER, header); @@ -647,7 +647,7 @@ fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld, void fs_reg_alloc::emit_unspill(const fs_builder &bld, struct shader_stats *stats, - fs_reg dst, + brw_reg dst, uint32_t spill_offset, unsigned count, int ip) { const intel_device_info *devinfo = bld.shader->devinfo; @@ -664,7 +664,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, */ const bool use_transpose = bld.dispatch_width() > 16; const fs_builder ubld = use_transpose ? bld.exec_all().group(1, 0) : bld; - fs_reg offset; + brw_reg offset; if (use_transpose) { offset = build_single_offset(ubld, spill_offset, ip); } else { @@ -675,11 +675,11 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, * register. That way we don't need to burn an additional register * for register allocation spill/fill. */ - fs_reg srcs[] = { + brw_reg srcs[] = { brw_imm_ud(0), /* desc */ brw_imm_ud(0), /* ex_desc */ offset, /* payload */ - fs_reg(), /* payload2 */ + brw_reg(), /* payload2 */ }; unspill_inst = ubld.emit(SHADER_OPCODE_SEND, dst, @@ -702,12 +702,12 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, unspill_inst->send_is_volatile = true; unspill_inst->send_ex_desc_scratch = true; } else { - fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip); + brw_reg header = build_legacy_scratch_header(bld, spill_offset, ip); const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT; - const fs_reg ex_desc = brw_imm_ud(0); + const brw_reg ex_desc = brw_imm_ud(0); - fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header }; + brw_reg srcs[] = { brw_imm_ud(0), ex_desc, header }; unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst, srcs, ARRAY_SIZE(srcs)); unspill_inst->mlen = 1; @@ -732,7 +732,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, void fs_reg_alloc::emit_spill(const fs_builder &bld, struct shader_stats *stats, - fs_reg src, + brw_reg src, uint32_t spill_offset, unsigned count, int ip) { const intel_device_info *devinfo = bld.shader->devinfo; @@ -744,13 +744,13 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, fs_inst *spill_inst; if (devinfo->verx10 >= 125) { - fs_reg offset = build_lane_offsets(bld, spill_offset, ip); + brw_reg offset = build_lane_offsets(bld, spill_offset, ip); /* We leave the extended descriptor empty and flag the instruction * relocate the extended descriptor. That way the surface offset is * directly put into the instruction and we don't need to use a * register to hold it. 
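* (That flag is send_ex_desc_scratch, set on spill_inst below; code
* generation then writes the scratch surface offset straight into the
* extended message descriptor.)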
*/ - fs_reg srcs[] = { + brw_reg srcs[] = { brw_imm_ud(0), /* desc */ brw_imm_ud(0), /* ex_desc */ offset, /* payload */ @@ -775,12 +775,12 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, spill_inst->send_is_volatile = false; spill_inst->send_ex_desc_scratch = true; } else { - fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip); + brw_reg header = build_legacy_scratch_header(bld, spill_offset, ip); const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT; - const fs_reg ex_desc = brw_imm_ud(0); + const brw_reg ex_desc = brw_imm_ud(0); - fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src }; + brw_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src }; spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(), srcs, ARRAY_SIZE(srcs)); spill_inst->mlen = 1; @@ -903,7 +903,7 @@ fs_reg_alloc::choose_spill_reg() return node - first_vgrf_node; } -fs_reg +brw_reg fs_reg_alloc::alloc_spill_reg(unsigned size, int ip) { int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo))); @@ -970,7 +970,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg) int count = regs_read(inst, i); int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE); - fs_reg unspill_dst = alloc_spill_reg(count, ip); + brw_reg unspill_dst = alloc_spill_reg(count, ip); inst->src[i].nr = unspill_dst.nr; inst->src[i].offset %= REG_SIZE; @@ -999,7 +999,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg) inst->opcode != SHADER_OPCODE_UNDEF) { int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->dst.offset, REG_SIZE); - fs_reg spill_src = alloc_spill_reg(regs_written(inst), ip); + brw_reg spill_src = alloc_spill_reg(regs_written(inst), ip); inst->dst.nr = spill_src.nr; inst->dst.offset %= REG_SIZE; diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index fc5b844cd64..82e7ff0ca9f 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -50,7 +50,7 @@ static bool is_nop_mov(const fs_inst *inst) { if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { - fs_reg dst = inst->dst; + brw_reg dst = inst->dst; for (int i = 0; i < inst->sources; i++) { if (!dst.equals(inst->src[i])) { return false; diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index 8c68df53ed8..11907d6ffbc 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -659,7 +659,7 @@ namespace { * Look up the most current data dependency for register \p r. */ dependency - get(const fs_reg &r) const + get(const brw_reg &r) const { if (const dependency *p = const_cast(this)->dep(r)) return *p; @@ -671,7 +671,7 @@ namespace { * Specify the most current data dependency for register \p r. */ void - set(const fs_reg &r, const dependency &d) + set(const brw_reg &r, const dependency &d) { if (dependency *p = dep(r)) *p = d; @@ -761,7 +761,7 @@ namespace { dependency accum_dep; dependency * - dep(const fs_reg &r) + dep(const brw_reg &r) { const unsigned reg = (r.file == VGRF ? 
r.nr + r.offset / REG_SIZE : reg_offset(r) / REG_SIZE); @@ -1038,7 +1038,7 @@ namespace { dependency::done; for (unsigned j = 0; j < regs_read(inst, i); j++) { - const fs_reg r = byte_offset(inst->src[i], REG_SIZE * j); + const brw_reg r = byte_offset(inst->src[i], REG_SIZE * j); sb.set(r, shadow(sb.get(r), rd_dep)); } } diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp b/src/intel/compiler/brw_fs_sel_peephole.cpp index 65fa8ca068c..1d0a4cc16c1 100644 --- a/src/intel/compiler/brw_fs_sel_peephole.cpp +++ b/src/intel/compiler/brw_fs_sel_peephole.cpp @@ -200,14 +200,14 @@ brw_fs_opt_peephole_sel(fs_visitor &s) * in the "then" clause uses a constant, we need to put it in a * temporary. */ - fs_reg src0(then_mov[i]->src[0]); + brw_reg src0(then_mov[i]->src[0]); if (src0.file == IMM) { src0 = ibld.vgrf(then_mov[i]->src[0].type); ibld.MOV(src0, then_mov[i]->src[0]); } /* 64-bit immediates can't be placed in src1. */ - fs_reg src1(else_mov[i]->src[0]); + brw_reg src1(else_mov[i]->src[0]); if (src1.file == IMM && brw_type_size_bytes(src1.type) == 8) { src1 = ibld.vgrf(else_mov[i]->src[0].type); ibld.MOV(src1, else_mov[i]->src[0]); diff --git a/src/intel/compiler/brw_fs_thread_payload.cpp b/src/intel/compiler/brw_fs_thread_payload.cpp index b2a2c815f6c..5a6bca04965 100644 --- a/src/intel/compiler/brw_fs_thread_payload.cpp +++ b/src/intel/compiler/brw_fs_thread_payload.cpp @@ -386,7 +386,7 @@ cs_thread_payload::cs_thread_payload(const fs_visitor &v) void cs_thread_payload::load_subgroup_id(const fs_builder &bld, - fs_reg &dest) const + brw_reg &dest) const { auto devinfo = bld.shader->devinfo; dest = retype(dest, BRW_TYPE_UD); @@ -483,9 +483,9 @@ bs_thread_payload::bs_thread_payload(const fs_visitor &v) } void -bs_thread_payload::load_shader_type(const fs_builder &bld, fs_reg &dest) const +bs_thread_payload::load_shader_type(const fs_builder &bld, brw_reg &dest) const { - fs_reg ud_dest = retype(dest, BRW_TYPE_UD); + brw_reg ud_dest = retype(dest, BRW_TYPE_UD); bld.MOV(ud_dest, retype(brw_vec1_grf(0, 3), ud_dest.type)); bld.AND(ud_dest, ud_dest, brw_imm_ud(0xf)); } diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 15d2074b3e4..340d049fe3c 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -45,7 +45,7 @@ using namespace brw; * data. It will get adjusted to be a real location before * generate_code() time. */ -fs_reg +brw_reg fs_visitor::interp_reg(const fs_builder &bld, unsigned location, unsigned channel, unsigned comp) { @@ -71,7 +71,7 @@ fs_visitor::interp_reg(const fs_builder &bld, unsigned location, * assign_urb_setup()), so we need to use offset() instead of * component() to select the specified parameter. */ - const fs_reg tmp = bld.vgrf(BRW_TYPE_UD); + const brw_reg tmp = bld.vgrf(BRW_TYPE_UD); bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD), dispatch_width, comp)); return retype(tmp, BRW_TYPE_F); @@ -84,7 +84,7 @@ fs_visitor::interp_reg(const fs_builder &bld, unsigned location, * data. It will get adjusted to be a real location before * generate_code() time. */ -fs_reg +brw_reg fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp) { assert(stage == MESA_SHADER_FRAGMENT); @@ -106,7 +106,7 @@ fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp * assign_urb_setup()), so we need to use offset() instead of * component() to select the specified parameter. 
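* (component() would pick an element out of the first register and so
* assumes a packed layout; offset() steps by the full dispatch width,
* matching how the attributes are laid out at this point.)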
    */
-   const fs_reg tmp = bld.vgrf(BRW_TYPE_UD);
+   const brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
    bld.MOV(tmp, offset(brw_attr_reg(regnr, BRW_TYPE_UD),
                        dispatch_width, comp % 4));
    return retype(tmp, BRW_TYPE_F);
@@ -128,9 +128,9 @@ fs_visitor::emit_interpolation_setup()
    const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
    struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
 
-   fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
-   fs_reg int_sample_offset_xy; /* Used on Gen8+ */
-   fs_reg half_int_sample_offset_x, half_int_sample_offset_y;
+   brw_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
+   brw_reg int_sample_offset_xy; /* Used on Gen8+ */
+   brw_reg half_int_sample_offset_x, half_int_sample_offset_y;
    if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
       /* The thread payload only delivers subspan locations (ss0, ss1,
       * ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
@@ -161,9 +161,9 @@ fs_visitor::emit_interpolation_setup()
       * coordinates out of 2 subspans coordinates in a single ADD instruction
       * (twice the operation above).
       */
-      int_sample_offset_xy = fs_reg(brw_imm_v(0x11001010));
-      half_int_sample_offset_x = fs_reg(brw_imm_uw(0));
-      half_int_sample_offset_y = fs_reg(brw_imm_uw(0));
+      int_sample_offset_xy = brw_reg(brw_imm_v(0x11001010));
+      half_int_sample_offset_x = brw_reg(brw_imm_uw(0));
+      half_int_sample_offset_y = brw_reg(brw_imm_uw(0));
       /* On Gfx12.5, because of regioning restrictions, the interpolation code
       * is slightly different and works off X & Y only inputs. The ordering
       * of the half bytes here is a bit odd, with each subspan replicated
@@ -173,13 +173,13 @@ fs_visitor::emit_interpolation_setup()
       * X offset: 0 0 1 0 0 0 1 0
       * Y offset: 0 0 0 0 1 0 1 0
       */
-      int_sample_offset_x = fs_reg(brw_imm_v(0x01000100));
-      int_sample_offset_y = fs_reg(brw_imm_v(0x01010000));
+      int_sample_offset_x = brw_reg(brw_imm_v(0x01000100));
+      int_sample_offset_y = brw_reg(brw_imm_v(0x01010000));
    }
 
-   fs_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
-   fs_reg int_coarse_offset_xy; /* Used on Gen8+ */
-   fs_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
+   brw_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
+   brw_reg int_coarse_offset_xy; /* Used on Gen8+ */
+   brw_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
    if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
       /* In coarse pixel dispatch we have to do the same ADD instruction that
       * we do in normal per pixel dispatch, except this time we're not adding
@@ -226,9 +226,9 @@ fs_visitor::emit_interpolation_setup()
       bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
    }
 
-   fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
-   fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
-   fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
+   brw_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
+   brw_reg int_pixel_offset_xy; /* Used on Gen8+ */
+   brw_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
    switch (wm_prog_data->coarse_pixel_dispatch) {
    case BRW_NEVER:
       int_pixel_offset_x = int_sample_offset_x;
@@ -301,14 +301,14 @@ fs_visitor::emit_interpolation_setup()
       if (devinfo->verx10 >= 125) {
          const fs_builder dbld =
            abld.exec_all().group(hbld.dispatch_width() * 2, 0);
-         const fs_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW);
-         const fs_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW);
+         const brw_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW);
+         const brw_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW);
 
         dbld.ADD(int_pixel_x,
-                 fs_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
+                 brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
                  int_pixel_offset_x);
         dbld.ADD(int_pixel_y,
-                 fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
+                 brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
                  int_pixel_offset_y);
 
         if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
@@ -338,10 +338,10 @@ fs_visitor::emit_interpolation_setup()
          */
         const fs_builder dbld =
            abld.exec_all().group(hbld.dispatch_width() * 2, 0);
-        fs_reg int_pixel_xy = dbld.vgrf(BRW_TYPE_UW);
+        brw_reg int_pixel_xy = dbld.vgrf(BRW_TYPE_UW);
 
        dbld.ADD(int_pixel_xy,
-                fs_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)),
+                brw_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)),
                 int_pixel_offset_xy);
 
        hbld.emit(FS_OPCODE_PIXEL_X, offset(pixel_x, hbld, i), int_pixel_xy,
@@ -352,7 +352,7 @@ fs_visitor::emit_interpolation_setup()
    }
 
    abld = bld.annotate("compute pos.z");
-   fs_reg coarse_z;
+   brw_reg coarse_z;
    if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER &&
        wm_prog_data->uses_depth_w_coefficients) {
       /* In coarse pixel mode, the HW doesn't interpolate Z coordinate
@@ -360,30 +360,30 @@ fs_visitor::emit_interpolation_setup()
       * pixels locations, here we recompute the Z value with 2 coefficients
       * in X & Y axis.
       */
-      fs_reg coef_payload = brw_vec8_grf(fs_payload().depth_w_coef_reg, 0);
-      const fs_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
-      const fs_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
-      const fs_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
-      const fs_reg z_cy = brw_vec1_grf(coef_payload.nr, 0);
-      const fs_reg z_c0 = brw_vec1_grf(coef_payload.nr, 3);
+      brw_reg coef_payload = brw_vec8_grf(fs_payload().depth_w_coef_reg, 0);
+      const brw_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
+      const brw_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
+      const brw_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
+      const brw_reg z_cy = brw_vec1_grf(coef_payload.nr, 0);
+      const brw_reg z_c0 = brw_vec1_grf(coef_payload.nr, 3);
 
-      const fs_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
-      const fs_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
+      const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
+      const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
      abld.ADD(float_pixel_x, this->pixel_x, negate(x_start));
      abld.ADD(float_pixel_y, this->pixel_y, negate(y_start));
 
      /* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
-     const fs_reg u8_cps_width = fs_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB));
+     const brw_reg u8_cps_width = brw_reg(retype(brw_vec1_grf(1, 0), BRW_TYPE_UB));
      /* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
-     const fs_reg u8_cps_height = byte_offset(u8_cps_width, 1);
-     const fs_reg u32_cps_width = abld.vgrf(BRW_TYPE_UD);
-     const fs_reg u32_cps_height = abld.vgrf(BRW_TYPE_UD);
+     const brw_reg u8_cps_height = byte_offset(u8_cps_width, 1);
+     const brw_reg u32_cps_width = abld.vgrf(BRW_TYPE_UD);
+     const brw_reg u32_cps_height = abld.vgrf(BRW_TYPE_UD);
      abld.MOV(u32_cps_width, u8_cps_width);
      abld.MOV(u32_cps_height, u8_cps_height);
 
-     const fs_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
-     const fs_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
+     const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
+     const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
      abld.MOV(f_cps_width, u32_cps_width);
      abld.MOV(f_cps_height, u32_cps_height);
@@ -401,7 +401,7 @@ fs_visitor::emit_interpolation_setup()
 
    if (wm_prog_data->uses_depth_w_coefficients ||
        wm_prog_data->uses_src_depth) {
-      fs_reg sample_z = this->pixel_z;
+      brw_reg sample_z = this->pixel_z;
 
       switch (wm_prog_data->coarse_pixel_dispatch) {
       case BRW_NEVER:
@@ -505,8 +505,8 @@ fs_visitor::emit_interpolation_setup()
       if (!(centroid_modes & (1 << i)))
          continue;
 
-      const fs_reg centroid_delta_xy = delta_xy[i];
-      const fs_reg &pixel_delta_xy = delta_xy[i - 1];
+      const brw_reg centroid_delta_xy = delta_xy[i];
+      const brw_reg &pixel_delta_xy = delta_xy[i - 1];
 
      delta_xy[i] = bld.vgrf(BRW_TYPE_F, 2);
 
@@ -525,15 +525,15 @@ fs_visitor::emit_interpolation_setup()
 
 fs_inst *
 fs_visitor::emit_single_fb_write(const fs_builder &bld,
-                                 fs_reg color0, fs_reg color1,
-                                 fs_reg src0_alpha, unsigned components)
+                                 brw_reg color0, brw_reg color1,
+                                 brw_reg src0_alpha, unsigned components)
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
 
    /* Hand over gl_FragDepth or the payload depth. */
-   const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
-   fs_reg src_depth, src_stencil;
+   const brw_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
+   brw_reg src_depth, src_stencil;
 
    if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
       src_depth = frag_depth;
@@ -541,13 +541,13 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
    if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
       src_stencil = frag_stencil;
 
-   const fs_reg sources[] = {
+   const brw_reg sources[] = {
       color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
-      (prog_data->uses_omask ? sample_mask : fs_reg()),
+      (prog_data->uses_omask ? sample_mask : brw_reg()),
       brw_imm_ud(components)
    };
    assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
-   fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
+   fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, brw_reg(),
                              sources, ARRAY_SIZE(sources));
 
    if (prog_data->uses_kill) {
@@ -572,7 +572,7 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
       const fs_builder abld = bld.annotate(
          ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
 
-      fs_reg src0_alpha;
+      brw_reg src0_alpha;
       if (replicate_alpha && target != 0)
          src0_alpha = offset(outputs[0], bld, 3);
 
@@ -589,9 +589,9 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
      /* FINISHME: Factor out this frequently recurring pattern into a
       * helper function.
       */
-      const fs_reg srcs[] = { reg_undef, reg_undef,
+      const brw_reg srcs[] = { reg_undef, reg_undef,
                              reg_undef, offset(this->outputs[0], bld, 3) };
-      const fs_reg tmp = bld.vgrf(BRW_TYPE_UD, 4);
+      const brw_reg tmp = bld.vgrf(BRW_TYPE_UD, 4);
      bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
 
      inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4);
@@ -665,7 +665,7 @@ fs_visitor::emit_fb_writes()
 }
 
 void
-fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
+fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
 {
    int slot, urb_offset, length;
    int starting_urb_offset = 0;
@@ -675,8 +675,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ |
       VARYING_BIT_PRIMITIVE_SHADING_RATE;
    const struct intel_vue_map *vue_map = &vue_prog_data->vue_map;
    bool flush;
-   fs_reg sources[8];
-   fs_reg urb_handle;
+   brw_reg sources[8];
+   brw_reg urb_handle;
 
    switch (stage) {
    case MESA_SHADER_VERTEX:
@@ -694,7 +694,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 
    const fs_builder bld = fs_builder(this).at_end();
 
-   fs_reg per_slot_offsets;
+   brw_reg per_slot_offsets;
 
    if (stage == MESA_SHADER_GEOMETRY) {
      const struct brw_gs_prog_data *gs_prog_data =
@@ -760,7 +760,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
            break;
         }
 
-        fs_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8),
+        brw_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8),
                                BRW_TYPE_UD);
        bld.MOV(zero, brw_imm_ud(0u));
 
@@ -769,7 +769,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
          sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE];
       } else if (devinfo->has_coarse_pixel_primitive_and_cb) {
          uint32_t one_fp16 = 0x3C00;
-          fs_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8),
+          brw_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8),
                                            BRW_TYPE_UD);
          bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16));
          sources[length++] = one_by_one_fp16;
@@ -839,7 +839,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
      if (length == 8 || (length > 0 && slot == last_slot))
         flush = true;
      if (flush) {
-        fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+        brw_reg srcs[URB_LOGICAL_NUM_SRCS];
 
        srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
        srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
@@ -883,14 +883,14 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
      if (stage == MESA_SHADER_GEOMETRY)
        return;
 
-      fs_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8),
+      brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8),
                                           BRW_TYPE_UD);
-      fs_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8),
+      brw_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8),
                                BRW_TYPE_UD);
 
      bld.exec_all().MOV(uniform_urb_handle, urb_handle);
 
-      fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
      srcs[URB_LOGICAL_SRC_DATA] = payload;
      srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
@@ -910,9 +910,9 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
    */
   if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) {
      assert(dispatch_width == 8);
-     fs_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
-     fs_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
-     fs_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD);
+     brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
+     brw_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD);
+     brw_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD);
 
     /* Workaround requires all 8 channels (lanes) to be valid. This is
      * understood to mean they all need to be alive. First trick is to find
@@ -941,7 +941,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
     bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
     bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
 
-     fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+     brw_reg srcs[URB_LOGICAL_NUM_SRCS];
     srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
     srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
     srcs[URB_LOGICAL_SRC_DATA] = payload;
@@ -958,7 +958,7 @@ void
 fs_visitor::emit_urb_fence()
 {
    const fs_builder bld = fs_builder(this).at_end();
-   fs_reg dst = bld.vgrf(BRW_TYPE_UD);
+   brw_reg dst = bld.vgrf(BRW_TYPE_UD);
    fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
                              brw_vec8_grf(0, 0),
                              brw_imm_ud(true),
@@ -983,7 +983,7 @@ fs_visitor::emit_cs_terminate()
    * make sure it uses the appropriate register range.
    */
   struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD);
-  fs_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)),
+  brw_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)),
                             BRW_TYPE_UD);
   ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0);
@@ -999,11 +999,11 @@ fs_visitor::emit_cs_terminate()
   if (devinfo->ver < 11)
     desc |= (1 << 4); /* Do not dereference URB */
 
-  fs_reg srcs[4] = {
+  brw_reg srcs[4] = {
     brw_imm_ud(desc), /* desc */
     brw_imm_ud(0), /* ex_desc */
     payload, /* payload */
-    fs_reg(), /* payload2 */
+    brw_reg(), /* payload2 */
   };
 
   fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, reg_undef, srcs, 4);
diff --git a/src/intel/compiler/brw_fs_workaround.cpp b/src/intel/compiler/brw_fs_workaround.cpp
index 947fd0a51a7..99b2a0e29b9 100644
--- a/src/intel/compiler/brw_fs_workaround.cpp
+++ b/src/intel/compiler/brw_fs_workaround.cpp
@@ -103,7 +103,7 @@ brw_fs_workaround_memory_fence_before_eot(fs_visitor &s)
          const fs_builder ibld(&s, block, inst);
          const fs_builder ubld = ibld.exec_all().group(1, 0);
 
-         fs_reg dst = ubld.vgrf(BRW_TYPE_UD);
+         brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
          fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
                                           brw_vec8_grf(0, 0),
                                           /* commit enable */ brw_imm_ud(1),
@@ -230,7 +230,7 @@ brw_fs_workaround_nomask_control_flow(fs_visitor &s)
              */
             const fs_builder ubld = fs_builder(&s, block, inst)
                                     .exec_all().group(s.dispatch_width, 0);
-            const fs_reg flag = retype(brw_flag_reg(0, 0),
+            const brw_reg flag = retype(brw_flag_reg(0, 0),
                                        BRW_TYPE_UD);
 
            /* Due to the lack of flag register allocation we need to save
@@ -238,7 +238,7 @@ brw_fs_workaround_nomask_control_flow(fs_visitor &s)
            */
           const bool save_flag = flag_liveout &
                                  brw_fs_flag_mask(flag, s.dispatch_width / 8);
-            const fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);
+            const brw_reg tmp = ubld.group(8, 0).vgrf(flag.type);
 
          if (save_flag) {
             ubld.group(8, 0).UNDEF(tmp);
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 44148986663..5c3ee9f2ef9 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -28,10 +28,8 @@
 #include "brw_ir.h"
 #include "brw_ir_allocator.h"
 
-using fs_reg = brw_reg;
-
-static inline fs_reg
-horiz_offset(const fs_reg &reg, unsigned delta)
+static inline brw_reg
+horiz_offset(const brw_reg &reg, unsigned delta)
 {
    switch (reg.file) {
    case BAD_FILE:
@@ -65,8 +63,8 @@ horiz_offset(const fs_reg &reg, unsigned delta)
    unreachable("Invalid register file");
 }
 
-static inline fs_reg
-offset(fs_reg reg, unsigned width, unsigned delta)
+static inline brw_reg
+offset(brw_reg reg, unsigned width, unsigned delta)
 {
    switch (reg.file) {
    case BAD_FILE:
@@ -87,8 +85,8 @@ offset(fs_reg reg, unsigned width, unsigned delta)
  * Get the scalar channel of \p reg given by \p idx and replicate it to all
  * channels of the result.
  */
-static inline fs_reg
-component(fs_reg reg, unsigned idx)
+static inline brw_reg
+component(brw_reg reg, unsigned idx)
 {
    reg = horiz_offset(reg, idx);
    reg.stride = 0;
@@ -109,7 +107,7 @@ component(fs_reg reg, unsigned idx)
  * address spaces, one for each allocation and input attribute respectively.
  */
 static inline uint32_t
-reg_space(const fs_reg &r)
+reg_space(const brw_reg &r)
 {
    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
 }
@@ -119,7 +117,7 @@ reg_space(const fs_reg &r)
  * reg_space().
  */
 static inline unsigned
-reg_offset(const fs_reg &r)
+reg_offset(const brw_reg &r)
 {
    return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
          (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
@@ -132,7 +130,7 @@ reg_offset(const fs_reg &r)
  * one, or zero if components are tightly packed in the register file.
  */
 static inline unsigned
-reg_padding(const fs_reg &r)
+reg_padding(const brw_reg &r)
 {
    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
                             r.hstride == 0 ? 0 :
@@ -146,7 +144,7 @@ reg_padding(const fs_reg &r)
  * spanning \p ds bytes.
  */
 static inline bool
-regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
+regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
 {
    if (r.file != s.file)
       return false;
@@ -166,7 +164,7 @@ regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
  * [s.offset, s.offset + ds[.
  */
 static inline bool
-region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
+region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
 {
    return reg_space(r) == reg_space(s) &&
          reg_offset(r) >= reg_offset(s) &&
@@ -179,7 +177,7 @@ region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
  * channels.
  */
 static inline bool
-is_periodic(const fs_reg &reg, unsigned n)
+is_periodic(const brw_reg &reg, unsigned n)
 {
    if (reg.file == BAD_FILE || reg.is_null()) {
       return true;
@@ -203,7 +201,7 @@ is_periodic(const fs_reg &reg, unsigned n)
 }
 
 static inline bool
-is_uniform(const fs_reg &reg)
+is_uniform(const brw_reg &reg)
 {
    return is_periodic(reg, 1);
 }
@@ -211,47 +209,47 @@ is_uniform(const fs_reg &reg)
 /**
  * Get the specified 8-component quarter of a register.
  */
-static inline fs_reg
-quarter(const fs_reg &reg, unsigned idx)
+static inline brw_reg
+quarter(const brw_reg &reg, unsigned idx)
 {
    assert(idx < 4);
    return horiz_offset(reg, 8 * idx);
 }
 
-static inline fs_reg
-horiz_stride(fs_reg reg, unsigned s)
+static inline brw_reg
+horiz_stride(brw_reg reg, unsigned s)
 {
    reg.stride *= s;
    return reg;
 }
 
-bool fs_reg_saturate_immediate(fs_reg *reg);
-bool fs_reg_negate_immediate(fs_reg *reg);
-bool fs_reg_abs_immediate(fs_reg *reg);
+bool fs_reg_saturate_immediate(brw_reg *reg);
+bool fs_reg_negate_immediate(brw_reg *reg);
+bool fs_reg_abs_immediate(brw_reg *reg);
 
-static const fs_reg reg_undef;
+static const brw_reg reg_undef;
 
 struct fs_inst : public exec_node {
 private:
    fs_inst &operator=(const fs_inst &);
 
-   void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
-             const fs_reg *src, unsigned sources);
+   void init(enum opcode opcode, uint8_t exec_width, const brw_reg &dst,
+             const brw_reg *src, unsigned sources);
 
 public:
    DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
 
    fs_inst();
    fs_inst(enum opcode opcode, uint8_t exec_size);
-   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
-   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
-           const fs_reg &src0);
-   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
-           const fs_reg &src0, const fs_reg &src1);
-   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
-           const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
-   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
-           const fs_reg src[], unsigned sources);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
+           const brw_reg &src0);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
+           const brw_reg &src0, const brw_reg &src1);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
+           const brw_reg &src0, const brw_reg &src1, const brw_reg &src2);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
+           const brw_reg src[], unsigned sources);
    fs_inst(const fs_inst &that);
    ~fs_inst();
@@ -332,7 +330,7 @@ public:
    const char *annotation;
    /** @} */
 
-   uint8_t sources; /**< Number of fs_reg sources. */
+   uint8_t sources; /**< Number of brw_reg sources. */
 
    /**
    * Execution size of the instruction. This is used by the generator to
@@ -425,9 +423,9 @@ public:
      uint32_t bits;
   };
 
-   fs_reg dst;
-   fs_reg *src;
-   fs_reg builtin_src[4];
+   brw_reg dst;
+   brw_reg *src;
+   brw_reg builtin_src[4];
 };
 
 /**
@@ -587,7 +585,7 @@ is_unordered(const intel_device_info *devinfo, const fs_inst *inst)
  * single one-dimensional stride.
 */
 static inline unsigned
-byte_stride(const fs_reg &reg)
+byte_stride(const brw_reg &reg)
 {
    switch (reg.file) {
    case BAD_FILE:
@@ -675,7 +673,7 @@ has_dst_aligned_region_restriction(const intel_device_info *devinfo,
 static inline bool
 has_subdword_integer_region_restriction(const intel_device_info *devinfo,
                                         const fs_inst *inst,
-                                        const fs_reg *srcs, unsigned num_srcs)
+                                        const brw_reg *srcs, unsigned num_srcs)
 {
    if (devinfo->ver >= 20 &&
       brw_type_is_int(inst->dst.type) &&
@@ -741,7 +739,7 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst)
 inline bool
 is_identity_payload(brw_reg_file file, const fs_inst *inst)
 {
   if (is_copy_payload(file, inst)) {
-      fs_reg reg = inst->src[0];
+      brw_reg reg = inst->src[0];
 
      for (unsigned i = 0; i < inst->sources; i++) {
         reg.type = inst->src[i].type;
@@ -823,7 +821,7 @@ brw_fs_bit_mask(unsigned n)
 }
 
 static inline unsigned
-brw_fs_flag_mask(const fs_reg &r, unsigned sz)
+brw_fs_flag_mask(const brw_reg &r, unsigned sz)
 {
    if (r.file == ARF) {
      const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index a413b7b50ee..47bde298114 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -778,7 +778,7 @@ namespace {
    * Return the dependency ID of a backend_reg, offset by \p delta GRFs.
    */
   enum intel_eu_dependency_id
-  reg_dependency_id(const intel_device_info *devinfo, const fs_reg &r,
+  reg_dependency_id(const intel_device_info *devinfo, const brw_reg &r,
                     const int delta)
   {
      if (r.file == VGRF) {
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index 8b93e779c7c..3b6d0e856a0 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -41,13 +41,13 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst)
    assert(inst->size_written % REG_SIZE == 0);
    assert(inst->header_size == 0);
 
-   fs_reg payload_sources[2];
+   brw_reg payload_sources[2];
    unsigned header_size = 0;
    payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
    if (per_slot_present)
       payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
 
-   fs_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size),
+   brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size),
                              BRW_TYPE_F);
    bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size);
@@ -84,12 +84,12 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
    assert(inst->header_size == 0);
 
    /* Get the logical send arguments. */
-   const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
+   const brw_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
 
    /* Calculate the total number of components of the payload.
 */
    const unsigned dst_comps = inst->size_written / (REG_SIZE * reg_unit(devinfo));
 
-   fs_reg payload = bld.vgrf(BRW_TYPE_UD);
+   brw_reg payload = bld.vgrf(BRW_TYPE_UD);
 
    bld.MOV(payload, handle);
 
@@ -101,7 +101,7 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
       inst->offset = 0;
    }
 
-   fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
+   brw_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
    if (offsets.file != BAD_FILE) {
       bld.ADD(payload, payload, offsets);
    }
@@ -147,8 +147,8 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst)
    const unsigned length = 1 + per_slot_present + channel_mask_present +
      inst->components_read(URB_LOGICAL_SRC_DATA);
 
-   fs_reg *payload_sources = new fs_reg[length];
-   fs_reg payload = brw_vgrf(bld.shader->alloc.allocate(length),
+   brw_reg *payload_sources = new brw_reg[length];
+   brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(length),
                              BRW_TYPE_F);
 
    unsigned header_size = 0;
@@ -197,16 +197,16 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
    assert(devinfo->has_lsc);
 
    /* Get the logical send arguments. */
-   const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
-   const fs_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
-      inst->src[URB_LOGICAL_SRC_DATA] : fs_reg(brw_imm_ud(0));
+   const brw_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
+   const brw_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
+      inst->src[URB_LOGICAL_SRC_DATA] : brw_reg(brw_imm_ud(0));
    assert(brw_type_size_bytes(src.type) == 4);
 
    /* Calculate the total number of components of the payload. */
    const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
    const unsigned src_sz = brw_type_size_bytes(src.type);
 
-   fs_reg payload = bld.vgrf(BRW_TYPE_UD);
+   brw_reg payload = bld.vgrf(BRW_TYPE_UD);
 
    bld.MOV(payload, handle);
 
@@ -218,12 +218,12 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
      inst->offset = 0;
    }
 
-   fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
+   brw_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
    if (offsets.file != BAD_FILE) {
      bld.ADD(payload, payload, offsets);
    }
 
-   const fs_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
+   const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
    unsigned mask = 0;
 
    if (cmask.file != BAD_FILE) {
@@ -232,7 +232,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
      mask = cmask.ud >> 16;
    }
 
-   fs_reg payload2 = bld.move_to_vgrf(src, src_comps);
+   brw_reg payload2 = bld.move_to_vgrf(src, src_comps);
    const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
 
    inst->sfid = BRW_SFID_URB;
@@ -265,10 +265,10 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
 
 static void
 setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
-                    fs_reg *dst, fs_reg color, unsigned components)
+                    brw_reg *dst, brw_reg color, unsigned components)
 {
    if (key->clamp_fragment_color) {
-      fs_reg tmp = bld.vgrf(BRW_TYPE_F, 4);
+      brw_reg tmp = bld.vgrf(BRW_TYPE_F, 4);
      assert(color.type == BRW_TYPE_F);
 
      for (unsigned i = 0; i < components; i++)
@@ -290,19 +290,19 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
 {
    assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
    const intel_device_info *devinfo = bld.shader->devinfo;
-   const fs_reg color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
-   const fs_reg color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
-   const fs_reg src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
-   const fs_reg src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
-   const fs_reg dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
-   const fs_reg src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
-   fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
+   const brw_reg color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
+   const brw_reg color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
+   const brw_reg src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
+   const brw_reg src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
+   const brw_reg dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+   const brw_reg src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
+   brw_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
    const unsigned components =
      inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
 
    assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
 
-   fs_reg sources[15];
+   brw_reg sources[15];
    int header_size = 2, payload_header_size;
    unsigned length = 0;
 
@@ -319,7 +319,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       */
      const fs_builder ubld = bld.exec_all().group(8, 0);
 
-     fs_reg header = ubld.vgrf(BRW_TYPE_UD, 2);
+     brw_reg header = ubld.vgrf(BRW_TYPE_UD, 2);
      if (bld.group() < 16) {
        /* The header starts off as g0 and g1 for the first half */
        ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
@@ -327,7 +327,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
      } else {
        /* The header starts off as g0 and g2 for the second half */
        assert(bld.group() < 32);
-       const fs_reg header_sources[2] = {
+       const brw_reg header_sources[2] = {
          retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
          retype(brw_vec8_grf(2, 0), BRW_TYPE_UD),
        };
@@ -379,7 +379,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
      sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F);
      bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
        .MOV(sources[length],
-            fs_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0)));
+            brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0)));
      length++;
    }
 
@@ -387,7 +387,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
      for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
        const fs_builder &ubld = bld.exec_all().group(8, i)
                                 .annotate("FB write src0 alpha");
-       const fs_reg tmp = ubld.vgrf(BRW_TYPE_F);
+       const brw_reg tmp = ubld.vgrf(BRW_TYPE_F);
        ubld.MOV(tmp, horiz_offset(src0_alpha, i * 8));
        setup_color_payload(ubld, key, &sources[length], tmp, 1);
        length++;
@@ -395,7 +395,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    if (sample_mask.file != BAD_FILE) {
-      const fs_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)),
+      const brw_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)),
                                  BRW_TYPE_UD);
 
      /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
@@ -455,7 +455,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    /* Send from the GRF */
-   fs_reg payload = brw_vgrf(-1, BRW_TYPE_F);
+   brw_reg payload = brw_vgrf(-1, BRW_TYPE_F);
    fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
    payload.nr = bld.shader->alloc.allocate(regs_written(load));
    load->dst = payload;
@@ -468,7 +468,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
      brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
                        0 /* coarse_rt_write */);
 
-   fs_reg desc = brw_imm_ud(0);
+   brw_reg desc = brw_imm_ud(0);
    if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
      inst->desc |= (1 << 18);
    } else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
@@ -519,7 +519,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
    const intel_device_info *devinfo = bld.shader->devinfo;
    const fs_builder &ubld = bld.exec_all().group(8, 0);
    const unsigned length = 2;
-   const fs_reg header = ubld.vgrf(BRW_TYPE_UD, length);
+   const brw_reg header = ubld.vgrf(BRW_TYPE_UD, length);
 
    assert(devinfo->ver >= 9 && devinfo->ver < 20);
 
@@ -528,7 +528,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
                                  BRW_TYPE_UD));
    } else {
      assert(bld.group() < 32);
-     const fs_reg header_sources[] = {
+     const brw_reg header_sources[] = {
        retype(brw_vec8_grf(0, 0), BRW_TYPE_UD),
        retype(brw_vec8_grf(2, 0), BRW_TYPE_UD)
      };
@@ -567,7 +567,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
    inst->src[0] = brw_imm_ud(0);
    inst->src[1] = brw_imm_ud(0);
    inst->src[2] = header;
-   inst->src[3] = fs_reg();
+   inst->src[3] = brw_reg();
    inst->mlen = length;
    inst->header_size = length;
    inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
@@ -580,7 +580,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst,
 }
 
 static bool
-is_high_sampler(const struct intel_device_info *devinfo, const fs_reg &sampler)
+is_high_sampler(const struct intel_device_info *devinfo, const brw_reg &sampler)
 {
    return sampler.file != IMM || sampler.ud >= 16;
 }
@@ -676,15 +676,15 @@ sampler_msg_type(const intel_device_info *devinfo,
  * the given requested_alignment_sz.
 */
 static fs_inst *
-emit_load_payload_with_padding(const fs_builder &bld, const fs_reg &dst,
-                               const fs_reg *src, unsigned sources,
+emit_load_payload_with_padding(const fs_builder &bld, const brw_reg &dst,
+                               const brw_reg *src, unsigned sources,
                                unsigned header_size,
                                unsigned requested_alignment_sz)
 {
    unsigned length = 0;
    unsigned num_srcs =
      sources * DIV_ROUND_UP(requested_alignment_sz, bld.dispatch_width());
-   fs_reg *src_comps = new fs_reg[num_srcs];
+   brw_reg *src_comps = new brw_reg[num_srcs];
 
    for (unsigned i = 0; i < header_size; i++)
      src_comps[length++] = src[i];
@@ -702,7 +702,7 @@ emit_load_payload_with_padding(const fs_builder &bld, const fs_reg &dst,
       */
      if (src_sz < requested_alignment_sz) {
        for (unsigned j = 0; j < (requested_alignment_sz / src_sz) - 1; j++) {
-          src_comps[length++] = retype(fs_reg(), padding_payload_type);
+          src_comps[length++] = retype(brw_reg(), padding_payload_type);
        }
      }
    }
@@ -735,17 +735,17 @@ shader_opcode_needs_header(opcode op)
 
 static void
 lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
-                           const fs_reg &coordinate,
-                           const fs_reg &shadow_c,
-                           fs_reg lod, const fs_reg &lod2,
-                           const fs_reg &min_lod,
-                           const fs_reg &sample_index,
-                           const fs_reg &mcs,
-                           const fs_reg &surface,
-                           const fs_reg &sampler,
-                           const fs_reg &surface_handle,
-                           const fs_reg &sampler_handle,
-                           const fs_reg &tg4_offset,
+                           const brw_reg &coordinate,
+                           const brw_reg &shadow_c,
+                           brw_reg lod, const brw_reg &lod2,
+                           const brw_reg &min_lod,
+                           const brw_reg &sample_index,
+                           const brw_reg &mcs,
+                           const brw_reg &surface,
+                           const brw_reg &sampler,
+                           const brw_reg &surface_handle,
+                           const brw_reg &sampler_handle,
+                           const brw_reg &tg4_offset,
                            unsigned payload_type_bit_size,
                            unsigned coord_components,
                            unsigned grad_components,
@@ -762,7 +762,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
    unsigned reg_width = bld.dispatch_width() / 8;
    unsigned header_size = 0, length = 0;
    opcode op = inst->opcode;
-   fs_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
+   brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
    for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
      sources[i] = bld.vgrf(payload_type);
 
@@ -782,7 +782,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
      * larger sampler numbers we need to offset the Sampler State Pointer in
      * the header.
      */
-     fs_reg header = retype(sources[0], BRW_TYPE_UD);
+     brw_reg header = retype(sources[0], BRW_TYPE_UD);
     for (header_size = 0; header_size < reg_unit(devinfo); header_size++)
        sources[length++] = byte_offset(header, REG_SIZE * header_size);
 
@@ -840,7 +840,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
          ubld1.MOV(component(header, 3), sampler_handle);
        }
     } else if (is_high_sampler(devinfo, sampler)) {
-        fs_reg sampler_state_ptr =
+        brw_reg sampler_state_ptr =
          retype(brw_vec1_grf(0, 3), BRW_TYPE_UD);
 
        /* Gfx11+ sampler message headers include bits in 4:0 which conflict
@@ -860,7 +860,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
          ubld1.ADD(component(header, 3), sampler_state_ptr,
                    brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
        } else {
-          fs_reg tmp = ubld1.vgrf(BRW_TYPE_UD);
+          brw_reg tmp = ubld1.vgrf(BRW_TYPE_UD);
          ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
          ubld1.SHL(tmp, tmp, brw_imm_ud(4));
          ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
@@ -991,16 +991,16 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
         *  ld2dms_w   si  mcs0 mcs1 mcs2  mcs3  u  v  r
         */
        if (op == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
-          fs_reg tmp = offset(mcs, bld, i);
+          brw_reg tmp = offset(mcs, bld, i);
          sources[length] = retype(sources[length], payload_unsigned_type);
          bld.MOV(sources[length++],
                  mcs.file == IMM ? mcs :
-                 fs_reg(subscript(tmp, payload_unsigned_type, 0)));
+                 brw_reg(subscript(tmp, payload_unsigned_type, 0)));
 
          sources[length] = retype(sources[length], payload_unsigned_type);
          bld.MOV(sources[length++],
                  mcs.file == IMM ? mcs :
-                 fs_reg(subscript(tmp, payload_unsigned_type, 1)));
+                 brw_reg(subscript(tmp, payload_unsigned_type, 1)));
        } else {
          sources[length] = retype(sources[length], payload_unsigned_type);
          bld.MOV(sources[length++],
@@ -1087,7 +1087,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
      bld.MOV(sources[length++], min_lod);
    }
 
-   const fs_reg src_payload =
+   const brw_reg src_payload =
      brw_vgrf(bld.shader->alloc.allocate(length * reg_width),
               BRW_TYPE_F);
    /* In case of 16-bit payload each component takes one full register in
@@ -1149,7 +1149,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
        inst->src[0] = brw_imm_ud(0);
      } else {
        const fs_builder ubld = bld.group(1, 0).exec_all();
-       fs_reg desc = ubld.vgrf(BRW_TYPE_UD);
+       brw_reg desc = ubld.vgrf(BRW_TYPE_UD);
        ubld.SHL(desc, sampler, brw_imm_ud(8));
        inst->src[0] = component(desc, 0);
      }
@@ -1168,7 +1168,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
                        simd_mode, 0 /* return_format unused on gfx7+ */);
      const fs_builder ubld = bld.group(1, 0).exec_all();
-     fs_reg desc = ubld.vgrf(BRW_TYPE_UD);
+     brw_reg desc = ubld.vgrf(BRW_TYPE_UD);
      if (surface.equals(sampler)) {
        /* This case is common in GL */
        ubld.MUL(desc, surface, brw_imm_ud(0x101));
@@ -1212,7 +1212,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
                                      const fs_inst *inst)
 {
    assert(inst);
-   const fs_reg *src = inst->src;
+   const brw_reg *src = inst->src;
    unsigned src_type_size = 0;
 
    /* All sources need to have the same size, therefore seek the first valid
@@ -1263,18 +1263,18 @@ static void
 lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst)
 {
    const intel_device_info *devinfo = bld.shader->devinfo;
-   const fs_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
-   const fs_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
-   const fs_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
-   const fs_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
-   const fs_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
-   const fs_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
-   const fs_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
-   const fs_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
-   const fs_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
-   const fs_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
-   const fs_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
-   const fs_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
+   const brw_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
+   const brw_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
+   const brw_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
+   const brw_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
+   const brw_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
+   const brw_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
+   const brw_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
+   const brw_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
+   const brw_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
+   const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
+   const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
+   const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
    assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
    const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
    assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
@@ -1312,7 +1312,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
    const fs_builder ubld = bld.exec_all().group(1, 0);
    const fs_visitor &s = *bld.shader;
 
-   const fs_reg vector_mask = ubld.vgrf(BRW_TYPE_UW);
+   const brw_reg vector_mask = ubld.vgrf(BRW_TYPE_UW);
    ubld.UNDEF(vector_mask);
    ubld.emit(SHADER_OPCODE_READ_ARCH_REG, vector_mask, retype(brw_sr0_reg(3),
                                                               BRW_TYPE_UD));
@@ -1338,7 +1338,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
 
 static void
 setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
-                          const fs_reg &surface, const fs_reg &surface_handle)
+                          const brw_reg &surface, const brw_reg &surface_handle)
 {
    const brw_compiler *compiler = bld.shader->compiler;
 
@@ -1362,7 +1362,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
    } else {
      inst->desc = desc;
      const fs_builder ubld = bld.exec_all().group(1, 0);
-     fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
+     brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
      ubld.AND(tmp, surface, brw_imm_ud(0xff));
      inst->src[0] = component(tmp, 0);
      inst->src[1] = brw_imm_ud(0); /* ex_desc */
@@ -1371,7 +1371,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
 
 static void
 setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
-                              uint32_t desc, const fs_reg &surface)
+                              uint32_t desc, const brw_reg &surface)
 {
    const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
    const brw_compiler *compiler = bld.shader->compiler;
@@ -1397,7 +1397,7 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
      inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud));
    } else {
      const fs_builder ubld = bld.exec_all().group(1, 0);
-     fs_reg tmp = ubld.vgrf(BRW_TYPE_UD);
+     brw_reg tmp = ubld.vgrf(BRW_TYPE_UD);
      ubld.SHL(tmp, surface, brw_imm_ud(24));
      inst->src[1] = component(tmp, 0);
    }
@@ -1419,13 +1419,13 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    const intel_device_info *devinfo = bld.shader->devinfo;
 
    /* Get the logical send arguments. */
-   const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
-   const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
-   const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
-   const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
-   const UNUSED fs_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
-   const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
-   const fs_reg allow_sample_mask =
+   const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+   const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+   const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+   const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
+   const UNUSED brw_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
+   const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+   const brw_reg allow_sample_mask =
      inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
    assert(arg.file == IMM);
    assert(allow_sample_mask.file == IMM);
@@ -1450,10 +1450,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
 
    const bool has_side_effects = inst->has_side_effects();
 
-   fs_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
-                        fs_reg(brw_imm_ud(0xffffffff));
+   brw_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
+                         brw_reg(brw_imm_ud(0xffffffff));
 
-   fs_reg header;
+   brw_reg header;
    if (is_stateless) {
      assert(!is_surface_access);
      fs_builder ubld = bld.exec_all().group(8, 0);
@@ -1462,7 +1462,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    }
    const unsigned header_sz = header.file != BAD_FILE ? 1 : 0;
 
-   fs_reg payload, payload2;
+   brw_reg payload, payload2;
    unsigned mlen, ex_mlen = 0;
    if (src.file == BAD_FILE || header.file == BAD_FILE) {
      /* We have split sends on gfx9 and above */
@@ -1482,7 +1482,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
      /* Allocate space for the payload. */
      const unsigned sz = header_sz + addr_sz + src_sz;
      payload = bld.vgrf(BRW_TYPE_UD, sz);
-     fs_reg *const components = new fs_reg[sz];
+     brw_reg *const components = new brw_reg[sz];
      unsigned n = 0;
 
      /* Construct the payload. */
@@ -1657,13 +1657,13 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    assert(devinfo->has_lsc);
 
    /* Get the logical send arguments.
 */
-   const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
-   const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
-   const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
-   const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
-   const fs_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
-   const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
-   const fs_reg allow_sample_mask =
+   const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+   const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+   const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+   const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
+   const brw_reg dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
+   const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+   const brw_reg allow_sample_mask =
      inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
    assert(arg.file == IMM);
    assert(allow_sample_mask.file == IMM);
@@ -1685,7 +1685,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    unsigned num_components = 0;
    unsigned ex_mlen = 0;
-   fs_reg payload, payload2;
+   brw_reg payload, payload2;
    payload = bld.move_to_vgrf(addr, addr_sz);
    if (src.file != BAD_FILE) {
      payload2 = bld.move_to_vgrf(src, src_comps);
@@ -1693,8 +1693,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    }
 
    /* Predicate the instruction on the sample mask if needed */
-   fs_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
-                        fs_reg(brw_imm_ud(0xffffffff));
+   brw_reg sample_mask = allow_sample_mask.ud ? brw_sample_mask_reg(bld) :
+                         brw_reg(brw_imm_ud(0xffffffff));
    if (sample_mask.file != BAD_FILE && sample_mask.file != IMM)
      brw_emit_predicate_on_sample_mask(bld, inst);
@@ -1831,11 +1831,11 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
    assert(devinfo->has_lsc);
 
    /* Get the logical send arguments.
 */
-   const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
-   const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
-   const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
-   const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
-   const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+   const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+   const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+   const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+   const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
+   const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
    assert(arg.file == IMM);
    assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
    assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
@@ -1849,7 +1849,7 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
    const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
 
    fs_builder ubld = bld.exec_all().group(1, 0);
-   fs_reg stateless_ex_desc;
+   brw_reg stateless_ex_desc;
    if (is_stateless) {
      stateless_ex_desc = ubld.vgrf(BRW_TYPE_UD);
      ubld.AND(stateless_ex_desc,
@@ -1859,7 +1859,7 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
      ubld.SHR(stateless_ex_desc, stateless_ex_desc, brw_imm_ud(4));
    }
 
-   fs_reg data;
+   brw_reg data;
    if (write) {
      const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
      data = retype(bld.move_to_vgrf(src, src_sz), BRW_TYPE_UD);
@@ -1913,11 +1913,11 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
    const intel_device_info *devinfo = bld.shader->devinfo;
 
    /* Get the logical send arguments. */
-   const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
-   const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
-   const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
-   const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
-   const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+   const brw_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+   const brw_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+   const brw_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+   const brw_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
+   const brw_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
    assert(arg.file == IMM);
    assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
    assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
@@ -1941,7 +1941,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
    /* The address is stored in the header. See MH_A32_GO and MH_BTS_GO.
 */
    fs_builder ubld = bld.exec_all().group(8, 0);
-   fs_reg header = ubld.vgrf(BRW_TYPE_UD);
+   brw_reg header = ubld.vgrf(BRW_TYPE_UD);
 
    if (is_stateless)
      ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
@@ -1954,7 +1954,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
    else
      ubld.group(1, 0).MOV(component(header, 2), addr);
 
-   fs_reg data;
+   brw_reg data;
    unsigned ex_mlen = 0;
    if (write) {
      const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
@@ -1981,14 +1981,14 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
    inst->src[3] = data;
 }
 
-static fs_reg
-emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
+static brw_reg
+emit_a64_oword_block_header(const fs_builder &bld, const brw_reg &addr)
 {
    const fs_builder ubld = bld.exec_all().group(8, 0);
 
    assert(brw_type_size_bytes(addr.type) == 8 && addr.stride == 0);
 
-   fs_reg expanded_addr = addr;
+   brw_reg expanded_addr = addr;
    if (addr.file == UNIFORM) {
      /* We can't do stride 1 with the UNIFORM file, it requires stride 0 */
      expanded_addr = ubld.vgrf(BRW_TYPE_UQ);
@@ -1996,11 +1996,11 @@ emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
      ubld.MOV(expanded_addr, retype(addr, BRW_TYPE_UQ));
    }
 
-   fs_reg header = ubld.vgrf(BRW_TYPE_UD);
+   brw_reg header = ubld.vgrf(BRW_TYPE_UD);
    ubld.MOV(header, brw_imm_ud(0));
 
    /* Use a 2-wide MOV to fill out the address */
-   fs_reg addr_vec2 = expanded_addr;
+   brw_reg addr_vec2 = expanded_addr;
    addr_vec2.type = BRW_TYPE_UD;
    addr_vec2.stride = 1;
    ubld.group(2, 0).MOV(header, addr_vec2);
@@ -2034,8 +2034,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
    const intel_device_info *devinfo = bld.shader->devinfo;
 
    /* Get the logical send arguments. */
-   const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
-   const fs_reg src = inst->src[A64_LOGICAL_SRC];
+   const brw_reg addr = inst->src[A64_LOGICAL_ADDRESS];
+   const brw_reg src = inst->src[A64_LOGICAL_SRC];
    const unsigned src_sz = brw_type_size_bytes(src.type);
    const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
 
@@ -2044,8 +2044,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
    const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
    const bool has_side_effects = inst->has_side_effects();
 
-   fs_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
-   fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
+   brw_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
+   brw_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
    unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE;
    unsigned num_components = 0;
 
@@ -2155,14 +2155,14 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
 {
    const intel_device_info *devinfo = bld.shader->devinfo;
 
-   const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
-   const fs_reg src = inst->src[A64_LOGICAL_SRC];
+   const brw_reg addr = inst->src[A64_LOGICAL_ADDRESS];
+   const brw_reg src = inst->src[A64_LOGICAL_SRC];
    const unsigned src_comps = inst->components_read(1);
    assert(inst->src[A64_LOGICAL_ARG].file == IMM);
    const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
    const bool has_side_effects = inst->has_side_effects();
 
-   fs_reg payload, payload2;
+   brw_reg payload, payload2;
    unsigned mlen, ex_mlen = 0, header_size = 0;
    if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
        inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
@@ -2279,16 +2279,16 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
    const intel_device_info *devinfo = bld.shader->devinfo;
    ASSERTED const brw_compiler *compiler = bld.shader->compiler;
 
-   fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
-   fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
-   fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
-   fs_reg alignment_B = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT];
+   brw_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
+   brw_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
+   brw_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
+   brw_reg alignment_B = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT];
 
    /* We are switching the instruction from an ALU-like instruction to a
    * send-from-grf instruction. Since sends can't handle strides or
    * source modifiers, we have to make a copy of the offset source.
    */
-   fs_reg ubo_offset = bld.move_to_vgrf(offset_B, 1);
+   brw_reg ubo_offset = bld.move_to_vgrf(offset_B, 1);
 
    enum lsc_addr_surface_type surf_type =
      surface_handle.file == BAD_FILE ?
@@ -2365,15 +2365,15 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
    const intel_device_info *devinfo = bld.shader->devinfo;
    const brw_compiler *compiler = bld.shader->compiler;
 
-   fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
-   fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
-   fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
+   brw_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
+   brw_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
+   brw_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
 
    /* We are switching the instruction from an ALU-like instruction to a
    * send-from-grf instruction. Since sends can't handle strides or
    * source modifiers, we have to make a copy of the offset source.
    */
-   fs_reg ubo_offset = bld.vgrf(BRW_TYPE_UD);
+   brw_reg ubo_offset = bld.vgrf(BRW_TYPE_UD);
    bld.MOV(ubo_offset, offset_B);
 
    assert(inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].file == BRW_IMMEDIATE_VALUE);
@@ -2445,7 +2445,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    const intel_device_info *devinfo = bld.shader->devinfo;
 
    /* We have to send something */
-   fs_reg payload = brw_vec8_grf(0, 0);
+   brw_reg payload = brw_vec8_grf(0, 0);
    unsigned mlen = 1;
 
    unsigned mode;
@@ -2473,7 +2473,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    const bool dynamic_mode =
      inst->src[INTERP_SRC_DYNAMIC_MODE].file != BAD_FILE;
 
-   fs_reg desc = inst->src[INTERP_SRC_MSG_DESC];
+   brw_reg desc = inst->src[INTERP_SRC_MSG_DESC];
    uint32_t desc_imm =
      brw_pixel_interp_desc(devinfo,
                            /* Leave the mode at 0 if persample_dispatch is
@@ -2488,7 +2488,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
      desc_imm |= (1 << 15);
    } else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
      STATIC_ASSERT(INTEL_MSAA_FLAG_COARSE_PI_MSG == (1 << 15));
-     fs_reg orig_desc = desc;
+     brw_reg orig_desc = desc;
      const fs_builder &ubld = bld.exec_all().group(8, 0);
      desc = ubld.vgrf(BRW_TYPE_UD);
      ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
@@ -2518,7 +2518,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    * components of "Per Message Offset", which will give us the pixel offset 0x0.
    */
   if (dynamic_mode) {
-     fs_reg orig_desc = desc;
+     brw_reg orig_desc = desc;
     const fs_builder &ubld = bld.exec_all().group(8, 0);
     desc = ubld.vgrf(BRW_TYPE_UD);
@@ -2565,13 +2565,13 @@ static void
 lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
 {
    const intel_device_info *devinfo = bld.shader->devinfo;
-   fs_reg global_addr = inst->src[0];
-   const fs_reg btd_record = inst->src[1];
+   brw_reg global_addr = inst->src[0];
+   const brw_reg btd_record = inst->src[1];
 
    const unsigned unit = reg_unit(devinfo);
    const unsigned mlen = 2 * unit;
    const fs_builder ubld = bld.exec_all();
-   fs_reg header = ubld.vgrf(BRW_TYPE_UD, 2 * unit);
+   brw_reg header = ubld.vgrf(BRW_TYPE_UD, 2 * unit);
 
    ubld.MOV(header, brw_imm_ud(0));
    switch (inst->opcode) {
@@ -2595,12 +2595,12 @@ lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
    /* Stack IDs are always in R1 regardless of whether we're coming from a
    * bindless shader or a regular compute shader.
    */
-   fs_reg stack_ids = retype(offset(header, bld, 1), BRW_TYPE_UW);
+   brw_reg stack_ids = retype(offset(header, bld, 1), BRW_TYPE_UW);
    bld.exec_all().MOV(stack_ids, retype(brw_vec8_grf(1 * unit, 0),
                                         BRW_TYPE_UW));
 
    unsigned ex_mlen = 0;
-   fs_reg payload;
+   brw_reg payload;
    if (inst->opcode == SHADER_OPCODE_BTD_SPAWN_LOGICAL) {
      ex_mlen = 2 * (inst->exec_size / 8);
      payload = bld.move_to_vgrf(btd_record, 1);
@@ -2643,33 +2643,33 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
    * so that the MOV operates on 2 components rather than twice the same
    * component.
    */
-   fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_TYPE_UD);
+   brw_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_TYPE_UD);
    globals_addr.stride = 1;
-   const fs_reg bvh_level =
+   const brw_reg bvh_level =
      inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
      inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
      bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
                       inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
-   const fs_reg trace_ray_control =
+   const brw_reg trace_ray_control =
     inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == BRW_IMMEDIATE_VALUE ?
     inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
     bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
                      inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
-   const fs_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
+   const brw_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
    assert(synchronous_src.file == BRW_IMMEDIATE_VALUE);
    const bool synchronous = synchronous_src.ud;
 
    const unsigned unit = reg_unit(devinfo);
    const unsigned mlen = unit;
    const fs_builder ubld = bld.exec_all();
-   fs_reg header = ubld.vgrf(BRW_TYPE_UD);
+   brw_reg header = ubld.vgrf(BRW_TYPE_UD);
    ubld.MOV(header, brw_imm_ud(0));
    ubld.group(2, 0).MOV(header, globals_addr);
    if (synchronous)
      ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
 
    const unsigned ex_mlen = inst->exec_size / 8;
-   fs_reg payload = bld.vgrf(BRW_TYPE_UD);
+   brw_reg payload = bld.vgrf(BRW_TYPE_UD);
    if (bvh_level.file == BRW_IMMEDIATE_VALUE &&
       trace_ray_control.file == BRW_IMMEDIATE_VALUE) {
      bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, 9, 8) |
@@ -2720,9 +2720,9 @@ lower_get_buffer_size(const fs_builder &bld, fs_inst *inst)
    */
   assert(inst->exec_size == (devinfo->ver < 20 ? 8 : 16));
 
-  fs_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
-  fs_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
-  fs_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
+  brw_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
+  brw_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
+  brw_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
 
  inst->opcode = SHADER_OPCODE_SEND;
  inst->mlen = inst->exec_size / 8;
@@ -2916,10 +2916,10 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
     if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
       continue;
 
-     const fs_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
-     const fs_reg surface_handle = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE];
-     const fs_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
-     const fs_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
+     const brw_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
+     const brw_reg surface_handle = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE];
+     const brw_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
+     const brw_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
    assert(surface.file == BAD_FILE || surface_handle.file == BAD_FILE);
    assert(offset_B.file == IMM);
    assert(size_B.file == IMM);
@@ -2928,7 +2928,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
      const fs_builder ubld =
        fs_builder(&s, block, inst).group(8, 0).exec_all();
 
-       const fs_reg payload = ubld.vgrf(BRW_TYPE_UD);
+       const brw_reg payload = ubld.vgrf(BRW_TYPE_UD);
      ubld.MOV(payload, offset_B);
 
      inst->sfid = GFX12_SFID_UGM;
@@ -2964,7 +2964,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
    } else {
      const fs_builder ubld = fs_builder(&s, block, inst).exec_all();
-      fs_reg header = fs_builder(&s, 8).exec_all().vgrf(BRW_TYPE_UD);
+      brw_reg header = fs_builder(&s, 8).exec_all().vgrf(BRW_TYPE_UD);
 
     ubld.group(8, 0).MOV(header,
                          retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
@@ -2985,7 +2985,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
    setup_surface_descriptors(ubld, inst, desc, surface, surface_handle);
 
    inst->src[2] = header;
-    inst->src[3] = fs_reg(); /* unused for reads */
+    inst->src[3] = brw_reg(); /* unused for reads */
 
    s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
  }
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index e0d2ca79d8f..15cec80a807 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -589,7 +589,7 @@ public:
    void calculate_deps();
    bool is_compressed(const fs_inst *inst);
-   bool register_needs_barrier(const fs_reg &reg);
+   bool register_needs_barrier(const brw_reg &reg);
    schedule_node *choose_instruction_to_schedule();
    int calculate_issue_time(const fs_inst *inst);
 
@@ -602,7 +602,7 @@ public:
    void schedule_instructions();
    void run(instruction_scheduler_mode mode);
 
-   int grf_index(const fs_reg &reg);
+   int grf_index(const brw_reg &reg);
 
    void *mem_ctx;
    linear_ctx *lin_ctx;
@@ -1071,7 +1071,7 @@ has_cross_lane_access(const fs_inst *inst)
  * Some register access need dependencies on other instructions.
 */
 bool
-instruction_scheduler::register_needs_barrier(const fs_reg &reg)
+instruction_scheduler::register_needs_barrier(const brw_reg &reg)
 {
    if (reg.file != ARF || reg.is_null())
      return false;
@@ -1175,7 +1175,7 @@ instruction_scheduler::clear_last_grf_write()
 }
 
 int
-instruction_scheduler::grf_index(const fs_reg &reg)
+instruction_scheduler::grf_index(const brw_reg &reg)
 {
    if (post_reg_alloc)
      return reg.nr;
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 0472c6b563c..dfbd41a0a8d 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -30,7 +30,7 @@
 #include "util/macros.h"
 
 bool
-fs_reg_saturate_immediate(fs_reg *reg)
+fs_reg_saturate_immediate(brw_reg *reg)
 {
    union {
      unsigned ud;
@@ -93,7 +93,7 @@ fs_reg_saturate_immediate(fs_reg *reg)
 }
 
 bool
-fs_reg_negate_immediate(fs_reg *reg)
+fs_reg_negate_immediate(brw_reg *reg)
 {
    switch (reg->type) {
    case BRW_TYPE_D:
@@ -136,7 +136,7 @@ fs_reg_negate_immediate(fs_reg *reg)
 }
 
 bool
-fs_reg_abs_immediate(fs_reg *reg)
+fs_reg_abs_immediate(brw_reg *reg)
 {
    switch (reg->type) {
    case BRW_TYPE_D:
@@ -579,7 +579,7 @@ fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
    if (exec_list_is_singular(&block->instructions)) {
      this->opcode = BRW_OPCODE_NOP;
      this->resize_sources(0);
-     this->dst = fs_reg();
+     this->dst = brw_reg();
      this->size_written = 0;
      return;
    }
diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp
index 504fac0925c..b37604cf448 100644
--- a/src/intel/compiler/test_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/test_fs_cmod_propagation.cpp
@@ -130,10 +130,10 @@ cmod_propagation(fs_visitor *v)
 
 TEST_F(cmod_propagation_test, basic)
 {
-   fs_reg dest = bld.vgrf(BRW_TYPE_F);
-   fs_reg src0 = bld.vgrf(BRW_TYPE_F);
-   fs_reg src1 = bld.vgrf(BRW_TYPE_F);
-   fs_reg zero(brw_imm_f(0.0f));
+   brw_reg dest = bld.vgrf(BRW_TYPE_F);
+   brw_reg src0 = bld.vgrf(BRW_TYPE_F);
+   brw_reg src1 = bld.vgrf(BRW_TYPE_F);
+   brw_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
 
@@ -161,10 +161,10 @@ TEST_F(cmod_propagation_test, basic)
 
 TEST_F(cmod_propagation_test, basic_other_flag)
 {
-   fs_reg dest = bld.vgrf(BRW_TYPE_F);
-   fs_reg src0 = bld.vgrf(BRW_TYPE_F);
-   fs_reg src1 = bld.vgrf(BRW_TYPE_F);
-   fs_reg zero(brw_imm_f(0.0f));
+   brw_reg dest = bld.vgrf(BRW_TYPE_F);
+   brw_reg src0 = bld.vgrf(BRW_TYPE_F);
+   brw_reg src1 = bld.vgrf(BRW_TYPE_F);
+   brw_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE)
       ->flag_subreg = 1;
@@ -194,10 +194,10 @@ TEST_F(cmod_propagation_test, basic_other_flag)
 
 TEST_F(cmod_propagation_test, cmp_nonzero)
 {
-   fs_reg dest = bld.vgrf(BRW_TYPE_F);
-   fs_reg src0 = bld.vgrf(BRW_TYPE_F);
-   fs_reg src1 = bld.vgrf(BRW_TYPE_F);
-   fs_reg nonzero(brw_imm_f(1.0f));
+   brw_reg dest = bld.vgrf(BRW_TYPE_F);
+   brw_reg src0 = bld.vgrf(BRW_TYPE_F);
+   brw_reg src1 = bld.vgrf(BRW_TYPE_F);
+   brw_reg nonzero(brw_imm_f(1.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
 
@@ -226,9 +226,9 @@ TEST_F(cmod_propagation_test, cmp_nonzero)
 
 TEST_F(cmod_propagation_test, non_cmod_instruction)
 {
-   fs_reg dest = bld.vgrf(BRW_TYPE_UD);
-   fs_reg src0 = bld.vgrf(BRW_TYPE_UD);
-   fs_reg zero(brw_imm_ud(0u));
+   brw_reg dest = bld.vgrf(BRW_TYPE_UD);
+   brw_reg src0 = bld.vgrf(BRW_TYPE_UD);
+   brw_reg zero(brw_imm_ud(0u));
    bld.FBL(dest, src0);
    bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
@@ -257,8 +257,8 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) TEST_F(cmod_propagation_test, non_cmod_livechannel) { - fs_reg dest = bld.vgrf(BRW_TYPE_UD); - fs_reg zero(brw_imm_d(0)); + brw_reg dest = bld.vgrf(BRW_TYPE_UD); + brw_reg zero(brw_imm_d(0)); bld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dest)->exec_size = 32; bld.CMP(bld.null_reg_d(), dest, zero, BRW_CONDITIONAL_Z)->exec_size = 32; @@ -288,11 +288,11 @@ TEST_F(cmod_propagation_test, non_cmod_livechannel) TEST_F(cmod_propagation_test, intervening_flag_write) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -325,11 +325,11 @@ TEST_F(cmod_propagation_test, intervening_flag_write) TEST_F(cmod_propagation_test, intervening_mismatch_flag_write) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE) ->flag_subreg = 1; @@ -365,12 +365,12 @@ TEST_F(cmod_propagation_test, intervening_mismatch_flag_write) TEST_F(cmod_propagation_test, intervening_flag_read) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -403,12 +403,12 @@ TEST_F(cmod_propagation_test, intervening_flag_read) TEST_F(cmod_propagation_test, intervening_mismatch_flag_read) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) ->flag_subreg = 1; @@ -444,13 +444,13 @@ TEST_F(cmod_propagation_test, intervening_mismatch_flag_read) TEST_F(cmod_propagation_test, intervening_dest_write) { - fs_reg dest = bld.vgrf(BRW_TYPE_F, 4); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F, 2); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F, 4); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = 
bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F, 2); + brw_reg zero(brw_imm_f(0.0f)); - fs_reg tex_srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS]; tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2; tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0); tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2); @@ -491,12 +491,12 @@ TEST_F(cmod_propagation_test, intervening_dest_write) TEST_F(cmod_propagation_test, intervening_flag_read_same_value) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -529,10 +529,10 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) TEST_F(cmod_propagation_test, negate) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); dest.negate = true; bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -561,9 +561,9 @@ TEST_F(cmod_propagation_test, negate) TEST_F(cmod_propagation_test, movnz) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE); set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(bld.null_reg_f(), dest)); @@ -592,10 +592,10 @@ TEST_F(cmod_propagation_test, movnz) TEST_F(cmod_propagation_test, different_types_cmod_with_zero) { - fs_reg dest = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); - fs_reg src1 = bld.vgrf(BRW_TYPE_D); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg src1 = bld.vgrf(BRW_TYPE_D); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), retype(dest, BRW_TYPE_F), zero, BRW_CONDITIONAL_GE); @@ -625,10 +625,10 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) TEST_F(cmod_propagation_test, andnz_one) { - fs_reg dest = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); - fs_reg one(brw_imm_d(1)); + brw_reg dest = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); + brw_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -659,10 +659,10 @@ TEST_F(cmod_propagation_test, andnz_one) TEST_F(cmod_propagation_test, andnz_non_one) { - fs_reg dest = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); - fs_reg nonone(brw_imm_d(38)); + brw_reg dest = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); + brw_reg nonone(brw_imm_d(38)); bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, 
BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -693,9 +693,9 @@ TEST_F(cmod_propagation_test, andnz_non_one) TEST_F(cmod_propagation_test, cmp_cmpnz) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ); @@ -720,9 +720,9 @@ TEST_F(cmod_propagation_test, cmp_cmpnz) TEST_F(cmod_propagation_test, cmp_cmpg) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_G); @@ -749,9 +749,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg) TEST_F(cmod_propagation_test, plnnz_cmpnz) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0)); set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ); @@ -776,9 +776,9 @@ TEST_F(cmod_propagation_test, plnnz_cmpnz) TEST_F(cmod_propagation_test, plnnz_cmpz) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0)); set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z); @@ -803,10 +803,10 @@ TEST_F(cmod_propagation_test, plnnz_cmpz) TEST_F(cmod_propagation_test, plnnz_sel_cmpz) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0)); set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst1, src0, zero)); @@ -837,9 +837,9 @@ TEST_F(cmod_propagation_test, plnnz_sel_cmpz) TEST_F(cmod_propagation_test, cmp_cmpg_D) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); - fs_reg zero(brw_imm_d(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg zero(brw_imm_d(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_G); @@ -866,9 +866,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg_D) TEST_F(cmod_propagation_test, cmp_cmpg_UD) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg zero(brw_imm_ud(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg zero(brw_imm_ud(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_G); @@ -893,9 +893,9 @@ TEST_F(cmod_propagation_test, cmp_cmpg_UD) TEST_F(cmod_propagation_test, cmp_cmpl_D) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); - fs_reg zero(brw_imm_d(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg zero(brw_imm_d(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_d(), 
dst0, zero, BRW_CONDITIONAL_L); @@ -920,9 +920,9 @@ TEST_F(cmod_propagation_test, cmp_cmpl_D) TEST_F(cmod_propagation_test, cmp_cmpl_UD) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg zero(brw_imm_ud(0)); + brw_reg dst0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg zero(brw_imm_ud(0)); bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_L); @@ -949,10 +949,10 @@ TEST_F(cmod_propagation_test, cmp_cmpl_UD) TEST_F(cmod_propagation_test, andz_one) { - fs_reg dest = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); - fs_reg one(brw_imm_d(1)); + brw_reg dest = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); + brw_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_Z, @@ -983,9 +983,9 @@ TEST_F(cmod_propagation_test, andz_one) TEST_F(cmod_propagation_test, add_not_merge_with_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1016,9 +1016,9 @@ TEST_F(cmod_propagation_test, add_not_merge_with_compare) TEST_F(cmod_propagation_test, subtract_merge_with_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src0, negate(src1)); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1044,10 +1044,10 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare) TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg one(brw_imm_f(1.0f)); - fs_reg negative_one(brw_imm_f(-1.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg one(brw_imm_f(1.0f)); + brw_reg negative_one(brw_imm_f(-1.0f)); bld.ADD(dest, src0, negative_one); bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_NZ); @@ -1074,10 +1074,10 @@ TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare) TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest0, src0, negate(src1)); bld.ADD(dest1, src0, src1); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1108,10 +1108,10 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add) TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partial_write) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest0, src0, negate(src1)); 
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, negate(src1))); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1143,10 +1143,10 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partia TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest0, src0, negate(src1)); set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, src1)); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1178,9 +1178,9 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add) TEST_F(cmod_propagation_test, add_merge_with_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src0, negate(src1), BRW_CONDITIONAL_L); @@ -1206,9 +1206,9 @@ TEST_F(cmod_propagation_test, add_merge_with_compare) TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src1, negate(src0)); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); @@ -1237,11 +1237,11 @@ TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare) TEST_F(cmod_propagation_test, subtract_delete_compare) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))); set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2)); @@ -1276,11 +1276,11 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag) /* This test is the same as subtract_delete_compare but it explicitly used * flag f0.1 for the subtraction and the comparison. 
*/ - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))) ->flag_subreg = 1; @@ -1315,9 +1315,9 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag) TEST_F(cmod_propagation_test, subtract_to_mismatch_flag) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L) @@ -1350,9 +1350,9 @@ TEST_F(cmod_propagation_test, subtract_to_mismatch_flag) TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_mismatch_flag_write) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest0, src0, negate(src1)); bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L) @@ -1397,12 +1397,12 @@ TEST_F(cmod_propagation_test, TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_mismatch_flag_read) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, negate(src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) @@ -1437,10 +1437,10 @@ TEST_F(cmod_propagation_test, TEST_F(cmod_propagation_test, subtract_delete_compare_derp) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_F); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest0 = bld.vgrf(BRW_TYPE_F); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negate(src1))); set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), src1)); @@ -1472,8 +1472,8 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_derp) TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch) { - fs_reg dest0 = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg dest0 = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); src0.type = BRW_TYPE_W; bld.ASR(dest0, negate(src0), brw_imm_d(15)); @@ -1503,9 +1503,9 @@ TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch) TEST_F(cmod_propagation_test, ior_f2i_nz) { - fs_reg dest = bld.vgrf(BRW_TYPE_D); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); - fs_reg src1 = bld.vgrf(BRW_TYPE_D); + brw_reg dest = bld.vgrf(BRW_TYPE_D); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg src1 = bld.vgrf(BRW_TYPE_D); bld.OR(dest, src0, src1); bld.MOV(bld.null_reg_d(), retype(dest, 
BRW_TYPE_F)) @@ -1549,9 +1549,9 @@ cmod_propagation_test::test_mov_prop(enum brw_conditional_mod cmod, enum brw_reg_type mov_dst_type, bool expected_cmod_prop_progress) { - fs_reg dest = bld.vgrf(add_type); - fs_reg src0 = bld.vgrf(add_type); - fs_reg src1 = bld.vgrf(add_type); + brw_reg dest = bld.vgrf(add_type); + brw_reg src0 = bld.vgrf(add_type); + brw_reg src1 = bld.vgrf(add_type); bld.ADD(dest, src0, src1); bld.MOV(retype(bld.null_reg_ud(), mov_dst_type), dest) @@ -2126,10 +2126,10 @@ cmod_propagation_test::test_saturate_prop(enum brw_conditional_mod before, enum brw_reg_type op_type, bool expected_cmod_prop_progress) { - fs_reg dest = bld.vgrf(add_type); - fs_reg src0 = bld.vgrf(add_type); - fs_reg src1 = bld.vgrf(add_type); - fs_reg zero(brw_imm_ud(0)); + brw_reg dest = bld.vgrf(add_type); + brw_reg src0 = bld.vgrf(add_type); + brw_reg src1 = bld.vgrf(add_type); + brw_reg zero(brw_imm_ud(0)); bld.ADD(dest, src0, src1)->saturate = true; @@ -2611,9 +2611,9 @@ TEST_F(cmod_propagation_test, not_to_or) /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); bld.OR(dest, src0, src1); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest)); @@ -2644,9 +2644,9 @@ TEST_F(cmod_propagation_test, not_to_and) /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); bld.AND(dest, src0, src1); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest)); @@ -2682,9 +2682,9 @@ TEST_F(cmod_propagation_test, not_to_uadd) * restriction is just that the destination type of the ALU instruction is * the same as the source type of the NOT instruction. */ - fs_reg dest = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); bld.ADD(dest, src0, src1); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest)); @@ -2722,9 +2722,9 @@ TEST_F(cmod_propagation_test, not_to_fadd_to_ud) * restriction is just that the destination type of the ALU instruction is * the same as the source type of the NOT instruction. */ - fs_reg dest = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src0, src1); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest)); @@ -2762,9 +2762,9 @@ TEST_F(cmod_propagation_test, not_to_fadd) * restriction is just that the destination type of the ALU instruction is * the same as the source type of the NOT instruction.
*/ - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dest, src0, src1); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), @@ -2799,12 +2799,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_compatible_value) /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest0 = bld.vgrf(BRW_TYPE_UD); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0)); @@ -2841,12 +2841,12 @@ TEST_F(cmod_propagation_test, /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest0 = bld.vgrf(BRW_TYPE_UD); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1)) ->flag_subreg = 1; set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); @@ -2886,12 +2886,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest0 = bld.vgrf(BRW_TYPE_UD); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_NZ, bld.OR(dest0, src0, src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0)); @@ -2928,10 +2928,10 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_write) /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. 
*/ - fs_reg dest0 = bld.vgrf(BRW_TYPE_UD); - fs_reg dest1 = bld.vgrf(BRW_TYPE_UD); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest0 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest1 = bld.vgrf(BRW_TYPE_UD); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); bld.OR(dest0, src0, src1); set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest1, src0, src1)) @@ -2971,12 +2971,12 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read) /* Exercise propagation of conditional modifier from a NOT instruction to * another ALU instruction as performed by cmod_propagate_not. */ - fs_reg dest0 = bld.vgrf(BRW_TYPE_UD); - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_UD); - fs_reg src1 = bld.vgrf(BRW_TYPE_UD); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest0 = bld.vgrf(BRW_TYPE_UD); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_UD); + brw_reg src1 = bld.vgrf(BRW_TYPE_UD); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.OR(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) @@ -3013,10 +3013,10 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read) TEST_F(cmod_propagation_test, cmp_to_add_float_e) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg neg10(brw_imm_f(-10.0f)); - fs_reg pos10(brw_imm_f(10.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg neg10(brw_imm_f(-10.0f)); + brw_reg pos10(brw_imm_f(10.0f)); bld.ADD(dest, src0, neg10)->saturate = true; bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ); @@ -3043,10 +3043,10 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_e) TEST_F(cmod_propagation_test, cmp_to_add_float_g) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg neg10(brw_imm_f(-10.0f)); - fs_reg pos10(brw_imm_f(10.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg neg10(brw_imm_f(-10.0f)); + brw_reg pos10(brw_imm_f(10.0f)); bld.ADD(dest, src0, neg10)->saturate = true; bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G); @@ -3071,10 +3071,10 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_g) TEST_F(cmod_propagation_test, cmp_to_add_float_le) { - fs_reg dest = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg neg10(brw_imm_f(-10.0f)); - fs_reg pos10(brw_imm_f(10.0f)); + brw_reg dest = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg neg10(brw_imm_f(-10.0f)); + brw_reg pos10(brw_imm_f(10.0f)); bld.ADD(dest, src0, neg10)->saturate = true; bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE); @@ -3099,13 +3099,13 @@ TEST_F(cmod_propagation_test, cmp_to_add_float_le) TEST_F(cmod_propagation_test, prop_across_sel) { - fs_reg dest1 = bld.vgrf(BRW_TYPE_F); - fs_reg dest2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); - fs_reg src3 = bld.vgrf(BRW_TYPE_F); - fs_reg zero(brw_imm_f(0.0f)); + brw_reg dest1 = bld.vgrf(BRW_TYPE_F); + brw_reg dest2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg src3 = bld.vgrf(BRW_TYPE_F); + brw_reg zero(brw_imm_f(0.0f)); bld.ADD(dest1, src0, src1); bld.emit_minmax(dest2, src2, src3, BRW_CONDITIONAL_GE); 
bld.CMP(bld.null_reg_f(), dest1, zero, BRW_CONDITIONAL_GE); diff --git a/src/intel/compiler/test_fs_combine_constants.cpp b/src/intel/compiler/test_fs_combine_constants.cpp index 4094d548c09..9d762d968cf 100644 --- a/src/intel/compiler/test_fs_combine_constants.cpp +++ b/src/intel/compiler/test_fs_combine_constants.cpp @@ -77,9 +77,9 @@ TEST_F(FSCombineConstantsTest, Simple) { fs_builder bld = make_builder(shader); - fs_reg r = brw_vec8_grf(1, 0); - fs_reg imm_a = brw_imm_ud(1); - fs_reg imm_b = brw_imm_ud(2); + brw_reg r = brw_vec8_grf(1, 0); + brw_reg imm_a = brw_imm_ud(1); + brw_reg imm_b = brw_imm_ud(2); bld.SEL(r, imm_a, imm_b); shader->calculate_cfg(); @@ -102,10 +102,10 @@ TEST_F(FSCombineConstantsTest, DoContainingDo) { fs_builder bld = make_builder(shader); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg imm_a = brw_imm_ud(1); - fs_reg imm_b = brw_imm_ud(2); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg imm_a = brw_imm_ud(1); + brw_reg imm_b = brw_imm_ud(2); bld.DO(); bld.DO(); diff --git a/src/intel/compiler/test_fs_copy_propagation.cpp b/src/intel/compiler/test_fs_copy_propagation.cpp index 7b1f5ebfd8d..bfc6c89c615 100644 --- a/src/intel/compiler/test_fs_copy_propagation.cpp +++ b/src/intel/compiler/test_fs_copy_propagation.cpp @@ -119,10 +119,10 @@ copy_propagation(fs_visitor *v) TEST_F(copy_propagation_test, basic) { - fs_reg vgrf0 = bld.vgrf(BRW_TYPE_F); - fs_reg vgrf1 = bld.vgrf(BRW_TYPE_F); - fs_reg vgrf2 = bld.vgrf(BRW_TYPE_F); - fs_reg vgrf3 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf0 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf1 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf2 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf3 = bld.vgrf(BRW_TYPE_F); bld.MOV(vgrf0, vgrf2); bld.ADD(vgrf1, vgrf0, vgrf3); @@ -160,9 +160,9 @@ TEST_F(copy_propagation_test, basic) TEST_F(copy_propagation_test, maxmax_sat_imm) { - fs_reg vgrf0 = bld.vgrf(BRW_TYPE_F); - fs_reg vgrf1 = bld.vgrf(BRW_TYPE_F); - fs_reg vgrf2 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf0 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf1 = bld.vgrf(BRW_TYPE_F); + brw_reg vgrf2 = bld.vgrf(BRW_TYPE_F); static const struct { enum brw_conditional_mod conditional_mod; diff --git a/src/intel/compiler/test_fs_cse.cpp b/src/intel/compiler/test_fs_cse.cpp index 9f8044ce4b0..5795ea4c697 100644 --- a/src/intel/compiler/test_fs_cse.cpp +++ b/src/intel/compiler/test_fs_cse.cpp @@ -102,11 +102,11 @@ cse(fs_visitor *v) TEST_F(cse_test, add3_invalid) { - fs_reg dst0 = bld.null_reg_d(); - fs_reg src0 = bld.vgrf(BRW_TYPE_D); - fs_reg src1 = bld.vgrf(BRW_TYPE_D); - fs_reg src2 = bld.vgrf(BRW_TYPE_D); - fs_reg src3 = bld.vgrf(BRW_TYPE_D); + brw_reg dst0 = bld.null_reg_d(); + brw_reg src0 = bld.vgrf(BRW_TYPE_D); + brw_reg src1 = bld.vgrf(BRW_TYPE_D); + brw_reg src2 = bld.vgrf(BRW_TYPE_D); + brw_reg src3 = bld.vgrf(BRW_TYPE_D); bld.ADD3(dst0, src0, src1, src2) ->conditional_mod = BRW_CONDITIONAL_NZ; diff --git a/src/intel/compiler/test_fs_saturate_propagation.cpp b/src/intel/compiler/test_fs_saturate_propagation.cpp index 099fbdfaa8e..451e4ebdc91 100644 --- a/src/intel/compiler/test_fs_saturate_propagation.cpp +++ b/src/intel/compiler/test_fs_saturate_propagation.cpp @@ -120,10 +120,10 @@ saturate_propagation(fs_visitor *v) TEST_F(saturate_propagation_test, basic) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = 
bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); set_saturate(true, bld.MOV(dst1, dst0)); @@ -154,11 +154,11 @@ TEST_F(saturate_propagation_test, basic) TEST_F(saturate_propagation_test, other_non_saturated_use) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); set_saturate(true, bld.MOV(dst1, dst0)); bld.ADD(dst2, dst0, src0); @@ -191,10 +191,10 @@ TEST_F(saturate_propagation_test, other_non_saturated_use) TEST_F(saturate_propagation_test, predicated_instruction) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1) ->predicate = BRW_PREDICATE_NORMAL; set_saturate(true, bld.MOV(dst1, dst0)); @@ -225,9 +225,9 @@ TEST_F(saturate_propagation_test, predicated_instruction) TEST_F(saturate_propagation_test, neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); bld.RNDU(dst0, src0); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -258,10 +258,10 @@ TEST_F(saturate_propagation_test, neg_mov_sat) TEST_F(saturate_propagation_test, add_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -295,10 +295,10 @@ TEST_F(saturate_propagation_test, add_neg_mov_sat) TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = brw_imm_f(1.0f); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = brw_imm_f(1.0f); bld.ADD(dst0, src0, src1); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -332,10 +332,10 @@ TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat) TEST_F(saturate_propagation_test, mul_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.MUL(dst0, src0, src1); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -369,11 +369,11 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat) TEST_F(saturate_propagation_test, mad_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = 
bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); bld.MAD(dst0, src0, src1, src2); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -409,11 +409,11 @@ TEST_F(saturate_propagation_test, mad_neg_mov_sat) TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = brw_imm_f(1.0f); - fs_reg src1 = brw_imm_f(-2.0f); - fs_reg src2 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = brw_imm_f(1.0f); + brw_reg src1 = brw_imm_f(-2.0f); + brw_reg src2 = bld.vgrf(BRW_TYPE_F); /* The builder for MAD tries to be helpful and not put immediates as direct * sources. We want to test specifically that case. */ @@ -453,11 +453,11 @@ TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat) TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.MUL(dst0, src0, src1); set_saturate(true, bld.MOV(dst1, dst0)); dst0.negate = true; @@ -494,11 +494,11 @@ TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat) TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.MUL(dst0, src0, src1); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -536,10 +536,10 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat) TEST_F(saturate_propagation_test, abs_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); dst0.abs = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -570,11 +570,11 @@ TEST_F(saturate_propagation_test, abs_mov_sat) TEST_F(saturate_propagation_test, producer_saturates) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); set_saturate(true, bld.ADD(dst0, src0, src1)); set_saturate(true, bld.MOV(dst1, dst0)); bld.MOV(dst2, dst0); @@ -608,11 +608,11 @@ TEST_F(saturate_propagation_test, producer_saturates) TEST_F(saturate_propagation_test, intervening_saturating_copy) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg 
dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); set_saturate(true, bld.MOV(dst1, dst0)); set_saturate(true, bld.MOV(dst2, dst0)); @@ -648,13 +648,13 @@ TEST_F(saturate_propagation_test, intervening_saturating_copy) TEST_F(saturate_propagation_test, intervening_dest_write) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F, 4); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); - fs_reg src2 = bld.vgrf(BRW_TYPE_F, 2); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F, 4); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg src2 = bld.vgrf(BRW_TYPE_F, 2); - fs_reg tex_srcs[TEX_LOGICAL_NUM_SRCS]; + brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS]; tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2; tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0); tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2); @@ -695,11 +695,11 @@ TEST_F(saturate_propagation_test, intervening_dest_write) TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.MUL(dst0, src0, src1); dst0.negate = true; set_saturate(true, bld.MOV(dst1, dst0)); @@ -737,10 +737,10 @@ TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat) TEST_F(saturate_propagation_test, smaller_exec_size_consumer) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.ADD(dst0, src0, src1); set_saturate(true, bld.group(8, 0).MOV(dst1, dst0)); @@ -770,10 +770,10 @@ TEST_F(saturate_propagation_test, smaller_exec_size_consumer) TEST_F(saturate_propagation_test, larger_exec_size_consumer) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.group(8, 0).ADD(dst0, src0, src1); set_saturate(true, bld.MOV(dst1, dst0)); @@ -803,11 +803,11 @@ TEST_F(saturate_propagation_test, larger_exec_size_consumer) TEST_F(saturate_propagation_test, offset_source_barrier) { - fs_reg dst0 = bld.vgrf(BRW_TYPE_F); - fs_reg dst1 = bld.vgrf(BRW_TYPE_F); - fs_reg dst2 = bld.vgrf(BRW_TYPE_F); - fs_reg src0 = bld.vgrf(BRW_TYPE_F); - fs_reg src1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst0 = bld.vgrf(BRW_TYPE_F); + brw_reg dst1 = bld.vgrf(BRW_TYPE_F); + brw_reg dst2 = bld.vgrf(BRW_TYPE_F); + brw_reg src0 = bld.vgrf(BRW_TYPE_F); + brw_reg src1 = bld.vgrf(BRW_TYPE_F); bld.group(16, 0).ADD(dst0, src0, src1); bld.group(1, 0).ADD(dst1, component(dst0, 8), brw_imm_f(1.0f)); 
set_saturate(true, bld.group(16, 0).MOV(dst2, dst0)); diff --git a/src/intel/compiler/test_fs_scoreboard.cpp b/src/intel/compiler/test_fs_scoreboard.cpp index 9b050dac2a4..f44c5607dbf 100644 --- a/src/intel/compiler/test_fs_scoreboard.cpp +++ b/src/intel/compiler/test_fs_scoreboard.cpp @@ -106,8 +106,8 @@ lower_scoreboard(fs_visitor *v) } fs_inst * -emit_SEND(const fs_builder &bld, const fs_reg &dst, - const fs_reg &desc, const fs_reg &payload) +emit_SEND(const fs_builder &bld, const brw_reg &dst, + const brw_reg &desc, const brw_reg &payload) { fs_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload); inst->mlen = 1; @@ -150,12 +150,12 @@ std::ostream &operator<<(std::ostream &os, const tgl_swsb &swsb) { TEST_F(scoreboard_test, RAW_inorder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); - fs_reg y = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg y = bld.vgrf(BRW_TYPE_D); bld.ADD( x, g[1], g[2]); bld.MUL( y, g[3], g[4]); bld.AND(g[5], x, y); @@ -176,11 +176,11 @@ TEST_F(scoreboard_test, RAW_inorder_inorder) TEST_F(scoreboard_test, RAW_inorder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD( x, g[1], g[2]); bld.MUL( g[3], g[4], g[5]); emit_SEND(bld, g[6], g[7], x); @@ -201,12 +201,12 @@ TEST_F(scoreboard_test, RAW_inorder_outoforder) TEST_F(scoreboard_test, RAW_outoforder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); - fs_reg y = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg y = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, x, g[1], g[2]); bld.MUL( y, g[3], g[4]); bld.AND( g[5], x, y); @@ -227,7 +227,7 @@ TEST_F(scoreboard_test, RAW_outoforder_inorder) TEST_F(scoreboard_test, RAW_outoforder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); @@ -235,7 +235,7 @@ TEST_F(scoreboard_test, RAW_outoforder_outoforder) * SBIDs. Since it is not possible we expect a SYNC instruction to be * added. 
*/ - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, x, g[1], g[2]); emit_SEND(bld, g[3], x, g[4])->sfid++; @@ -259,11 +259,11 @@ TEST_F(scoreboard_test, RAW_outoforder_outoforder) TEST_F(scoreboard_test, WAR_inorder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD(g[1], x, g[2]); bld.MUL(g[3], g[4], g[5]); bld.AND( x, g[6], g[7]); @@ -284,11 +284,11 @@ TEST_F(scoreboard_test, WAR_inorder_inorder) TEST_F(scoreboard_test, WAR_inorder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD( g[1], x, g[2]); bld.MUL( g[3], g[4], g[5]); emit_SEND(bld, x, g[6], g[7]); @@ -309,11 +309,11 @@ TEST_F(scoreboard_test, WAR_inorder_outoforder) TEST_F(scoreboard_test, WAR_outoforder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, g[1], g[2], x); bld.MUL( g[4], g[5], g[6]); bld.AND( x, g[7], g[8]); @@ -334,11 +334,11 @@ TEST_F(scoreboard_test, WAR_outoforder_inorder) TEST_F(scoreboard_test, WAR_outoforder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, g[1], g[2], x); emit_SEND(bld, x, g[3], g[4])->sfid++; @@ -362,11 +362,11 @@ TEST_F(scoreboard_test, WAR_outoforder_outoforder) TEST_F(scoreboard_test, WAW_inorder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD( x, g[1], g[2]); bld.MUL(g[3], g[4], g[5]); bld.AND( x, g[6], g[7]); @@ -392,11 +392,11 @@ TEST_F(scoreboard_test, WAW_inorder_inorder) TEST_F(scoreboard_test, WAW_inorder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD( x, g[1], g[2]); bld.MUL( g[3], g[4], g[5]); emit_SEND(bld, x, g[6], g[7]); @@ -417,11 +417,11 @@ TEST_F(scoreboard_test, WAW_inorder_outoforder) TEST_F(scoreboard_test, WAW_outoforder_inorder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, x, g[1], g[2]); bld.MUL( g[3], g[4], g[5]); bld.AND( x, g[6], g[7]); @@ -442,11 +442,11 @@ TEST_F(scoreboard_test, WAW_outoforder_inorder) TEST_F(scoreboard_test, WAW_outoforder_outoforder) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); emit_SEND(bld, x, g[1], g[2]); emit_SEND(bld, x, g[3], g[4])->sfid++; @@ -471,11 +471,11 @@ TEST_F(scoreboard_test, WAW_outoforder_outoforder) TEST_F(scoreboard_test, loop1) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_DO); @@ -501,11 +501,11 @@ TEST_F(scoreboard_test, loop1) 
TEST_F(scoreboard_test, loop2) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.XOR(g[3], g[1], g[2]); bld.XOR(g[4], g[1], g[2]); @@ -536,11 +536,11 @@ TEST_F(scoreboard_test, loop2) TEST_F(scoreboard_test, loop3) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_DO); @@ -573,11 +573,11 @@ TEST_F(scoreboard_test, loop3) TEST_F(scoreboard_test, conditional1) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -602,11 +602,11 @@ TEST_F(scoreboard_test, conditional1) TEST_F(scoreboard_test, conditional2) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.XOR(g[3], g[1], g[2]); bld.XOR(g[4], g[1], g[2]); @@ -634,11 +634,11 @@ TEST_F(scoreboard_test, conditional2) TEST_F(scoreboard_test, conditional3) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -666,11 +666,11 @@ TEST_F(scoreboard_test, conditional3) TEST_F(scoreboard_test, conditional4) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -698,11 +698,11 @@ TEST_F(scoreboard_test, conditional4) TEST_F(scoreboard_test, conditional5) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -735,11 +735,11 @@ TEST_F(scoreboard_test, conditional5) TEST_F(scoreboard_test, conditional6) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -779,11 +779,11 @@ TEST_F(scoreboard_test, conditional6) TEST_F(scoreboard_test, conditional7) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.emit(BRW_OPCODE_IF); @@ -823,11 +823,11 @@ TEST_F(scoreboard_test, conditional7) TEST_F(scoreboard_test, conditional8) { - fs_reg g[16]; + brw_reg g[16]; for (unsigned i = 0; i < ARRAY_SIZE(g); i++) g[i] = bld.vgrf(BRW_TYPE_D); - fs_reg x = bld.vgrf(BRW_TYPE_D); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.XOR( x, g[1], g[2]); bld.XOR(g[3], g[1], g[2]); bld.XOR(g[4], g[1], g[2]); @@ -871,10 +871,10 @@ TEST_F(scoreboard_test, gfx125_RaR_over_different_pipes) devinfo->verx10 = 125; brw_init_isa_info(&compiler->isa, devinfo); - fs_reg a = bld.vgrf(BRW_TYPE_D); - fs_reg b = bld.vgrf(BRW_TYPE_D); - fs_reg f = bld.vgrf(BRW_TYPE_F); - fs_reg x = 
bld.vgrf(BRW_TYPE_D); + brw_reg a = bld.vgrf(BRW_TYPE_D); + brw_reg b = bld.vgrf(BRW_TYPE_D); + brw_reg f = bld.vgrf(BRW_TYPE_F); + brw_reg x = bld.vgrf(BRW_TYPE_D); bld.ADD(f, x, x); bld.ADD(a, x, x); diff --git a/src/intel/compiler/test_predicated_break.cpp b/src/intel/compiler/test_predicated_break.cpp index 3672a2d4ff8..40b6da77506 100644 --- a/src/intel/compiler/test_predicated_break.cpp +++ b/src/intel/compiler/test_predicated_break.cpp @@ -140,9 +140,9 @@ TEST_F(PredicatedBreakTest, TopBreakWithoutContinue) fs_builder a = make_builder(shader_a); fs_builder b = make_builder(shader_b); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg r3 = brw_vec8_grf(3, 0); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg r3 = brw_vec8_grf(3, 0); a.DO(); a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ); @@ -174,9 +174,9 @@ TEST_F(PredicatedBreakTest, TopBreakWithContinue) fs_builder a = make_builder(shader_a); fs_builder b = make_builder(shader_b); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg r3 = brw_vec8_grf(3, 0); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg r3 = brw_vec8_grf(3, 0); a.DO(); a.CMP(r1, r2, r3, BRW_CONDITIONAL_NZ); @@ -218,9 +218,9 @@ TEST_F(PredicatedBreakTest, DISABLED_BottomBreakWithoutContinue) fs_builder a = make_builder(shader_a); fs_builder b = make_builder(shader_b); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg r3 = brw_vec8_grf(3, 0); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg r3 = brw_vec8_grf(3, 0); a.DO(); a.ADD(r1, r2, r3); @@ -256,9 +256,9 @@ TEST_F(PredicatedBreakTest, BottomBreakWithContinue) fs_builder a = make_builder(shader_a); fs_builder b = make_builder(shader_b); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg r3 = brw_vec8_grf(3, 0); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg r3 = brw_vec8_grf(3, 0); a.DO(); a.ADD(r1, r2, r3); @@ -300,9 +300,9 @@ TEST_F(PredicatedBreakTest, TwoBreaks) fs_builder a = make_builder(shader_a); fs_builder b = make_builder(shader_b); - fs_reg r1 = brw_vec8_grf(1, 0); - fs_reg r2 = brw_vec8_grf(2, 0); - fs_reg r3 = brw_vec8_grf(3, 0); + brw_reg r1 = brw_vec8_grf(1, 0); + brw_reg r2 = brw_vec8_grf(2, 0); + brw_reg r3 = brw_vec8_grf(3, 0); a.DO(); a.ADD(r1, r2, r3);
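
Every hunk in this series follows the same mechanical pattern: each `fs_reg` declaration becomes `brw_reg`, with no change to opcodes, exec sizes, or test expectations. For a rename of this size, one common staging technique (shown purely as a hypothetical sketch; it is not a hunk from this diff) is a transitional alias that keeps the old spelling compiling while files are converted one by one:

    /* Hypothetical compatibility shim, not part of the diff above. */
    using fs_reg = brw_reg;

Once the last declaration is converted, the alias and the old spelling can both be dropped in a final cleanup change.
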