From 0fcce2722f20e22e18ead543dde6e2dd464a9bdf Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 20 Aug 2025 15:43:08 -0700 Subject: [PATCH] brw: Add brw_send_inst Move all the SEND specific fields from brw_inst into brw_send_inst. This new instruction kind will contain all variants of SENDs plus the virtual opcodes that were already relying on those SEND fields. Use the `as_send()` helper to go from a brw_inst into the brw_send_inst when applicable. Some of the code was changed to use the brw_send_inst type directly. Until other kinds are added, all the instructions are allocated the same amount of space as brw_send_inst. This ensures that all brw_transform_inst() calls are still valid. This will change after a few patches so that BASE instructions can use less memory. Reviewed-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- .../compiler/brw_analysis_performance.cpp | 31 +- src/intel/compiler/brw_builder.h | 4 +- src/intel/compiler/brw_compile_fs.cpp | 4 +- src/intel/compiler/brw_compile_mesh.cpp | 6 +- src/intel/compiler/brw_from_nir.cpp | 8 +- src/intel/compiler/brw_generator.cpp | 27 +- src/intel/compiler/brw_generator.h | 2 +- src/intel/compiler/brw_inst.cpp | 35 +- src/intel/compiler/brw_inst.h | 77 +- src/intel/compiler/brw_lower.cpp | 29 +- .../compiler/brw_lower_logical_sends.cpp | 685 +++++++++--------- src/intel/compiler/brw_lower_simd_width.cpp | 5 +- src/intel/compiler/brw_opt.cpp | 74 +- src/intel/compiler/brw_opt_cse.cpp | 58 +- .../compiler/brw_opt_dead_code_eliminate.cpp | 4 +- src/intel/compiler/brw_print.cpp | 20 +- src/intel/compiler/brw_reg_allocate.cpp | 24 +- .../compiler/brw_schedule_instructions.cpp | 17 +- src/intel/compiler/brw_shader.cpp | 6 +- src/intel/compiler/brw_shader.h | 3 + src/intel/compiler/brw_validate.cpp | 2 +- src/intel/compiler/brw_workaround.cpp | 16 +- src/intel/compiler/test_lower_scoreboard.cpp | 2 +- 23 files changed, 645 insertions(+), 494 deletions(-) diff --git a/src/intel/compiler/brw_analysis_performance.cpp b/src/intel/compiler/brw_analysis_performance.cpp index ac25cbd7162..6e78f1d5661 100644 --- a/src/intel/compiler/brw_analysis_performance.cpp +++ b/src/intel/compiler/brw_analysis_performance.cpp @@ -137,20 +137,25 @@ namespace { td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)), tx(get_exec_type(inst)), sx(0), ss(0), sc(has_bank_conflict(isa, inst) ? sd : 0), - desc(inst->desc), sfid(inst->sfid) + desc(0), sfid(0) { - /* We typically want the maximum source size, except for split send - * messages which require the total size. - */ - if (inst->opcode == SHADER_OPCODE_SEND) { - ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) + - DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE); - } else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) { - ss = inst->mlen; - /* If haven't lowered yet, count the sources. */ - if (!ss) { - for (int i = 3; i < inst->sources; i++) - ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE); + const brw_send_inst *send = inst->as_send(); + if (send) { + desc = send->desc; + sfid = send->sfid; + /* We typically want the maximum source size, except for split send + * messages which require the total size. + */ + if (inst->opcode == SHADER_OPCODE_SEND) { + ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) + + DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE); + } else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) { + ss = send->mlen; + /* If haven't lowered yet, count the sources. */ + if (!ss) { + for (int i = 3; i < inst->sources; i++) + ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE); + } } } else { for (unsigned i = 0; i < inst->sources; i++) diff --git a/src/intel/compiler/brw_builder.h b/src/intel/compiler/brw_builder.h index 66aca58d7d8..3ac97f91010 100644 --- a/src/intel/compiler/brw_builder.h +++ b/src/intel/compiler/brw_builder.h @@ -632,10 +632,10 @@ public: #undef _ALU1 /** @} */ - brw_inst * + brw_send_inst * SEND() const { - return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS); + return emit(SHADER_OPCODE_SEND, SEND_NUM_SRCS)->as_send(); } brw_inst * diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index b3afcde54a5..d780a71a86a 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -620,7 +620,7 @@ static void brw_emit_repclear_shader(brw_shader &s) { brw_wm_prog_key *key = (brw_wm_prog_key*) s.key; - brw_inst *write = NULL; + brw_send_inst *write = NULL; assert(s.devinfo->ver < 20); assert(s.uniforms == 0); @@ -666,7 +666,7 @@ brw_emit_repclear_shader(brw_shader &s) write->src[SEND_SRC_PAYLOAD1] = i == 0 ? color_output : header; write->src[SEND_SRC_PAYLOAD2] = brw_reg(); write->check_tdr = true; - write->send_has_side_effects = true; + write->has_side_effects = true; /* We can use a headerless message for the first render target */ write->header_size = i == 0 ? 0 : 2; diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 079ea993ce1..bfdcc05085a 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -289,9 +289,9 @@ brw_emit_urb_fence(brw_shader &s) { const brw_builder bld1 = brw_builder(&s).uniform(); brw_reg dst = bld1.vgrf(BRW_TYPE_UD); - brw_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst, - brw_vec8_grf(0, 0), - brw_imm_ud(true)); + brw_send_inst *fence = bld1.emit(SHADER_OPCODE_MEMORY_FENCE, dst, + brw_vec8_grf(0, 0), + brw_imm_ud(true))->as_send(); fence->size_written = REG_SIZE * reg_unit(s.devinfo); fence->sfid = BRW_SFID_URB; /* The logical thing here would likely be a THREADGROUP fence but that's diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 4977a8839d4..b1e2ca43670 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -4935,7 +4935,7 @@ emit_rt_lsc_fence(const brw_builder &bld, const brw_builder ubld = bld.exec_all().group(8, 0); brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); - brw_inst *send = ubld.SEND(); + brw_send_inst *send = ubld.SEND(); send->dst = tmp; send->src[SEND_SRC_DESC] = brw_imm_ud(0); @@ -4949,7 +4949,7 @@ emit_rt_lsc_fence(const brw_builder &bld, send->ex_mlen = 0; /* Temp write for scheduling */ send->size_written = REG_SIZE * reg_unit(devinfo); - send->send_has_side_effects = true; + send->has_side_effects = true; ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), tmp); } @@ -5164,8 +5164,8 @@ emit_fence(const brw_builder &bld, enum opcode opcode, opcode == SHADER_OPCODE_MEMORY_FENCE); brw_reg dst = commit_enable ? bld.vgrf(BRW_TYPE_UD) : bld.null_reg_ud(); - brw_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0), - brw_imm_ud(commit_enable)); + brw_send_inst *fence = bld.emit(opcode, dst, brw_vec8_grf(0, 0), + brw_imm_ud(commit_enable))->as_send(); fence->sfid = sfid; fence->desc = desc; fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0; diff --git a/src/intel/compiler/brw_generator.cpp b/src/intel/compiler/brw_generator.cpp index 44124399fb4..33ce7865f95 100644 --- a/src/intel/compiler/brw_generator.cpp +++ b/src/intel/compiler/brw_generator.cpp @@ -161,12 +161,12 @@ brw_generator::patch_halt_jumps() } void -brw_generator::generate_send(brw_inst *inst, - struct brw_reg dst, - struct brw_reg desc, - struct brw_reg ex_desc, - struct brw_reg payload, - struct brw_reg payload2) +brw_generator::generate_send(brw_send_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2) { const bool gather = inst->opcode == SHADER_OPCODE_SEND_GATHER; if (gather) { @@ -181,7 +181,7 @@ brw_generator::generate_send(brw_inst *inst, * descriptor is written indirectly (it already contains a SS/BSS * surface handle) */ - assert(!inst->send_ex_desc_imm); + assert(!inst->ex_desc_imm); brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather); if (inst->check_tdr) brw_eu_inst_set_opcode(p->isa, brw_last_inst, BRW_OPCODE_SENDC); @@ -191,8 +191,8 @@ brw_generator::generate_send(brw_inst *inst, */ brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2, desc, ex_desc, - inst->send_ex_desc_imm ? inst->offset : 0, - inst->ex_mlen, inst->send_ex_bso, + inst->ex_desc_imm ? inst->offset : 0, + inst->ex_mlen, inst->ex_bso, inst->eot, gather); if (inst->check_tdr) brw_eu_inst_set_opcode(p->isa, brw_last_inst, @@ -886,7 +886,8 @@ brw_generator::generate_code(const brw_shader &s, assert(inst->force_writemask_all || inst->exec_size >= 4); assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); - assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo)); + if (const brw_send_inst *send = inst->as_send()) + assert(send->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo)); switch (inst->opcode) { case BRW_OPCODE_NOP: @@ -1094,7 +1095,6 @@ brw_generator::generate_code(const brw_shader &s, case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - assert(inst->mlen == 0); gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], retype(brw_null_reg(), src[0].type)); break; @@ -1103,7 +1103,6 @@ brw_generator::generate_code(const brw_shader &s, case SHADER_OPCODE_POW: assert(devinfo->verx10 < 125); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - assert(inst->mlen == 0); assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8); gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); break; @@ -1144,13 +1143,13 @@ brw_generator::generate_code(const brw_shader &s, break; case SHADER_OPCODE_SEND: - generate_send(inst, dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], + generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]); send_count++; break; case SHADER_OPCODE_SEND_GATHER: - generate_send(inst, dst, + generate_send(inst->as_send(), dst, src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC], src[SEND_GATHER_SRC_SCALAR], brw_null_reg()); send_count++; diff --git a/src/intel/compiler/brw_generator.h b/src/intel/compiler/brw_generator.h index 13cf4bb304c..4c3437c38f0 100644 --- a/src/intel/compiler/brw_generator.h +++ b/src/intel/compiler/brw_generator.h @@ -25,7 +25,7 @@ public: const unsigned *get_assembly(); private: - void generate_send(brw_inst *inst, + void generate_send(brw_send_inst *inst, struct brw_reg dst, struct brw_reg desc, struct brw_reg ex_desc, diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp index eb5277a561f..b47f9579f3c 100644 --- a/src/intel/compiler/brw_inst.cpp +++ b/src/intel/compiler/brw_inst.cpp @@ -14,7 +14,11 @@ static inline unsigned brw_inst_kind_size(brw_inst_kind kind) { - return sizeof(brw_inst); + /* TODO: Temporarily here to ensure all instructions can be converted to + * SEND. Once all new kinds are added, change so that BASE allocate only + * sizeof(brw_inst). + */ + return sizeof(brw_send_inst); } static brw_inst * @@ -110,8 +114,6 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode, const brw_inst_kind kind = inst->kind; const brw_inst_kind new_kind = brw_inst_kind_for_opcode(new_opcode); - assert(new_kind == BRW_KIND_BASE); - const unsigned inst_size = brw_inst_kind_size(kind); const unsigned new_inst_size = brw_inst_kind_size(new_kind); assert(new_inst_size <= inst_size); @@ -127,6 +129,9 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode, inst->src = new_src; } + if (new_kind != kind) + memset(((char *)inst) + sizeof(brw_inst), 0, new_inst_size - sizeof(brw_inst)); + inst->sources = new_num_sources; inst->opcode = new_opcode; inst->kind = new_kind; @@ -137,7 +142,21 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode, brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode) { - return BRW_KIND_BASE; + switch (opcode) { + case BRW_OPCODE_SEND: + case BRW_OPCODE_SENDS: + case BRW_OPCODE_SENDC: + case BRW_OPCODE_SENDSC: + case SHADER_OPCODE_SEND: + case SHADER_OPCODE_SEND_GATHER: + case SHADER_OPCODE_BARRIER: + case SHADER_OPCODE_MEMORY_FENCE: + case SHADER_OPCODE_INTERLOCK: + return BRW_KIND_SEND; + + default: + return BRW_KIND_BASE; + } } bool @@ -483,9 +502,9 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const switch (opcode) { case SHADER_OPCODE_SEND: if (arg == SEND_SRC_PAYLOAD1) { - return mlen * REG_SIZE; + return as_send()->mlen * REG_SIZE; } else if (arg == SEND_SRC_PAYLOAD2) { - return ex_mlen * REG_SIZE; + return as_send()->ex_mlen * REG_SIZE; } break; @@ -893,7 +912,7 @@ brw_inst::has_side_effects() const switch (opcode) { case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND_GATHER: - return send_has_side_effects; + return as_send()->has_side_effects; case BRW_OPCODE_SYNC: case SHADER_OPCODE_MEMORY_STORE_LOGICAL: @@ -927,7 +946,7 @@ brw_inst::is_volatile() const return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS; case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND_GATHER: - return send_is_volatile; + return as_send()->is_volatile; default: return false; } diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 130f8f52141..151439b3e22 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -41,6 +41,7 @@ struct brw_shader; enum ENUM_PACKED brw_inst_kind { BRW_KIND_BASE, + BRW_KIND_SEND, }; brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode); @@ -53,6 +54,23 @@ struct brw_inst : brw_exec_node { static void* operator new(size_t size, void *ptr) { return ptr; } static void operator delete(void *p) {} + /* Prefer macro here instead of templates to get nicer + * helper names. + */ +#define KIND_HELPERS(HELPER_NAME, TYPE_NAME, ENUM_NAME) \ + struct TYPE_NAME *HELPER_NAME() { \ + return kind == ENUM_NAME ? (struct TYPE_NAME *)this \ + : nullptr; \ + } \ + const struct TYPE_NAME *HELPER_NAME() const { \ + return kind == ENUM_NAME ? (const struct TYPE_NAME *)this \ + : nullptr; \ + } + + KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND); + +#undef KIND_HELPERS + bool is_send() const; bool is_payload(unsigned arg) const; bool is_partial_write(unsigned grf_size = REG_SIZE) const; @@ -144,13 +162,8 @@ struct brw_inst : brw_exec_node { */ uint8_t group; - uint8_t mlen; /**< SEND message length */ - uint8_t ex_mlen; /**< SENDS extended message length */ - uint8_t sfid; /**< SFID for SEND instructions */ /** The number of hardware registers used for a message header. */ uint8_t header_size; - uint32_t desc; /**< SEND[S] message descriptor immediate */ - uint32_t ex_desc; /**< SEND[S] extended message descriptor immediate */ uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint16_t size_written; /**< Data written to the destination register in bytes. */ @@ -179,25 +192,11 @@ struct brw_inst : brw_exec_node { */ unsigned rcount:4; - unsigned pad:4; - bool predicate_inverse:1; bool writes_accumulator:1; /**< instruction implicitly writes accumulator */ bool force_writemask_all:1; bool saturate:1; - bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */ - bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */ - bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */ - bool send_ex_bso:1; /**< Only for SHADER_OPCODE_SEND, use extended - * bindless surface offset (26bits instead of - * 20bits) - */ - /** - * Only for SHADER_OPCODE_SEND, @offset field contains an immediate - * part of the extended descriptor that must be encoded in the - * instruction. - */ - bool send_ex_desc_imm:1; + /** * The predication mask applied to this instruction is guaranteed to * be uniform and a superset of the execution mask of the present block. @@ -215,6 +214,8 @@ struct brw_inst : brw_exec_node { * never executed. */ bool has_no_mask_send_params:1; + + unsigned pad:13; }; uint32_t bits; }; @@ -233,6 +234,42 @@ struct brw_inst : brw_exec_node { bblock_t *block; }; +struct brw_send_inst : brw_inst { + uint32_t desc; + uint32_t ex_desc; + + uint8_t mlen; + uint8_t ex_mlen; + uint8_t sfid; + + union { + struct { + /** + * Turns it into a SENDC. + */ + bool check_tdr:1; + + bool has_side_effects:1; + bool is_volatile:1; + + /** + * Use extended bindless surface offset (26bits instead of 20bits) + */ + bool ex_bso:1; + + /** + * Only for SHADER_OPCODE_SEND, @offset field contains an immediate + * part of the extended descriptor that must be encoded in the + * instruction. + */ + bool ex_desc_imm:1; + + uint8_t pad:3; + }; + uint8_t send_bits; + }; +}; + /** * Make the execution of \p inst dependent on the evaluation of a possibly * inverted predicate. diff --git a/src/intel/compiler/brw_lower.cpp b/src/intel/compiler/brw_lower.cpp index 96b07015a4a..20c92f32c31 100644 --- a/src/intel/compiler/brw_lower.cpp +++ b/src/intel/compiler/brw_lower.cpp @@ -519,22 +519,27 @@ brw_lower_sends_overlapping_payload(brw_shader &s) bool progress = false; foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { - if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && - regions_overlap(inst->src[SEND_SRC_PAYLOAD1], - inst->mlen * REG_SIZE, - inst->src[SEND_SRC_PAYLOAD2], - inst->ex_mlen * REG_SIZE)) { - const unsigned arg = inst->mlen < inst->ex_mlen ? + if (inst->opcode != SHADER_OPCODE_SEND) + continue; + + brw_send_inst *send = inst->as_send(); + + if (send->ex_mlen > 0 && + regions_overlap(send->src[SEND_SRC_PAYLOAD1], + send->mlen * REG_SIZE, + send->src[SEND_SRC_PAYLOAD2], + send->ex_mlen * REG_SIZE)) { + const unsigned arg = send->mlen < send->ex_mlen ? SEND_SRC_PAYLOAD1 : SEND_SRC_PAYLOAD2; - const unsigned len = MIN2(inst->mlen, inst->ex_mlen); + const unsigned len = MIN2(send->mlen, send->ex_mlen); brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD); /* Sadly, we've lost all notion of channels and bit sizes at this * point. Just WE_all it. */ - const brw_builder ibld = brw_builder(inst).exec_all().group(16, 0); - brw_reg copy_src = retype(inst->src[arg], BRW_TYPE_UD); + const brw_builder ibld = brw_builder(send).exec_all().group(16, 0); + brw_reg copy_src = retype(send->src[arg], BRW_TYPE_UD); brw_reg copy_dst = tmp; for (unsigned i = 0; i < len; i += 2) { if (len == i + 1) { @@ -546,7 +551,7 @@ brw_lower_sends_overlapping_payload(brw_shader &s) copy_src = offset(copy_src, ibld, 1); copy_dst = offset(copy_dst, ibld, 1); } - inst->src[arg] = tmp; + send->src[arg] = tmp; progress = true; } } @@ -867,7 +872,7 @@ brw_s0(enum brw_reg_type type, unsigned subnr) } static bool -brw_lower_send_gather_inst(brw_shader &s, brw_inst *inst) +brw_lower_send_gather_inst(brw_shader &s, brw_send_inst *inst) { const intel_device_info *devinfo = s.devinfo; assert(devinfo->ver >= 30); @@ -926,7 +931,7 @@ brw_lower_send_gather(brw_shader &s) foreach_block_and_inst(block, brw_inst, inst, s.cfg) { if (inst->opcode == SHADER_OPCODE_SEND_GATHER) - progress |= brw_lower_send_gather_inst(s, inst); + progress |= brw_lower_send_gather_inst(s, inst->as_send()); } if (progress) diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 326ed0aa3cd..f8d16a919a8 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -31,6 +31,12 @@ #include "util/bitpack_helpers.h" +static inline brw_send_inst * +brw_transform_inst_to_send(const brw_builder &bld, brw_inst *inst) +{ + return brw_transform_inst(bld, inst, SHADER_OPCODE_SEND)->as_send(); +} + static void lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst) { @@ -50,25 +56,27 @@ lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst) brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, header_size), BRW_TYPE_F); bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->header_size = header_size; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->sfid = BRW_SFID_URB; - inst->desc = brw_urb_desc(devinfo, + send->header_size = header_size; + + send->sfid = BRW_SFID_URB; + send->desc = brw_urb_desc(devinfo, GFX8_URB_OPCODE_SIMD8_READ, per_slot_present, false, - inst->offset); + send->offset); - inst->mlen = header_size; - inst->ex_desc = 0; - inst->ex_mlen = 0; - inst->send_is_volatile = true; + send->mlen = header_size; + send->ex_desc = 0; + send->ex_mlen = 0; + send->is_volatile = true; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); } static void @@ -103,29 +111,30 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_inst *inst) bld.ADD(payload, payload, offsets); } - inst->sfid = BRW_SFID_URB; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + + send->sfid = BRW_SFID_URB; assert((dst_comps >= 1 && dst_comps <= 4) || dst_comps == 8); - inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, + send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, dst_comps /* num_channels */, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1UC_L3UC)); - /* Update the original instruction. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); - inst->ex_mlen = 0; - inst->header_size = 0; - inst->send_has_side_effects = true; - inst->send_is_volatile = false; + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size); + send->ex_mlen = 0; + send->header_size = 0; + send->has_side_effects = true; + send->is_volatile = false; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); } static void @@ -166,26 +175,28 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst) delete [] payload_sources; - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->header_size = header_size; - inst->dst = brw_null_reg(); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->sfid = BRW_SFID_URB; - inst->desc = brw_urb_desc(devinfo, + send->header_size = header_size; + send->dst = brw_null_reg(); + + send->sfid = BRW_SFID_URB; + send->desc = brw_urb_desc(devinfo, GFX8_URB_OPCODE_SIMD8_WRITE, per_slot_present, channel_mask_present, - inst->offset); + send->offset); - inst->mlen = length; - inst->ex_desc = 0; - inst->ex_mlen = 0; - inst->send_has_side_effects = true; + send->mlen = length; + send->ex_desc = 0; + send->ex_mlen = 0; + send->has_side_effects = true; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); } static void @@ -237,10 +248,13 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst) brw_reg payload2 = bld.move_to_vgrf(src, src_comps); const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE; - inst->sfid = BRW_SFID_URB; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + + send->sfid = BRW_SFID_URB; enum lsc_opcode op = cmask.file != BAD_FILE ? LSC_OP_STORE_CMASK : LSC_OP_STORE; - inst->desc = lsc_msg_desc(devinfo, op, + send->desc = lsc_msg_desc(devinfo, op, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, num_channels_or_cmask, @@ -248,18 +262,16 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst) LSC_CACHE(devinfo, STORE, L1UC_L3UC)); - /* Update the original instruction. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); - inst->ex_mlen = ex_mlen; - inst->header_size = 0; - inst->send_has_side_effects = true; - inst->send_is_volatile = false; + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size); + send->ex_mlen = ex_mlen; + send->header_size = 0; + send->has_side_effects = true; + send->is_volatile = false; - inst->src[SEND_SRC_DESC] = desc; - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = payload2; + send->src[SEND_SRC_DESC] = desc; + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = payload2; } static void @@ -456,12 +468,12 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data); /* XXX - Bit 13 Per-sample PS enable */ - inst->desc = + uint32_t desc = (inst->group / 16) << 11 | /* rt slot group */ brw_fb_write_desc(devinfo, target, msg_ctl, last_rt, 0 /* coarse_rt_write */); - brw_reg desc = brw_imm_ud(0); + brw_reg desc_reg = brw_imm_ud(0); if (prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES && !inst->has_no_mask_send_params) { assert(devinfo->ver >= 11); @@ -471,10 +483,10 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, brw_reg coarse_bit = ubld.AND(brw_dynamic_msaa_flags(prog_data), brw_imm_ud(INTEL_MSAA_FLAG_COARSE_RT_WRITES)); - desc = component(coarse_bit, 0); + desc_reg = component(coarse_bit, 0); } } else { - inst->desc |= prog_data->coarse_pixel_dispatch == INTEL_ALWAYS ? (1 << 18) : 0; + desc |= prog_data->coarse_pixel_dispatch == INTEL_ALWAYS ? (1 << 18) : 0; } uint32_t ex_desc = 0; @@ -493,20 +505,24 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, null_rt << 20 | (src0_alpha.file != BAD_FILE) << 15; } - inst->ex_desc = ex_desc; - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->sfid = BRW_SFID_RENDER_CACHE; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->src[SEND_SRC_DESC] = desc; - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); - inst->mlen = regs_written(load); - inst->ex_mlen = 0; - inst->header_size = header_size; - inst->check_tdr = true; - inst->send_has_side_effects = true; + send->desc = desc; + send->ex_desc = ex_desc; + + send->sfid = BRW_SFID_RENDER_CACHE; + + send->src[SEND_SRC_DESC] = desc_reg; + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->mlen = regs_written(load); + send->ex_mlen = 0; + send->header_size = header_size; + send->check_tdr = true; + send->has_side_effects = true; const bool double_rt_writes = devinfo->ver == 11 && prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES; @@ -515,17 +531,17 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, INTEL_MSAA_FLAG_COARSE_RT_WRITES); bld.IF(BRW_PREDICATE_NORMAL); { - brw_inst *coarse_inst = brw_clone_inst(*bld.shader, inst); + brw_send_inst *coarse_inst = brw_clone_inst(*bld.shader, send)->as_send(); coarse_inst->desc |= brw_fb_write_desc(devinfo, target, msg_ctl, last_rt, true); bld.emit(coarse_inst); } bld.ELSE(); { - bld.emit(brw_clone_inst(*bld.shader, inst)); + bld.emit(brw_clone_inst(*bld.shader, send)); } bld.ENDIF(); - inst->remove(); + send->remove(); } } @@ -581,19 +597,21 @@ lower_fb_read_logical_send(const brw_builder &bld, brw_inst *inst, component(header, 0), brw_imm_ud(~INTEL_MASK(14, 11))); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = header; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); - inst->mlen = length; - inst->header_size = length; - inst->sfid = BRW_SFID_RENDER_CACHE; - inst->check_tdr = true; - inst->desc = - (inst->group / 16) << 11 | /* rt slot group */ + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = header; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->mlen = length; + send->header_size = length; + send->sfid = BRW_SFID_RENDER_CACHE; + send->check_tdr = true; + send->desc = + (send->group / 16) << 11 | /* rt slot group */ brw_fb_read_desc(devinfo, target, - 0 /* msg_control */, inst->exec_size, + 0 /* msg_control */, send->exec_size, wm_prog_data->persample_dispatch); } @@ -1226,27 +1244,27 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) } } - /* Generate the SEND. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->mlen = mlen; - inst->header_size = header_size; - inst->sfid = BRW_SFID_SAMPLER; - uint sampler_ret_type = brw_type_size_bits(inst->dst.type) == 16 + send->mlen = mlen; + send->header_size = header_size; + send->sfid = BRW_SFID_SAMPLER; + uint sampler_ret_type = brw_type_size_bits(send->dst.type) == 16 ? GFX8_SAMPLER_RETURN_FORMAT_16BITS : GFX8_SAMPLER_RETURN_FORMAT_32BITS; if (surface.file == IMM && (sampler.file == IMM || sampler_handle.file != BAD_FILE)) { - inst->desc = brw_sampler_desc(devinfo, surface.ud, + send->desc = brw_sampler_desc(devinfo, surface.ud, sampler.file == IMM ? sampler.ud % 16 : 0, msg_type, simd_mode, sampler_ret_type); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } else if (surface_handle.file != BAD_FILE) { /* Bindless surface */ - inst->desc = brw_sampler_desc(devinfo, + send->desc = brw_sampler_desc(devinfo, GFX9_BTI_BINDLESS, sampler.file == IMM ? sampler.ud % 16 : 0, msg_type, @@ -1257,22 +1275,22 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) * header so we can leave the portion in the message descriptor 0. */ if (sampler_handle.file != BAD_FILE || sampler.file == IMM) { - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); } else { const brw_builder ubld = bld.uniform(); brw_reg desc = ubld.vgrf(BRW_TYPE_UD); ubld.SHL(desc, sampler, brw_imm_ud(8)); - inst->src[SEND_SRC_DESC] = component(desc, 0); + send->src[SEND_SRC_DESC] = component(desc, 0); } /* We assume that the driver provided the handle in the top 20 bits so * we can use the surface handle directly as the extended descriptor. */ - inst->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD); - inst->send_ex_bso = compiler->extended_bindless_surface_offset; + send->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD); + send->ex_bso = compiler->extended_bindless_surface_offset; } else { /* Immediate portion of the descriptor */ - inst->desc = brw_sampler_desc(devinfo, + send->desc = brw_sampler_desc(devinfo, 0, /* surface */ 0, /* sampler */ msg_type, @@ -1295,17 +1313,17 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) } ubld.AND(desc, desc, brw_imm_ud(0xfff)); - inst->src[SEND_SRC_DESC] = component(desc, 0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = component(desc, 0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } - inst->ex_desc = 0; + send->ex_desc = 0; - inst->src[SEND_SRC_PAYLOAD1] = src_payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_PAYLOAD1] = src_payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ - assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE * reg_unit(devinfo)); + assert(send->mlen <= MAX_SAMPLER_MESSAGE_SIZE * reg_unit(devinfo)); } /** @@ -1346,7 +1364,7 @@ emit_predicate_on_vector_mask(const brw_builder &bld, brw_inst *inst) } static void -setup_surface_descriptors(const brw_builder &bld, brw_inst *inst, uint32_t desc, +setup_surface_descriptors(const brw_builder &bld, brw_send_inst *send, uint32_t desc, const brw_reg &surface, const brw_reg &surface_handle) { const brw_compiler *compiler = bld.shader->compiler; @@ -1355,31 +1373,31 @@ setup_surface_descriptors(const brw_builder &bld, brw_inst *inst, uint32_t desc, assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); if (surface.file == IMM) { - inst->desc = desc | (surface.ud & 0xff); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->desc = desc | (surface.ud & 0xff); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } else if (surface_handle.file != BAD_FILE) { /* Bindless surface */ - inst->desc = desc | GFX9_BTI_BINDLESS; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->desc = desc | GFX9_BTI_BINDLESS; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); /* We assume that the driver provided the handle in the top 20 bits so * we can use the surface handle directly as the extended descriptor. */ - inst->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD); - inst->send_ex_bso = compiler->extended_bindless_surface_offset; + send->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD); + send->ex_bso = compiler->extended_bindless_surface_offset; } else { - inst->desc = desc; + send->desc = desc; const brw_builder ubld = bld.uniform(); brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); ubld.AND(tmp, surface, brw_imm_ud(0xff)); - inst->src[SEND_SRC_DESC] = component(tmp, 0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = component(tmp, 0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } } static void -setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, +setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send, uint32_t desc, const brw_reg &surface, int32_t base_offset) { @@ -1388,8 +1406,8 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, assert(base_offset == 0 || devinfo->ver >= 20); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); enum lsc_addr_surface_type surf_type = lsc_msg_desc_addr_type(devinfo, desc); @@ -1402,17 +1420,17 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, switch (surf_type) { case LSC_ADDR_SURFTYPE_BSS: - inst->send_ex_bso = compiler->extended_bindless_surface_offset; + send->ex_bso = compiler->extended_bindless_surface_offset; FALLTHROUGH; case LSC_ADDR_SURFTYPE_SS: assert(surface.file != BAD_FILE); /* We assume that the driver provided the handle in the top 20 bits so * we can use the surface handle directly as the extended descriptor. */ - inst->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD); + send->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD); /* Gfx20+ assumes ExBSO with UGM */ - if (devinfo->ver >= 20 && inst->sfid == BRW_SFID_UGM) - inst->send_ex_bso = true; + if (devinfo->ver >= 20 && send->sfid == BRW_SFID_UGM) + send->ex_bso = true; /* We're already using the extended descriptor to hold the surface * handle. But now the immediate extended descriptor bits in the @@ -1425,8 +1443,8 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, * generator to do the right thing with it. */ if (base_offset) { - inst->send_ex_desc_imm = true; - inst->offset = SET_BITS(GET_BITS(base_offset_bits, 16, 4), 31, 19) | + send->ex_desc_imm = true; + send->offset = SET_BITS(GET_BITS(base_offset_bits, 16, 4), 31, 19) | SET_BITS(GET_BITS(base_offset_bits, 3, 0), 15, 12); } break; @@ -1434,18 +1452,18 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, case LSC_ADDR_SURFTYPE_BTI: assert(surface.file != BAD_FILE); if (surface.file == IMM) { - inst->src[SEND_SRC_EX_DESC] = + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud, base_offset_bits)); } else { assert(base_offset == 0); const brw_builder ubld = bld.uniform(); brw_reg tmp = ubld.SHL(surface, brw_imm_ud(24)); - inst->src[SEND_SRC_EX_DESC] = component(tmp, 0); + send->src[SEND_SRC_EX_DESC] = component(tmp, 0); } break; case LSC_ADDR_SURFTYPE_FLAT: - inst->src[SEND_SRC_EX_DESC] = + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(lsc_flat_ex_desc(devinfo, base_offset_bits)); break; @@ -1609,26 +1627,29 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) brw_emit_predicate_on_sample_mask(bld, inst); } + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + switch (mode) { case MEMORY_MODE_UNTYPED: case MEMORY_MODE_CONSTANT: case MEMORY_MODE_SCRATCH: - inst->sfid = BRW_SFID_UGM; + send->sfid = BRW_SFID_UGM; break; case MEMORY_MODE_TYPED: - inst->sfid = BRW_SFID_TGM; + send->sfid = BRW_SFID_TGM; break; case MEMORY_MODE_SHARED_LOCAL: - inst->sfid = BRW_SFID_SLM; + send->sfid = BRW_SFID_SLM; break; } - assert(inst->sfid); + assert(send->sfid); /* Disable LSC data port L1 cache scheme for the TGM load/store for RT * shaders. (see HSD 18038444588) */ if (devinfo->ver >= 20 && mesa_shader_stage_is_rt(bld.shader->stage) && - inst->sfid == BRW_SFID_TGM && + send->sfid == BRW_SFID_TGM && !lsc_opcode_is_atomic(op)) { if (lsc_opcode_is_store(op)) { cache_mode = (unsigned) LSC_CACHE(devinfo, STORE, L1UC_L3WB); @@ -1637,26 +1658,24 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) } } - inst->desc = lsc_msg_desc(devinfo, op, binding_type, addr_size, data_size, + send->desc = lsc_msg_desc(devinfo, op, binding_type, addr_size, data_size, lsc_opcode_has_cmask(op) ? (1 << components) - 1 : components, transpose, cache_mode); - setup_lsc_surface_descriptors(bld, inst, inst->desc, binding, base_offset); + setup_lsc_surface_descriptors(bld, send, send->desc, binding, base_offset); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - - inst->mlen = lsc_msg_addr_len(devinfo, addr_size, - inst->exec_size * coord_components); - inst->ex_mlen = ex_mlen; - inst->header_size = 0; - inst->send_has_side_effects = has_side_effects; - inst->send_is_volatile = !has_side_effects || volatile_access; + send->mlen = lsc_msg_addr_len(devinfo, addr_size, + send->exec_size * coord_components); + send->ex_mlen = ex_mlen; + send->header_size = 0; + send->has_side_effects = has_side_effects; + send->is_volatile = !has_side_effects || volatile_access; /* Finally, the payload */ - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = payload2; + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = payload2; } static brw_reg @@ -1927,28 +1946,29 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) assert(sfid); - /* Update the original instruction. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->sfid = sfid; - inst->mlen = mlen; - inst->ex_mlen = ex_mlen; - inst->header_size = header.file != BAD_FILE ? 1 : 0; - inst->send_has_side_effects = has_side_effects; - inst->send_is_volatile = !has_side_effects || volatile_access; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + + send->sfid = sfid; + send->mlen = mlen; + send->ex_mlen = ex_mlen; + send->header_size = header.file != BAD_FILE ? 1 : 0; + send->has_side_effects = has_side_effects; + send->is_volatile = !has_side_effects || volatile_access; if (block) { - assert(inst->force_writemask_all); - inst->exec_size = components > 8 ? 16 : 8; + assert(send->force_writemask_all); + send->exec_size = components > 8 ? 16 : 8; } /* Set up descriptors */ switch (binding_type) { case LSC_ADDR_SURFTYPE_FLAT: - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); break; case LSC_ADDR_SURFTYPE_BSS: - inst->send_ex_bso = compiler->extended_bindless_surface_offset; + send->ex_bso = compiler->extended_bindless_surface_offset; FALLTHROUGH; case LSC_ADDR_SURFTYPE_SS: desc |= GFX9_BTI_BINDLESS; @@ -1956,30 +1976,30 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) /* We assume that the driver provided the handle in the top 20 bits so * we can use the surface handle directly as the extended descriptor. */ - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = binding; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = binding; break; case LSC_ADDR_SURFTYPE_BTI: if (binding.file == IMM) { desc |= binding.ud & 0xff; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } else { brw_reg tmp = ubld1.vgrf(BRW_TYPE_UD); ubld1.AND(tmp, binding, brw_imm_ud(0xff)); - inst->src[SEND_SRC_DESC] = component(tmp, 0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_DESC] = component(tmp, 0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } break; default: UNREACHABLE("Unknown surface type"); } - inst->desc = desc; + send->desc = desc; /* Finally, the payloads */ - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = payload2; + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = payload2; } static void @@ -2007,43 +2027,44 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld, assert(alignment_B.file == IMM); unsigned alignment = alignment_B.ud; - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->sfid = BRW_SFID_UGM; - inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && + send->sfid = BRW_SFID_UGM; + send->ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && compiler->extended_bindless_surface_offset; assert(!compiler->indirect_ubos_use_sampler); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = ubo_offset; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = ubo_offset; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); if (alignment >= 4) { - inst->desc = + send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, surf_type, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, 4 /* num_channels */, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS)); - inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size); - setup_lsc_surface_descriptors(bld, inst, inst->desc, + setup_lsc_surface_descriptors(bld, send, send->desc, surface.file != BAD_FILE ? surface : surface_handle, 0); } else { - inst->desc = + send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, surf_type, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, 1 /* num_channels */, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS)); - inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size); - setup_lsc_surface_descriptors(bld, inst, inst->desc, + setup_lsc_surface_descriptors(bld, send, send->desc, surface.file != BAD_FILE ? surface : surface_handle, 0); @@ -2052,21 +2073,21 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld, * Hopefully, dead code will clean up the mess if some of them aren't * needed. */ - assert(inst->size_written == 16 * inst->exec_size); - inst->size_written /= 4; + assert(send->size_written == 16 * send->exec_size); + send->size_written /= 4; for (unsigned c = 1; c < 4; c++) { /* Emit a copy of the instruction because we're about to modify * it. Because this loop starts at 1, we will emit copies for the * first 3 and the final one will be the modified instruction. */ - bld.emit(brw_clone_inst(*bld.shader, inst)); + bld.emit(brw_clone_inst(*bld.shader, send)); /* Offset the source */ - inst->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD); - bld.ADD(inst->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4)); + send->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD); + bld.ADD(send->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4)); /* Offset the destination */ - inst->dst = offset(inst->dst, bld, 1); + send->dst = offset(send->dst, bld, 1); } } } @@ -2091,61 +2112,62 @@ lower_varying_pull_constant_logical_send(const brw_builder &bld, brw_inst *inst) assert(inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].file == IMM); unsigned alignment = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].ud; - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->mlen = inst->exec_size / 8; + send->mlen = send->exec_size / 8; /* src[SEND_SRC_DESC/EX_DESC] are filled by setup_surface_descriptors() */ - inst->src[SEND_SRC_PAYLOAD1] = ubo_offset; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_PAYLOAD1] = ubo_offset; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); if (compiler->indirect_ubos_use_sampler) { const unsigned simd_mode = - inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + send->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : BRW_SAMPLER_SIMD_MODE_SIMD16; const uint32_t desc = brw_sampler_desc(devinfo, 0, 0, GFX5_SAMPLER_MESSAGE_SAMPLE_LD, simd_mode, 0); - inst->sfid = BRW_SFID_SAMPLER; - setup_surface_descriptors(bld, inst, desc, surface, surface_handle); + send->sfid = BRW_SFID_SAMPLER; + setup_surface_descriptors(bld, send, desc, surface, surface_handle); } else if (alignment >= 4) { const uint32_t desc = - brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + brw_dp_untyped_surface_rw_desc(devinfo, send->exec_size, 4, /* num_channels */ false /* write */); - inst->sfid = BRW_SFID_HDC1; - setup_surface_descriptors(bld, inst, desc, surface, surface_handle); + send->sfid = BRW_SFID_HDC1; + setup_surface_descriptors(bld, send, desc, surface, surface_handle); } else { const uint32_t desc = - brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + brw_dp_byte_scattered_rw_desc(devinfo, send->exec_size, 32, /* bit_size */ false /* write */); - inst->sfid = BRW_SFID_HDC0; - setup_surface_descriptors(bld, inst, desc, surface, surface_handle); + send->sfid = BRW_SFID_HDC0; + setup_surface_descriptors(bld, send, desc, surface, surface_handle); /* The byte scattered messages can only read one dword at a time so * we have to duplicate the message 4 times to read the full vec4. * Hopefully, dead code will clean up the mess if some of them aren't * needed. */ - assert(inst->size_written == 16 * inst->exec_size); - inst->size_written /= 4; + assert(send->size_written == 16 * send->exec_size); + send->size_written /= 4; for (unsigned c = 1; c < 4; c++) { /* Emit a copy of the instruction because we're about to modify * it. Because this loop starts at 1, we will emit copies for the * first 3 and the final one will be the modified instruction. */ - bld.emit(brw_clone_inst(*bld.shader, inst)); + bld.emit(brw_clone_inst(*bld.shader, send)); /* Offset the source */ - inst->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD); - bld.ADD(inst->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4)); + send->src[SEND_SRC_PAYLOAD1] = bld.vgrf(BRW_TYPE_UD); + bld.ADD(send->src[SEND_SRC_PAYLOAD1], ubo_offset, brw_imm_ud(c * 4)); /* Offset the destination */ - inst->dst = offset(inst->dst, bld, 1); + send->dst = offset(send->dst, bld, 1); } } } @@ -2261,19 +2283,21 @@ lower_interpolator_logical_send(const brw_builder &bld, brw_inst *inst, } } - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->sfid = BRW_SFID_PIXEL_INTERPOLATOR; - inst->desc = desc_imm; - inst->ex_desc = 0; - inst->mlen = mlen; - inst->ex_mlen = 0; - inst->send_has_side_effects = false; - inst->send_is_volatile = false; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->src[SEND_SRC_DESC] = component(desc, 0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->sfid = BRW_SFID_PIXEL_INTERPOLATOR; + send->desc = desc_imm; + send->ex_desc = 0; + send->mlen = mlen; + send->ex_mlen = 0; + send->has_side_effects = false; + send->is_volatile = false; + + send->src[SEND_SRC_DESC] = component(desc, 0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); } static void @@ -2337,23 +2361,24 @@ lower_btd_logical_send(const brw_builder &bld, brw_inst *inst) payload = bld.move_to_vgrf(brw_imm_uq(0), 1); } - /* Update the original instruction. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); - inst->mlen = mlen; - inst->ex_mlen = ex_mlen; - inst->header_size = 0; /* HW docs require has_header = false */ - inst->send_has_side_effects = true; - inst->send_is_volatile = false; + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; + + send->mlen = mlen; + send->ex_mlen = ex_mlen; + send->header_size = 0; /* HW docs require has_header = false */ + send->has_side_effects = true; + send->is_volatile = false; /* Set up SFID and descriptors */ - inst->sfid = BRW_SFID_BINDLESS_THREAD_DISPATCH; - inst->desc = brw_btd_spawn_desc(devinfo, inst->exec_size, + send->sfid = BRW_SFID_BINDLESS_THREAD_DISPATCH; + send->desc = brw_btd_spawn_desc(devinfo, send->exec_size, GEN_RT_BTD_MESSAGE_SPAWN); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = header; - inst->src[SEND_SRC_PAYLOAD2] = payload; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = header; + send->src[SEND_SRC_PAYLOAD2] = payload; } static void @@ -2443,23 +2468,23 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst) stack_id_mask); } - /* Update the original instruction. */ - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->mlen = mlen; - inst->ex_mlen = ex_mlen; - inst->header_size = 0; /* HW docs require has_header = false */ - inst->send_has_side_effects = true; - inst->send_is_volatile = false; + send->mlen = mlen; + send->ex_mlen = ex_mlen; + send->header_size = 0; /* HW docs require has_header = false */ + send->has_side_effects = true; + send->is_volatile = false; /* Set up SFID and descriptors */ - inst->sfid = BRW_SFID_RAY_TRACE_ACCELERATOR; - inst->desc = brw_rt_trace_ray_desc(devinfo, inst->exec_size); + send->sfid = BRW_SFID_RAY_TRACE_ACCELERATOR; + send->desc = brw_rt_trace_ray_desc(devinfo, send->exec_size); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = header; - inst->src[SEND_SRC_PAYLOAD2] = payload; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = header; + send->src[SEND_SRC_PAYLOAD2] = payload; } static void @@ -2475,15 +2500,16 @@ lower_get_buffer_size(const brw_builder &bld, brw_inst *inst) brw_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE]; brw_reg lod = bld.move_to_vgrf(inst->src[GET_BUFFER_SIZE_SRC_LOD], 1); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->mlen = inst->exec_size / 8; - inst->ex_mlen = 0; - inst->ex_desc = 0; + send->mlen = send->exec_size / 8; + send->ex_mlen = 0; + send->ex_desc = 0; /* src[SEND_SRC_DESC/EX_DESC] are filled by setup_surface_descriptors() */ - inst->src[SEND_SRC_PAYLOAD1] = lod; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_PAYLOAD1] = lod; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); const uint32_t return_format = GFX8_SAMPLER_RETURN_FORMAT_32BITS; @@ -2492,9 +2518,9 @@ lower_get_buffer_size(const brw_builder &bld, brw_inst *inst) BRW_SAMPLER_SIMD_MODE_SIMD8, return_format); - inst->dst = retype(inst->dst, BRW_TYPE_UW); - inst->sfid = BRW_SFID_SAMPLER; - setup_surface_descriptors(bld, inst, desc, surface, surface_handle); + send->dst = retype(send->dst, BRW_TYPE_UW); + send->sfid = BRW_SFID_SAMPLER; + setup_surface_descriptors(bld, send, desc, surface, surface_handle); } static void @@ -2511,32 +2537,33 @@ lower_lsc_memory_fence_and_interlock(const brw_builder &bld, brw_inst *inst) assert(inst->size_written == reg_unit(devinfo) * REG_SIZE); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->check_tdr = interlock; - inst->send_has_side_effects = true; + send->check_tdr = interlock; + send->has_side_effects = true; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD); - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); - inst->mlen = reg_unit(devinfo); - inst->ex_mlen = 0; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD); + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->mlen = reg_unit(devinfo); + send->ex_mlen = 0; /* On Gfx12.5 URB is not listed as port usable for fences with the LSC (see * BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we completely ignore * the descriptor value and rebuild a legacy URB fence descriptor. */ - if (inst->sfid == BRW_SFID_URB && devinfo->ver < 20) { - inst->desc = brw_urb_fence_desc(devinfo); - inst->header_size = 1; + if (send->sfid == BRW_SFID_URB && devinfo->ver < 20) { + send->desc = brw_urb_fence_desc(devinfo); + send->header_size = 1; } else { enum lsc_fence_scope scope = - lsc_fence_msg_desc_scope(devinfo, inst->desc); + lsc_fence_msg_desc_scope(devinfo, send->desc); enum lsc_flush_type flush_type = - lsc_fence_msg_desc_flush_type(devinfo, inst->desc); + lsc_fence_msg_desc_flush_type(devinfo, send->desc); - if (inst->sfid == BRW_SFID_TGM) { + if (send->sfid == BRW_SFID_TGM) { scope = LSC_FENCE_TILE; flush_type = LSC_FLUSH_TYPE_EVICT; } @@ -2558,7 +2585,7 @@ lower_lsc_memory_fence_and_interlock(const brw_builder &bld, brw_inst *inst) flush_type = LSC_FLUSH_TYPE_NONE_6; } - inst->desc = lsc_fence_msg_desc(devinfo, scope, flush_type, false); + send->desc = lsc_fence_msg_desc(devinfo, scope, flush_type, false); } } @@ -2574,37 +2601,40 @@ lower_hdc_memory_fence_and_interlock(const brw_builder &bld, brw_inst *inst) const bool commit_enable = inst->src[1].ud; bool slm = false; - if (inst->sfid == BRW_SFID_SLM) { + + assert(inst->as_send() != NULL); + if (inst->as_send()->sfid == BRW_SFID_SLM) { assert(devinfo->ver >= 11); /* This SFID doesn't exist on Gfx11-12.0, but we use it to represent * SLM fences, and map back here to the way Gfx11 represented that: * a special "SLM" binding table index and the data cache SFID. */ - inst->sfid = BRW_SFID_HDC0; + inst->as_send()->sfid = BRW_SFID_HDC0; slm = true; } assert(inst->size_written == (commit_enable ? REG_SIZE : 0)); - inst = brw_transform_inst(bld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(bld, inst); + inst = NULL; - inst->check_tdr = interlock; - inst->send_has_side_effects = true; + send->check_tdr = interlock; + send->has_side_effects = true; - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); - inst->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD); - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); - inst->mlen = reg_unit(devinfo); - inst->ex_mlen = 0; - inst->header_size = 1; + send->src[SEND_SRC_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + send->src[SEND_SRC_PAYLOAD1] = retype(vec1(header), BRW_TYPE_UD); + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->mlen = reg_unit(devinfo); + send->ex_mlen = 0; + send->header_size = 1; const unsigned msg_type = - inst->sfid == BRW_SFID_RENDER_CACHE ? GFX7_DATAPORT_RC_MEMORY_FENCE : + send->sfid == BRW_SFID_RENDER_CACHE ? GFX7_DATAPORT_RC_MEMORY_FENCE : GFX7_DATAPORT_DC_MEMORY_FENCE; - inst->desc = brw_dp_desc(devinfo, slm ? GFX7_BTI_SLM : 0, msg_type, + send->desc = brw_dp_desc(devinfo, slm ? GFX7_BTI_SLM : 0, msg_type, commit_enable ? 1 << 5 : 0); } @@ -2750,7 +2780,7 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s) const intel_device_info *devinfo = s.devinfo; bool progress = false; - foreach_block_and_inst (block, brw_inst, inst, s.cfg) { + foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) continue; @@ -2768,35 +2798,35 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s) const brw_reg payload = ubld.vgrf(BRW_TYPE_UD); ubld.MOV(payload, offset_B); - inst->sfid = BRW_SFID_UGM; - inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, + brw_send_inst *send = brw_transform_inst_to_send(ubld, inst); + inst = NULL; + + send->sfid = BRW_SFID_UGM; + send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, surface_handle.file == BAD_FILE ? LSC_ADDR_SURFTYPE_BTI : LSC_ADDR_SURFTYPE_BSS, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, - inst->size_written / 4, + send->size_written / 4, true /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS)); - /* Update the original instruction. */ - inst = brw_transform_inst(ubld, inst, SHADER_OPCODE_SEND); - - inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); - inst->send_ex_bso = surface_handle.file != BAD_FILE && + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); + send->ex_bso = surface_handle.file != BAD_FILE && s.compiler->extended_bindless_surface_offset; - inst->ex_mlen = 0; - inst->header_size = 0; - inst->send_has_side_effects = false; - inst->send_is_volatile = true; - inst->exec_size = 1; + send->ex_mlen = 0; + send->header_size = 0; + send->has_side_effects = false; + send->is_volatile = true; + send->exec_size = 1; /* Finally, the payload */ - setup_lsc_surface_descriptors(ubld, inst, inst->desc, + setup_lsc_surface_descriptors(ubld, send, send->desc, surface.file != BAD_FILE ? surface : surface_handle, 0); - inst->src[SEND_SRC_PAYLOAD1] = payload; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); + send->src[SEND_SRC_PAYLOAD1] = payload; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS | BRW_DEPENDENCY_VARIABLES); @@ -2809,20 +2839,21 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s) ubld.group(1, 0).MOV(component(header, 2), brw_imm_ud(offset_B.ud / 16)); - inst = brw_transform_inst(ubld, inst, SHADER_OPCODE_SEND); + brw_send_inst *send = brw_transform_inst_to_send(ubld, inst); + inst = NULL; - inst->sfid = BRW_SFID_HDC_READ_ONLY; - inst->header_size = 1; - inst->mlen = 1; + send->sfid = BRW_SFID_HDC_READ_ONLY; + send->header_size = 1; + send->mlen = 1; uint32_t desc = brw_dp_oword_block_rw_desc(devinfo, true /* align_16B */, size_B.ud / 4, false /* write */); - setup_surface_descriptors(ubld, inst, desc, surface, surface_handle); + setup_surface_descriptors(ubld, send, desc, surface, surface_handle); - inst->src[SEND_SRC_PAYLOAD1] = header; - inst->src[SEND_SRC_PAYLOAD2] = brw_reg(); /* unused for reads */ + send->src[SEND_SRC_PAYLOAD1] = header; + send->src[SEND_SRC_PAYLOAD2] = brw_reg(); /* unused for reads */ s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS | BRW_DEPENDENCY_VARIABLES); @@ -2845,36 +2876,38 @@ brw_lower_send_descriptors(brw_shader &s) inst->opcode != SHADER_OPCODE_SEND_GATHER) continue; - const brw_builder ubld = brw_builder(inst).uniform(); + brw_send_inst *send = inst->as_send(); + + const brw_builder ubld = brw_builder(send).uniform(); /* Descriptor */ - const unsigned rlen = inst->dst.is_null() ? 0 : inst->size_written / REG_SIZE; - unsigned mlen = inst->mlen; - if (inst->opcode == SHADER_OPCODE_SEND_GATHER) { - assert(inst->sources >= 3); - mlen = (inst->sources - 3) * reg_unit(devinfo); + const unsigned rlen = send->dst.is_null() ? 0 : send->size_written / REG_SIZE; + unsigned mlen = send->mlen; + if (send->opcode == SHADER_OPCODE_SEND_GATHER) { + assert(send->sources >= 3); + mlen = (send->sources - 3) * reg_unit(devinfo); } - uint32_t desc_imm = inst->desc | - brw_message_desc(devinfo, mlen, rlen, inst->header_size); + uint32_t desc_imm = send->desc | + brw_message_desc(devinfo, mlen, rlen, send->header_size); - assert(inst->src[SEND_SRC_DESC].file != BAD_FILE); - assert(inst->src[SEND_SRC_EX_DESC].file != BAD_FILE); + assert(send->src[SEND_SRC_DESC].file != BAD_FILE); + assert(send->src[SEND_SRC_EX_DESC].file != BAD_FILE); - brw_reg desc = inst->src[SEND_SRC_DESC]; + brw_reg desc = send->src[SEND_SRC_DESC]; if (desc.file == IMM) { - inst->src[SEND_SRC_DESC] = brw_imm_ud(desc.ud | desc_imm); + send->src[SEND_SRC_DESC] = brw_imm_ud(desc.ud | desc_imm); } else { brw_reg addr_reg = ubld.vaddr(BRW_TYPE_UD, BRW_ADDRESS_SUBREG_INDIRECT_DESC); ubld.OR(addr_reg, desc, brw_imm_ud(desc_imm)); - inst->src[SEND_SRC_DESC] = addr_reg; + send->src[SEND_SRC_DESC] = addr_reg; } /* Extended descriptor */ - brw_reg ex_desc = inst->src[SEND_SRC_EX_DESC]; - uint32_t ex_desc_imm = inst->ex_desc | - brw_message_ex_desc(devinfo, inst->ex_mlen); + brw_reg ex_desc = send->src[SEND_SRC_EX_DESC]; + uint32_t ex_desc_imm = send->ex_desc | + brw_message_ex_desc(devinfo, send->ex_mlen); if (ex_desc.file == IMM) ex_desc_imm |= ex_desc.ud; @@ -2886,7 +2919,7 @@ brw_lower_send_descriptors(brw_shader &s) (ex_desc_imm & INTEL_MASK(15, 12)) != 0) needs_addr_reg = true; - if (inst->send_ex_bso) { + if (send->ex_bso) { needs_addr_reg = true; /* When using the extended bindless offset, the whole extended * descriptor is the surface handle. @@ -2894,7 +2927,7 @@ brw_lower_send_descriptors(brw_shader &s) ex_desc_imm = 0; } else { if (needs_addr_reg) - ex_desc_imm |= inst->sfid | inst->eot << 5; + ex_desc_imm |= send->sfid | send->eot << 5; } if (needs_addr_reg) { @@ -2906,9 +2939,9 @@ brw_lower_send_descriptors(brw_shader &s) ubld.MOV(addr_reg, ex_desc); else ubld.OR(addr_reg, ex_desc, brw_imm_ud(ex_desc_imm)); - inst->src[SEND_SRC_EX_DESC] = addr_reg; + send->src[SEND_SRC_EX_DESC] = addr_reg; } else { - inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(ex_desc_imm); + send->src[SEND_SRC_EX_DESC] = brw_imm_ud(ex_desc_imm); } progress = true; diff --git a/src/intel/compiler/brw_lower_simd_width.cpp b/src/intel/compiler/brw_lower_simd_width.cpp index 0410f22f4dc..ece13998b56 100644 --- a/src/intel/compiler/brw_lower_simd_width.cpp +++ b/src/intel/compiler/brw_lower_simd_width.cpp @@ -677,7 +677,10 @@ brw_lower_simd_width(brw_shader &s) (inst->size_written - residency_size) / inst->dst.component_size(inst->exec_size); - assert(!inst->writes_accumulator && !inst->mlen); + if (const brw_send_inst *send = inst->as_send()) + assert(!send->mlen); + + assert(!inst->writes_accumulator); /* Inserting the zip, unzip, and duplicated instructions in all of * the right spots is somewhat tricky. All of the unzip and any diff --git a/src/intel/compiler/brw_opt.cpp b/src/intel/compiler/brw_opt.cpp index b0dc1817ccb..58ddf2b02c4 100644 --- a/src/intel/compiler/brw_opt.cpp +++ b/src/intel/compiler/brw_opt.cpp @@ -263,9 +263,12 @@ brw_opt_zero_samples(brw_shader &s) { bool progress = false; - foreach_block_and_inst(block, brw_inst, send, s.cfg) { - if (send->opcode != SHADER_OPCODE_SEND || - send->sfid != BRW_SFID_SAMPLER) + foreach_block_and_inst(block, brw_inst, inst, s.cfg) { + if (inst->opcode != SHADER_OPCODE_SEND) + continue; + + brw_send_inst *send = inst->as_send(); + if (send->sfid != BRW_SFID_SAMPLER) continue; /* Wa_14012688258: @@ -340,9 +343,12 @@ brw_opt_split_sends(brw_shader &s) { bool progress = false; - foreach_block_and_inst(block, brw_inst, send, s.cfg) { - if (send->opcode != SHADER_OPCODE_SEND || - send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 || + foreach_block_and_inst(block, brw_inst, inst, s.cfg) { + if (inst->opcode != SHADER_OPCODE_SEND) + continue; + + brw_send_inst *send = inst->as_send(); + if (send->mlen <= reg_unit(s.devinfo) || send->ex_mlen > 0 || send->src[SEND_SRC_PAYLOAD1].file != VGRF) continue; @@ -619,19 +625,21 @@ brw_opt_send_to_send_gather(brw_shader &s) if (inst->opcode != SHADER_OPCODE_SEND) continue; + brw_send_inst *send = inst->as_send(); + /* For 1-2 registers, send-gather offers no benefits over split-send. */ - if (inst->mlen + inst->ex_mlen <= 2 * unit) + if (send->mlen + send->ex_mlen <= 2 * unit) continue; - assert(inst->mlen % unit == 0); - assert(inst->ex_mlen % unit == 0); + assert(send->mlen % unit == 0); + assert(send->ex_mlen % unit == 0); struct { brw_reg src; unsigned phys_len; } payload[2] = { - { inst->src[SEND_SRC_PAYLOAD1], inst->mlen / unit }, - { inst->src[SEND_SRC_PAYLOAD2], inst->ex_mlen / unit }, + { send->src[SEND_SRC_PAYLOAD1], send->mlen / unit }, + { send->src[SEND_SRC_PAYLOAD2], send->ex_mlen / unit }, }; const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len; @@ -645,25 +653,25 @@ brw_opt_send_to_send_gather(brw_shader &s) continue; } - inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND_GATHER, - SEND_GATHER_SRC_PAYLOAD + num_payload_sources); + send = brw_transform_inst(s, send, SHADER_OPCODE_SEND_GATHER, + SEND_GATHER_SRC_PAYLOAD + num_payload_sources)->as_send(); /* Sources 0 and 1 remain the same. Source 2 will be filled * after register allocation. */ - inst->src[SEND_GATHER_SRC_SCALAR] = {}; + send->src[SEND_GATHER_SRC_SCALAR] = {}; int idx = 3; for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) { for (unsigned i = 0; i < payload[p].phys_len; i++) { - inst->src[idx++] = byte_offset(payload[p].src, + send->src[idx++] = byte_offset(payload[p].src, i * reg_unit(devinfo) * REG_SIZE); } } - assert(idx == inst->sources); + assert(idx == send->sources); - inst->mlen = 0; - inst->ex_mlen = 0; + send->mlen = 0; + send->ex_mlen = 0; progress = true; } @@ -699,10 +707,12 @@ brw_opt_send_gather_to_send(brw_shader &s) if (inst->opcode != SHADER_OPCODE_SEND_GATHER) continue; - assert(inst->sources > 2); - assert(inst->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE); + brw_send_inst *send = inst->as_send(); - const int num_payload_sources = inst->sources - 3; + assert(send->sources > 2); + assert(send->src[SEND_GATHER_SRC_SCALAR].file == BAD_FILE); + + const int num_payload_sources = send->sources - 3; assert(num_payload_sources > 0); /* Limited by Src0.Length in the SEND instruction. */ @@ -713,7 +723,7 @@ brw_opt_send_gather_to_send(brw_shader &s) * and there's no need to use SEND_GATHER (which would set ARF scalar register * adding an extra instruction). */ - const brw_reg *payload = &inst->src[SEND_GATHER_SRC_PAYLOAD]; + const brw_reg *payload = &send->src[SEND_GATHER_SRC_PAYLOAD]; brw_reg payload1 = payload[0]; brw_reg payload2 = {}; int payload1_len = 0; @@ -755,21 +765,21 @@ brw_opt_send_gather_to_send(brw_shader &s) * * TODO: Pass LSC address length or infer it so valid splits can work. */ - if (payload2_len && (inst->sfid == BRW_SFID_UGM || - inst->sfid == BRW_SFID_TGM || - inst->sfid == BRW_SFID_SLM || - inst->sfid == BRW_SFID_URB)) { - enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, inst->desc); + if (payload2_len && (send->sfid == BRW_SFID_UGM || + send->sfid == BRW_SFID_TGM || + send->sfid == BRW_SFID_SLM || + send->sfid == BRW_SFID_URB)) { + enum lsc_opcode lsc_op = lsc_msg_desc_opcode(devinfo, send->desc); if (lsc_op_num_data_values(lsc_op) > 0) continue; } - inst = brw_transform_inst(s, inst, SHADER_OPCODE_SEND); + send = brw_transform_inst(s, send, SHADER_OPCODE_SEND)->as_send(); - inst->src[SEND_SRC_PAYLOAD1] = payload1; - inst->src[SEND_SRC_PAYLOAD2] = payload2; - inst->mlen = payload1_len * unit; - inst->ex_mlen = payload2_len * unit; + send->src[SEND_SRC_PAYLOAD1] = payload1; + send->src[SEND_SRC_PAYLOAD2] = payload2; + send->mlen = payload1_len * unit; + send->ex_mlen = payload2_len * unit; progress = true; } diff --git a/src/intel/compiler/brw_opt_cse.cpp b/src/intel/compiler/brw_opt_cse.cpp index 91f5bbca42e..4da3f074bf0 100644 --- a/src/intel/compiler/brw_opt_cse.cpp +++ b/src/intel/compiler/brw_opt_cse.cpp @@ -135,10 +135,12 @@ is_expression(const brw_shader *v, const brw_inst *const inst) case SHADER_OPCODE_LOAD_PAYLOAD: return !is_coalescing_payload(*v, inst); case SHADER_OPCODE_SEND: - case SHADER_OPCODE_SEND_GATHER: - return !inst->send_has_side_effects && - !inst->send_is_volatile && - !inst->eot; + case SHADER_OPCODE_SEND_GATHER: { + const brw_send_inst *send = inst->as_send(); + return !send->has_side_effects && + !send->is_volatile && + !send->eot; + } default: return false; } @@ -238,25 +240,31 @@ operands_match(const brw_inst *a, const brw_inst *b, bool *negate) } } +static bool +send_inst_match(brw_send_inst *a, brw_send_inst *b) +{ + return a->mlen == b->mlen && + a->ex_mlen == b->ex_mlen && + a->sfid == b->sfid && + a->desc == b->desc && + a->ex_desc == b->ex_desc && + a->send_bits == b->send_bits; +} + static bool instructions_match(brw_inst *a, brw_inst *b, bool *negate) { - /* `Kind` is derived from opcode, so skipped. */ return a->opcode == b->opcode && + /* `kind` is derived from opcode, so skipped. */ + (a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) && a->exec_size == b->exec_size && a->group == b->group && a->predicate == b->predicate && a->conditional_mod == b->conditional_mod && a->dst.type == b->dst.type && a->offset == b->offset && - a->mlen == b->mlen && - a->ex_mlen == b->ex_mlen && - a->sfid == b->sfid && - a->desc == b->desc && - a->ex_desc == b->ex_desc && a->size_written == b->size_written && - a->check_tdr == b->check_tdr && a->header_size == b->header_size && a->sources == b->sources && a->bits == b->bits && @@ -299,17 +307,12 @@ hash_inst(const void *v) inst->sources, inst->exec_size, inst->group, - inst->mlen, - inst->ex_mlen, - inst->sfid, inst->header_size, inst->conditional_mod, inst->predicate, }; const uint32_t u32data[] = { - inst->desc, - inst->ex_desc, inst->offset, inst->size_written, inst->opcode, @@ -321,6 +324,29 @@ hash_inst(const void *v) /* Skip hashing sched - we shouldn't be CSE'ing after that SWSB */ + switch (inst->kind) { + case BRW_KIND_SEND: { + const brw_send_inst *send = inst->as_send(); + const uint8_t send_u8data[] = { + send->mlen, + send->ex_mlen, + send->sfid, + send->send_bits, + }; + const uint32_t send_u32data[] = { + send->desc, + send->ex_desc, + }; + hash = HASH(hash, send_u8data); + hash = HASH(hash, send_u32data); + break; + } + + case BRW_KIND_BASE: + /* Nothing else to do. */ + break; + } + if (inst->opcode == BRW_OPCODE_MAD) { /* Commutatively combine the hashes for the multiplicands */ hash = hash_reg(hash, inst->src[0]); diff --git a/src/intel/compiler/brw_opt_dead_code_eliminate.cpp b/src/intel/compiler/brw_opt_dead_code_eliminate.cpp index 5e7470a32cf..0def547f256 100644 --- a/src/intel/compiler/brw_opt_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_opt_dead_code_eliminate.cpp @@ -60,7 +60,9 @@ can_omit_write(const brw_inst *inst) /* We can eliminate the destination write for ordinary instructions, * but not most SENDs. */ - if (inst->opcode < NUM_BRW_OPCODES && inst->mlen == 0) + const brw_send_inst *send = inst->as_send(); + if (inst->opcode < NUM_BRW_OPCODES && + (!send || send->mlen == 0)) return true; /* It might not be safe for other virtual opcodes. */ diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index 0838f1d80fa..1ce6d02b48c 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -415,12 +415,14 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con } fprintf(file, "(%d) ", inst->exec_size); - if (inst->mlen) { - fprintf(file, "(mlen: %d) ", inst->mlen); + const brw_send_inst *send = inst->as_send(); + + if (send && send->mlen) { + fprintf(file, "(mlen: %d) ", send->mlen); } - if (inst->ex_mlen) { - fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); + if (send && send->ex_mlen) { + fprintf(file, "(ex_mlen: %d) ", send->ex_mlen); } if (inst->eot) { @@ -665,13 +667,13 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con if (inst->has_no_mask_send_params) fprintf(file, "NoMaskParams "); - if (is_send && inst->desc) - fprintf(file, "Desc 0x%08x ", inst->desc); + if (send && send->desc) + fprintf(file, "Desc 0x%08x ", send->desc); - if (is_send && inst->ex_desc) - fprintf(file, "ExDesc 0x%08x ", inst->ex_desc); + if (send && send->ex_desc) + fprintf(file, "ExDesc 0x%08x ", send->ex_desc); - if (is_send && inst->send_ex_desc_imm) + if (send && send->ex_desc_imm) fprintf(file, "ExDescImmInst 0x%08x ", inst->offset); if (inst->sched.regdist || inst->sched.mode) { diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index a2afffbcc10..49fa0e303c4 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -621,7 +621,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst) * they're used as sources in the same instruction. We also need to add * interference here. */ - if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && + if (inst->opcode == SHADER_OPCODE_SEND && inst->as_send()->ex_mlen > 0 && inst->src[SEND_SRC_PAYLOAD1].file == VGRF && inst->src[SEND_SRC_PAYLOAD2].file == VGRF && inst->src[SEND_SRC_PAYLOAD1].nr != inst->src[SEND_SRC_PAYLOAD2].nr) { @@ -643,7 +643,7 @@ brw_reg_alloc::setup_inst_interference(const brw_inst *inst) assert(inst->opcode == SHADER_OPCODE_SEND); const brw_reg srcs[2] = { inst->src[SEND_SRC_PAYLOAD1], - inst->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(), + inst->as_send()->ex_mlen > 0 ? inst->src[SEND_SRC_PAYLOAD2] : brw_reg(), }; const unsigned sizes[2] = { DIV_ROUND_UP(fs->alloc.sizes[srcs[0].nr], reg_unit(devinfo)), @@ -892,7 +892,7 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld, for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) { ++stats->fill_count; - brw_inst *unspill_inst; + brw_send_inst *unspill_inst; if (devinfo->verx10 >= 125) { /* LSC is limited to SIMD16 (SIMD32 on Xe2) load/store but we can * load more using transpose messages. @@ -933,8 +933,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld, unspill_inst->ex_mlen = 0; unspill_inst->size_written = lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE; - unspill_inst->send_has_side_effects = false; - unspill_inst->send_is_volatile = true; + unspill_inst->has_side_effects = false; + unspill_inst->is_volatile = true; unspill_inst->src[0] = brw_imm_ud( desc | @@ -958,8 +958,8 @@ brw_reg_alloc::emit_unspill(const brw_builder &bld, unspill_inst->mlen = 1; unspill_inst->header_size = 1; unspill_inst->size_written = reg_size * REG_SIZE; - unspill_inst->send_has_side_effects = false; - unspill_inst->send_is_volatile = true; + unspill_inst->has_side_effects = false; + unspill_inst->is_volatile = true; unspill_inst->sfid = BRW_SFID_HDC0; unspill_inst->src[0] = brw_imm_ud( @@ -992,7 +992,7 @@ brw_reg_alloc::emit_spill(const brw_builder &bld, for (unsigned i = 0; i < DIV_ROUND_UP(count, reg_size); i++) { ++stats->spill_count; - brw_inst *spill_inst; + brw_send_inst *spill_inst; if (devinfo->verx10 >= 125) { brw_reg offset = build_lane_offsets(bld, spill_offset, ip); @@ -1019,8 +1019,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld, bld.dispatch_width()); spill_inst->ex_mlen = reg_size; spill_inst->size_written = 0; - spill_inst->send_has_side_effects = true; - spill_inst->send_is_volatile = false; + spill_inst->has_side_effects = true; + spill_inst->is_volatile = false; spill_inst->src[0] = brw_imm_ud( desc | @@ -1045,8 +1045,8 @@ brw_reg_alloc::emit_spill(const brw_builder &bld, spill_inst->ex_mlen = reg_size; spill_inst->size_written = 0; spill_inst->header_size = 1; - spill_inst->send_has_side_effects = true; - spill_inst->send_is_volatile = false; + spill_inst->has_side_effects = true; + spill_inst->is_volatile = false; spill_inst->sfid = BRW_SFID_HDC0; spill_inst->src[0] = brw_imm_ud( diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index b2fe12aca57..dfc6cd59fc9 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -268,10 +268,12 @@ schedule_node::set_latency(const struct brw_isa_info *isa) break; case SHADER_OPCODE_SEND: - case SHADER_OPCODE_SEND_GATHER: - switch (inst->sfid) { + case SHADER_OPCODE_SEND_GATHER: { + brw_send_inst *send = inst->as_send(); + + switch (send->sfid) { case BRW_SFID_SAMPLER: { - unsigned msg_type = (inst->desc >> 12) & 0x1f; + unsigned msg_type = (send->desc >> 12) & 0x1f; switch (msg_type) { case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO: case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO: @@ -364,7 +366,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) break; case BRW_SFID_RENDER_CACHE: - switch (brw_fb_desc_msg_type(isa->devinfo, inst->desc)) { + switch (brw_fb_desc_msg_type(isa->devinfo, send->desc)) { case GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE: case GFX7_DATAPORT_RC_TYPED_SURFACE_READ: /* See also SHADER_OPCODE_TYPED_SURFACE_READ */ @@ -388,7 +390,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) break; case BRW_SFID_HDC0: - switch ((inst->desc >> 14) & 0x1f) { + switch ((send->desc >> 14) & 0x1f) { case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ: case GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ: case GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE: @@ -460,7 +462,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) break; case BRW_SFID_HDC1: - switch (brw_dp_desc_msg_type(isa->devinfo, inst->desc)) { + switch (brw_dp_desc_msg_type(isa->devinfo, send->desc)) { case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: @@ -500,7 +502,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) case BRW_SFID_UGM: case BRW_SFID_TGM: case BRW_SFID_SLM: - switch (lsc_msg_desc_opcode(isa->devinfo, inst->desc)) { + switch (lsc_msg_desc_opcode(isa->devinfo, send->desc)) { case LSC_OP_LOAD: case LSC_OP_STORE: case LSC_OP_LOAD_CMASK: @@ -555,6 +557,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) UNREACHABLE("Unknown SFID"); } break; + } case BRW_OPCODE_DPAS: switch (inst->rcount) { diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 7169fd26581..14ebfc39206 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -372,7 +372,7 @@ brw_shader::emit_cs_terminate() if (devinfo->ver < 11) desc |= (1 << 4); /* Do not dereference URB */ - brw_inst *send = ubld.SEND(); + brw_send_inst *send = ubld.SEND(); send->dst = reg_undef; send->src[SEND_SRC_DESC] = brw_imm_ud(desc); send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); @@ -678,7 +678,7 @@ brw_shader::assign_curb_setup() addr = base_addr; } - brw_inst *send = ubld.SEND(); + brw_send_inst *send = ubld.SEND(); send->dst = retype(brw_vec8_grf(payload().num_regs + i, 0), BRW_TYPE_UD); @@ -704,7 +704,7 @@ brw_shader::assign_curb_setup() lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE; assert((payload().num_regs + i + send->size_written / REG_SIZE) <= (payload().num_regs + prog_data->curb_read_length)); - send->send_is_volatile = true; + send->is_volatile = true; send->src[SEND_SRC_DESC] = brw_imm_ud(desc | brw_message_desc(devinfo, diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h index 8b99054dd48..94502e6841e 100644 --- a/src/intel/compiler/brw_shader.h +++ b/src/intel/compiler/brw_shader.h @@ -379,6 +379,9 @@ brw_inst *brw_clone_inst(brw_shader &s, const brw_inst *inst); * brw_inst are maintained and any previous sources still visible. Additional * sources will be uninitialized. * + * All instructions can be transformed to an instruction of BASE kind. + * All non-BASE instructions can be transformed to an instruction of SEND kind. + * * If new_num_srcs is UINT_MAX a default will be picked based on the opcode. * Not all opcodes have a default. */ diff --git a/src/intel/compiler/brw_validate.cpp b/src/intel/compiler/brw_validate.cpp index 92527ead7a1..4c8a22e09ea 100644 --- a/src/intel/compiler/brw_validate.cpp +++ b/src/intel/compiler/brw_validate.cpp @@ -314,7 +314,7 @@ brw_validate(const brw_shader &s) VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC])); VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC])); VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE); - VAL_ASSERT(inst->ex_mlen > 0 || + VAL_ASSERT(inst->as_send()->ex_mlen > 0 || inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE); /* Send payloads cannot be immediates nor have source modifiers */ for (unsigned i = 0; i < 2; i++) { diff --git a/src/intel/compiler/brw_workaround.cpp b/src/intel/compiler/brw_workaround.cpp index ce8fdb12223..7dee3c96674 100644 --- a/src/intel/compiler/brw_workaround.cpp +++ b/src/intel/compiler/brw_workaround.cpp @@ -37,20 +37,24 @@ brw_workaround_emit_dummy_mov_instruction(brw_shader &s) } static bool -needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst) +needs_dummy_fence(const intel_device_info *devinfo, const brw_inst *inst) { + const brw_send_inst *send = inst->as_send(); + if (!send) + return false; + /* This workaround is about making sure that any instruction writing * through UGM has completed before we hit EOT. */ - if (inst->sfid != BRW_SFID_UGM) + if (send->sfid != BRW_SFID_UGM) return false; /* Any UGM, non-Scratch-surface Stores (not including Atomic) messages, * where the L1-cache override is NOT among {WB, WS, WT} */ - enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc); + enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, send->desc); if (lsc_opcode_is_store(opcode)) { - switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) { + switch (lsc_msg_desc_cache_ctrl(devinfo, send->desc)) { case LSC_CACHE_STORE_L1STATE_L3MOCS: case LSC_CACHE_STORE_L1WB_L3WB: case LSC_CACHE_STORE_L1S_L3UC: @@ -65,7 +69,7 @@ needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst) } /* Any UGM Atomic message WITHOUT return value */ - if (lsc_opcode_is_atomic(opcode) && inst->dst.is_null()) + if (lsc_opcode_is_atomic(opcode) && send->dst.is_null()) return true; return false; @@ -106,7 +110,7 @@ brw_workaround_memory_fence_before_eot(brw_shader &s) const brw_builder ubld = brw_builder(inst).uniform(); brw_reg dst = ubld.vgrf(BRW_TYPE_UD); - brw_inst *dummy_fence = ubld.SEND(); + brw_send_inst *dummy_fence = ubld.SEND(); dummy_fence->src[SEND_SRC_DESC] = brw_imm_ud(0); dummy_fence->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); diff --git a/src/intel/compiler/test_lower_scoreboard.cpp b/src/intel/compiler/test_lower_scoreboard.cpp index d79a37a5207..8c4a30e1ca6 100644 --- a/src/intel/compiler/test_lower_scoreboard.cpp +++ b/src/intel/compiler/test_lower_scoreboard.cpp @@ -44,7 +44,7 @@ emit_SEND(const brw_builder &bld, const brw_reg &dst, { brw_reg uniform_desc = component(desc, 0); - brw_inst *send = bld.SEND(); + brw_send_inst *send = bld.SEND(); send->dst = dst; send->src[SEND_SRC_DESC] = uniform_desc;