diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 581eb3689f9..1ffa8c9422d 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -850,8 +850,6 @@ elk_fs_inst::size_read(int arg) const case ELK_SHADER_OPCODE_SEND: if (arg == 2) { return mlen * REG_SIZE; - } else if (arg == 3) { - return ex_mlen * REG_SIZE; } break; @@ -2733,10 +2731,6 @@ elk_fs_visitor::opt_zero_samples() if (send->keep_payload_trailing_zeros) continue; - /* This pass works on SENDs before splitting. */ - if (send->ex_mlen > 0) - continue; - elk_fs_inst *lp = (elk_fs_inst *) send->prev; if (lp->is_head_sentinel() || lp->opcode != ELK_SHADER_OPCODE_LOAD_PAYLOAD) @@ -5518,10 +5512,6 @@ elk_fs_visitor::dump_instruction_to_file(const elk_backend_instruction *be_inst, fprintf(file, "(mlen: %d) ", inst->mlen); } - if (inst->ex_mlen) { - fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); - } - if (inst->eot) { fprintf(file, "(EOT) "); } diff --git a/src/intel/compiler/elk/elk_fs.h b/src/intel/compiler/elk/elk_fs.h index 5d5593b8be1..e97cd7e0de2 100644 --- a/src/intel/compiler/elk/elk_fs.h +++ b/src/intel/compiler/elk/elk_fs.h @@ -467,9 +467,7 @@ private: void generate_send(elk_fs_inst *inst, struct elk_reg dst, struct elk_reg desc, - struct elk_reg ex_desc, - struct elk_reg payload, - struct elk_reg payload2); + struct elk_reg payload); void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload); void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload); void generate_barrier(elk_fs_inst *inst, struct elk_reg src); diff --git a/src/intel/compiler/elk/elk_fs_cse.cpp b/src/intel/compiler/elk/elk_fs_cse.cpp index a305343bacb..58c9c6a532e 100644 --- a/src/intel/compiler/elk/elk_fs_cse.cpp +++ b/src/intel/compiler/elk/elk_fs_cse.cpp @@ -185,7 +185,6 @@ instructions_match(elk_fs_inst *a, elk_fs_inst *b, bool *negate) a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && - a->ex_mlen == b->ex_mlen && a->sfid == b->sfid && a->desc == b->desc && a->size_written == b->size_written && diff --git a/src/intel/compiler/elk/elk_fs_generator.cpp b/src/intel/compiler/elk/elk_fs_generator.cpp index 28d6ae267a8..c59745b085b 100644 --- a/src/intel/compiler/elk/elk_fs_generator.cpp +++ b/src/intel/compiler/elk/elk_fs_generator.cpp @@ -321,11 +321,9 @@ elk_fs_generator::patch_halt_jumps() void elk_fs_generator::generate_send(elk_fs_inst *inst, - struct elk_reg dst, - struct elk_reg desc, - struct elk_reg ex_desc, - struct elk_reg payload, - struct elk_reg payload2) + struct elk_reg dst, + struct elk_reg desc, + struct elk_reg payload) { const bool dst_is_null = dst.file == ELK_ARCHITECTURE_REGISTER_FILE && dst.nr == ELK_ARF_NULL; @@ -334,18 +332,10 @@ elk_fs_generator::generate_send(elk_fs_inst *inst, uint32_t desc_imm = inst->desc | elk_message_desc(devinfo, inst->mlen, rlen, inst->header_size); - uint32_t ex_desc_imm = inst->ex_desc | - elk_message_ex_desc(devinfo, inst->ex_mlen); - - if (ex_desc.file != ELK_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm || - inst->send_ex_desc_scratch) { - unreachable("no split sends available"); - } else { - elk_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm, - inst->eot); - if (inst->check_tdr) - elk_inst_set_opcode(p->isa, elk_last_inst, ELK_OPCODE_SENDC); - } + elk_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm, + inst->eot); + if (inst->check_tdr) + elk_inst_set_opcode(p->isa, elk_last_inst, ELK_OPCODE_SENDC); } void @@ -1960,8 +1950,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, break; case ELK_SHADER_OPCODE_SEND: - generate_send(inst, dst, src[0], src[1], src[2], - inst->ex_mlen > 0 ? src[3] : elk_null_reg()); + generate_send(inst, dst, src[0], src[2]); send_count++; break; diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp index df236a6238a..ca948da887c 100644 --- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp @@ -604,25 +604,6 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst) grf127_send_hack_node); } - /* From the Skylake PRM Vol. 2a docs for sends: - * - * "It is required that the second block of GRFs does not overlap with - * the first block." - * - * Normally, this is taken care of by fixup_sends_duplicate_payload() but - * in the case where one of the registers is an undefined value, the - * register allocator may decide that they don't interfere even though - * they're used as sources in the same instruction. We also need to add - * interference here. - */ - if (devinfo->ver >= 9) { - if (inst->opcode == ELK_SHADER_OPCODE_SEND && inst->ex_mlen > 0 && - inst->src[2].file == VGRF && inst->src[3].file == VGRF && - inst->src[2].nr != inst->src[3].nr) - ra_add_node_interference(g, first_vgrf_node + inst->src[2].nr, - first_vgrf_node + inst->src[3].nr); - } - /* When we do send-from-GRF for FB writes, we need to ensure that the last * write instruction sends from a high register. This is because the * vertex fetcher wants to start filling the low payload registers while @@ -652,12 +633,6 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst) } ra_set_node_reg(g, first_vgrf_node + vgrf, reg); - - if (inst->ex_mlen > 0) { - const int vgrf = inst->src[3].nr; - reg -= DIV_ROUND_UP(fs->alloc.sizes[vgrf], reg_unit(devinfo)); - ra_set_node_reg(g, first_vgrf_node + vgrf, reg); - } } } @@ -880,7 +855,6 @@ elk_fs_reg_alloc::emit_spill(const fs_builder &bld, elk_fs_reg src, uint32_t spill_offset, unsigned count, int ip) { - const intel_device_info *devinfo = bld.shader->devinfo; const unsigned reg_size = src.component_size(bld.dispatch_width()) / REG_SIZE; assert(count % reg_size == 0); @@ -888,39 +862,11 @@ elk_fs_reg_alloc::emit_spill(const fs_builder &bld, for (unsigned i = 0; i < count / reg_size; i++) { ++stats->spill_count; - elk_fs_inst *spill_inst; - if (devinfo->ver >= 9) { - elk_fs_reg header = this->scratch_header; - fs_builder ubld = bld.exec_all().group(1, 0); - assert(spill_offset % 16 == 0); - spill_inst = ubld.MOV(component(header, 2), - elk_imm_ud(spill_offset / 16)); - _mesa_set_add(spill_insts, spill_inst); - - const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT; - const elk_fs_reg ex_desc = elk_imm_ud(0); - - elk_fs_reg srcs[] = { elk_imm_ud(0), ex_desc, header, src }; - spill_inst = bld.emit(ELK_SHADER_OPCODE_SEND, bld.null_reg_f(), - srcs, ARRAY_SIZE(srcs)); - spill_inst->mlen = 1; - spill_inst->ex_mlen = reg_size; - spill_inst->size_written = 0; - spill_inst->header_size = 1; - spill_inst->send_has_side_effects = true; - spill_inst->send_is_volatile = false; - spill_inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE; - spill_inst->desc = - elk_dp_desc(devinfo, bti, - GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, - ELK_DATAPORT_OWORD_BLOCK_DWORDS(reg_size * 8)); - } else { - spill_inst = bld.emit(ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE, - bld.null_reg_f(), src); - spill_inst->offset = spill_offset; - spill_inst->mlen = 1 + reg_size; /* header, value */ - spill_inst->base_mrf = spill_base_mrf(bld.shader); - } + elk_fs_inst *spill_inst = bld.emit(ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE, + bld.null_reg_f(), src); + spill_inst->offset = spill_offset; + spill_inst->mlen = 1 + reg_size; /* header, value */ + spill_inst->base_mrf = spill_base_mrf(bld.shader); _mesa_set_add(spill_insts, spill_inst); src.offset += reg_size * REG_SIZE; diff --git a/src/intel/compiler/elk/elk_ir.h b/src/intel/compiler/elk/elk_ir.h index 22cf9c18c07..19019d2c15a 100644 --- a/src/intel/compiler/elk/elk_ir.h +++ b/src/intel/compiler/elk/elk_ir.h @@ -158,12 +158,10 @@ struct elk_backend_instruction { uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint8_t mlen; /**< SEND message length */ - uint8_t ex_mlen; /**< SENDS extended message length */ int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ uint8_t target; /**< MRT target. */ uint8_t sfid; /**< SFID for SEND instructions */ uint32_t desc; /**< SEND[S] message descriptor immediate */ - uint32_t ex_desc; /**< SEND[S] extended message descriptor immediate */ unsigned size_written; /**< Data written to the destination register in bytes. */ enum elk_opcode opcode; /* ELK_OPCODE_* or ELK_FS_OPCODE_* */ diff --git a/src/intel/compiler/elk/elk_lower_logical_sends.cpp b/src/intel/compiler/elk/elk_lower_logical_sends.cpp index f0913d24681..f83b00cf5c2 100644 --- a/src/intel/compiler/elk/elk_lower_logical_sends.cpp +++ b/src/intel/compiler/elk/elk_lower_logical_sends.cpp @@ -62,8 +62,6 @@ lower_urb_read_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->offset); inst->mlen = header_size; - inst->ex_desc = 0; - inst->ex_mlen = 0; inst->send_is_volatile = true; inst->resize_sources(4); @@ -124,7 +122,6 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst) /* Update the original instruction. */ inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); - inst->ex_mlen = 0; inst->header_size = 0; inst->send_has_side_effects = true; inst->send_is_volatile = false; @@ -183,8 +180,6 @@ lower_urb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->offset); inst->mlen = length; - inst->ex_desc = 0; - inst->ex_mlen = 0; inst->send_has_side_effects = true; inst->resize_sources(4); @@ -209,7 +204,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst) /* Calculate the total number of components of the payload. */ const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA)); - const unsigned src_sz = type_sz(src.type); elk_fs_reg payload = bld.vgrf(ELK_REGISTER_TYPE_UD); @@ -240,7 +234,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst) } elk_fs_reg payload2 = bld.move_to_vgrf(src, src_comps); - const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE; inst->sfid = ELK_SFID_URB; @@ -257,7 +250,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst) /* Update the original instruction. */ inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); - inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = true; inst->send_is_volatile = false; @@ -520,18 +512,6 @@ lower_fb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst, desc = component(desc, 0); } - uint32_t ex_desc = 0; - if (devinfo->ver >= 11) { - /* Set the "Render Target Index" and "Src0 Alpha Present" fields - * in the extended message descriptor, in lieu of using a header. - */ - ex_desc = inst->target << 12 | (src0_alpha.file != BAD_FILE) << 15; - - if (key->nr_color_regions == 0) - ex_desc |= 1 << 20; /* Null Render Target */ - } - inst->ex_desc = ex_desc; - inst->opcode = ELK_SHADER_OPCODE_SEND; inst->resize_sources(3); inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; @@ -539,7 +519,6 @@ lower_fb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst, inst->src[1] = elk_imm_ud(0); inst->src[2] = payload; inst->mlen = regs_written(load); - inst->ex_mlen = 0; inst->header_size = header_size; inst->check_tdr = true; inst->send_has_side_effects = true; @@ -1291,8 +1270,6 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, elk_fs_inst *inst, elk_op inst->src[1] = elk_imm_ud(0); /* ex_desc */ } - inst->ex_desc = 0; - inst->src[2] = src_payload; inst->resize_sources(3); @@ -1547,44 +1524,28 @@ lower_surface_logical_send(const fs_builder &bld, elk_fs_inst *inst) const unsigned header_sz = header.file != BAD_FILE ? 1 : 0; elk_fs_reg payload, payload2; - unsigned mlen, ex_mlen = 0; - if (devinfo->ver >= 9 && - (src.file == BAD_FILE || header.file == BAD_FILE)) { - /* We have split sends on gfx9 and above */ - if (header.file == BAD_FILE) { - payload = bld.move_to_vgrf(addr, addr_sz); - payload2 = bld.move_to_vgrf(src, src_sz); - mlen = addr_sz * (inst->exec_size / 8); - ex_mlen = src_sz * (inst->exec_size / 8); - } else { - assert(src.file == BAD_FILE); - payload = header; - payload2 = bld.move_to_vgrf(addr, addr_sz); - mlen = header_sz; - ex_mlen = addr_sz * (inst->exec_size / 8); - } - } else { - /* Allocate space for the payload. */ - const unsigned sz = header_sz + addr_sz + src_sz; - payload = bld.vgrf(ELK_REGISTER_TYPE_UD, sz); - elk_fs_reg *const components = new elk_fs_reg[sz]; - unsigned n = 0; + unsigned mlen; - /* Construct the payload. */ - if (header.file != BAD_FILE) - components[n++] = header; + /* Allocate space for the payload. */ + const unsigned sz = header_sz + addr_sz + src_sz; + payload = bld.vgrf(ELK_REGISTER_TYPE_UD, sz); + elk_fs_reg *const components = new elk_fs_reg[sz]; + unsigned n = 0; - for (unsigned i = 0; i < addr_sz; i++) - components[n++] = offset(addr, bld, i); + /* Construct the payload. */ + if (header.file != BAD_FILE) + components[n++] = header; - for (unsigned i = 0; i < src_sz; i++) - components[n++] = offset(src, bld, i); + for (unsigned i = 0; i < addr_sz; i++) + components[n++] = offset(addr, bld, i); - bld.LOAD_PAYLOAD(payload, components, sz, header_sz); - mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + for (unsigned i = 0; i < src_sz; i++) + components[n++] = offset(src, bld, i); - delete[] components; - } + bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + + delete[] components; /* Predicate the instruction on the sample mask if no header is * provided. @@ -1704,7 +1665,6 @@ lower_surface_logical_send(const fs_builder &bld, elk_fs_inst *inst) /* Update the original instruction. */ inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = mlen; - inst->ex_mlen = ex_mlen; inst->header_size = header_sz; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; @@ -1765,16 +1725,13 @@ lower_surface_block_logical_send(const fs_builder &bld, elk_fs_inst *inst) ubld.group(1, 0).MOV(component(header, 2), addr); elk_fs_reg data; - unsigned ex_mlen = 0; if (write) { const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA); data = retype(bld.move_to_vgrf(src, src_sz), ELK_REGISTER_TYPE_UD); - ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE; } inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = 1; - inst->ex_mlen = ex_mlen; inst->header_size = 1; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; @@ -1791,33 +1748,6 @@ lower_surface_block_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->src[3] = data; } -static elk_fs_reg -emit_a64_oword_block_header(const fs_builder &bld, const elk_fs_reg &addr) -{ - const fs_builder ubld = bld.exec_all().group(8, 0); - - assert(type_sz(addr.type) == 8 && addr.stride == 0); - - elk_fs_reg expanded_addr = addr; - if (addr.file == UNIFORM) { - /* We can't do stride 1 with the UNIFORM file, it requires stride 0 */ - expanded_addr = ubld.vgrf(ELK_REGISTER_TYPE_UQ); - expanded_addr.stride = 0; - ubld.MOV(expanded_addr, retype(addr, ELK_REGISTER_TYPE_UQ)); - } - - elk_fs_reg header = ubld.vgrf(ELK_REGISTER_TYPE_UD); - ubld.MOV(header, elk_imm_ud(0)); - - /* Use a 2-wide MOV to fill out the address */ - elk_fs_reg addr_vec2 = expanded_addr; - addr_vec2.type = ELK_REGISTER_TYPE_UD; - addr_vec2.stride = 1; - ubld.group(2, 0).MOV(header, addr_vec2); - - return header; -} - static void emit_fragment_mask(const fs_builder &bld, elk_fs_inst *inst) { @@ -1851,44 +1781,21 @@ lower_a64_logical_send(const fs_builder &bld, elk_fs_inst *inst) const bool has_side_effects = inst->has_side_effects(); elk_fs_reg payload, payload2; - unsigned mlen, ex_mlen = 0, header_size = 0; - if (inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL || - inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL || - inst->opcode == ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL) { - assert(devinfo->ver >= 9); + unsigned mlen, header_size = 0; - /* OWORD messages only take a scalar address in a header */ - mlen = 1; - header_size = 1; - payload = emit_a64_oword_block_header(bld, addr); + /* Add two because the address is 64-bit */ + const unsigned dwords = 2 + src_comps; + mlen = dwords * (inst->exec_size / 8); - if (inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) { - ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE; - payload2 = retype(bld.move_to_vgrf(src, src_comps), - ELK_REGISTER_TYPE_UD); - } - } else if (devinfo->ver >= 9) { - /* On Skylake and above, we have SENDS */ - mlen = 2 * (inst->exec_size / 8); - ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE; - payload = retype(bld.move_to_vgrf(addr, 1), ELK_REGISTER_TYPE_UD); - payload2 = retype(bld.move_to_vgrf(src, src_comps), - ELK_REGISTER_TYPE_UD); - } else { - /* Add two because the address is 64-bit */ - const unsigned dwords = 2 + src_comps; - mlen = dwords * (inst->exec_size / 8); + elk_fs_reg sources[5]; - elk_fs_reg sources[5]; + sources[0] = addr; - sources[0] = addr; + for (unsigned i = 0; i < src_comps; i++) + sources[1 + i] = offset(src, bld, i); - for (unsigned i = 0; i < src_comps; i++) - sources[1 + i] = offset(src, bld, i); - - payload = bld.vgrf(ELK_REGISTER_TYPE_UD, dwords); - bld.LOAD_PAYLOAD(payload, sources, 1 + src_comps, 0); - } + payload = bld.vgrf(ELK_REGISTER_TYPE_UD, dwords); + bld.LOAD_PAYLOAD(payload, sources, 1 + src_comps, 0); uint32_t desc; switch (inst->opcode) { @@ -1955,7 +1862,6 @@ lower_a64_logical_send(const fs_builder &bld, elk_fs_inst *inst) /* Update the original instruction. */ inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = mlen; - inst->ex_mlen = ex_mlen; inst->header_size = header_size; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; @@ -2212,9 +2118,7 @@ lower_interpolator_logical_send(const fs_builder &bld, elk_fs_inst *inst, inst->opcode = ELK_SHADER_OPCODE_SEND; inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR; inst->desc = desc_imm; - inst->ex_desc = 0; inst->mlen = mlen; - inst->ex_mlen = 0; inst->send_has_side_effects = false; inst->send_is_volatile = false; @@ -2241,8 +2145,6 @@ lower_get_buffer_size(const fs_builder &bld, elk_fs_inst *inst) inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; inst->resize_sources(3); - inst->ex_mlen = 0; - inst->ex_desc = 0; /* src[0] & src[1] are filled by setup_surface_descriptors() */ inst->src[2] = lod;