diff --git a/src/intel/compiler/brw/brw_eu.h b/src/intel/compiler/brw/brw_eu.h index 4e1606da3bb..77cfdaecb28 100644 --- a/src/intel/compiler/brw/brw_eu.h +++ b/src/intel/compiler/brw/brw_eu.h @@ -1589,7 +1589,7 @@ brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc, bool gathe brw_set_desc_ex(p, insn, desc, 0, gather); } -void brw_set_uip_jip(struct brw_codegen *p, int start_offset); +void brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset); enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod); enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod); diff --git a/src/intel/compiler/brw/brw_eu_emit.c b/src/intel/compiler/brw/brw_eu_emit.c index a2ccb23736e..07852db8c4c 100644 --- a/src/intel/compiler/brw/brw_eu_emit.c +++ b/src/intel/compiler/brw/brw_eu_emit.c @@ -1411,163 +1411,6 @@ brw_send_indirect_split_message(struct brw_codegen *p, brw_eu_inst_set_eot(devinfo, send, eot); } -static bool -while_jumps_before_offset(const struct intel_device_info *devinfo, - brw_eu_inst *insn, int while_offset, int start_offset) -{ - int scale = 16 / brw_jump_scale(devinfo); - int jip = brw_eu_inst_jip(devinfo, insn); - assert(jip < 0); - return while_offset + jip * scale <= start_offset; -} - - -static int -brw_find_next_block_end(struct brw_codegen *p, int start_offset) -{ - int offset; - void *store = p->store; - const struct intel_device_info *devinfo = p->devinfo; - - int depth = 0; - - for (offset = next_offset(p, store, start_offset); - offset < p->next_insn_offset; - offset = next_offset(p, store, offset)) { - brw_eu_inst *insn = store + offset; - - switch (brw_eu_inst_opcode(p->isa, insn)) { - case BRW_OPCODE_IF: - depth++; - break; - case BRW_OPCODE_ENDIF: - if (depth == 0) - return offset; - depth--; - break; - case BRW_OPCODE_WHILE: - /* If the while doesn't jump before our instruction, it's the end - * of a sibling do...while loop. Ignore it. - */ - if (!while_jumps_before_offset(devinfo, insn, offset, start_offset)) - continue; - FALLTHROUGH; - case BRW_OPCODE_ELSE: - case BRW_OPCODE_HALT: - if (depth == 0) - return offset; - break; - default: - break; - } - } - - return 0; -} - -/* There is no DO instruction on gfx6, so to find the end of the loop - * we have to see if the loop is jumping back before our start - * instruction. - */ -static int -brw_find_loop_end(struct brw_codegen *p, int start_offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - int offset; - void *store = p->store; - - /* Always start after the instruction (such as a WHILE) we're trying to fix - * up. - */ - for (offset = next_offset(p, store, start_offset); - offset < p->next_insn_offset; - offset = next_offset(p, store, offset)) { - brw_eu_inst *insn = store + offset; - - if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) { - if (while_jumps_before_offset(devinfo, insn, offset, start_offset)) - return offset; - } - } - UNREACHABLE("not reached"); -} - -/* After program generation, go back and update the UIP and JIP of - * BREAK, CONT, and HALT instructions to their correct locations. - */ -void -brw_set_uip_jip(struct brw_codegen *p, int start_offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - int offset; - int br = brw_jump_scale(devinfo); - int scale = 16 / br; - void *store = p->store; - - for (offset = start_offset; offset < p->next_insn_offset; offset += 16) { - brw_eu_inst *insn = store + offset; - assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0); - - switch (brw_eu_inst_opcode(p->isa, insn)) { - case BRW_OPCODE_BREAK: { - int block_end_offset = brw_find_next_block_end(p, offset); - assert(block_end_offset != 0); - brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); - /* Gfx7 UIP points to WHILE; Gfx6 points just after it */ - brw_eu_inst_set_uip(devinfo, insn, - (brw_find_loop_end(p, offset) - offset) / scale); - break; - } - - case BRW_OPCODE_CONTINUE: { - int block_end_offset = brw_find_next_block_end(p, offset); - assert(block_end_offset != 0); - brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); - brw_eu_inst_set_uip(devinfo, insn, - (brw_find_loop_end(p, offset) - offset) / scale); - - assert(brw_eu_inst_uip(devinfo, insn) != 0); - assert(brw_eu_inst_jip(devinfo, insn) != 0); - break; - } - - case BRW_OPCODE_ENDIF: { - int block_end_offset = brw_find_next_block_end(p, offset); - int32_t jump = (block_end_offset == 0) ? - 1 * br : (block_end_offset - offset) / scale; - brw_eu_inst_set_jip(devinfo, insn, jump); - break; - } - - case BRW_OPCODE_HALT: { - /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): - * - * "In case of the halt instruction not inside any conditional - * code block, the value of and should be the - * same. In case of the halt instruction inside conditional code - * block, the should be the end of the program, and the - * should be end of the most inner conditional code block." - * - * The uip will have already been set by whoever set up the - * instruction. - */ - int block_end_offset = brw_find_next_block_end(p, offset); - if (block_end_offset == 0) { - brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn)); - } else { - brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); - } - assert(brw_eu_inst_uip(devinfo, insn) != 0); - assert(brw_eu_inst_jip(devinfo, insn) != 0); - break; - } - - default: - break; - } - } -} - void brw_broadcast(struct brw_codegen *p, struct brw_reg dst, diff --git a/src/intel/compiler/brw/brw_generator.cpp b/src/intel/compiler/brw/brw_generator.cpp index c475f2fb354..766060ff8c5 100644 --- a/src/intel/compiler/brw/brw_generator.cpp +++ b/src/intel/compiler/brw/brw_generator.cpp @@ -27,6 +27,9 @@ * native instructions. */ +#include +#include + #include "brw_eu.h" #include "brw_disasm_info.h" #include "brw_shader.h" @@ -110,56 +113,6 @@ brw_generator::~brw_generator() { } -class ip_record : public brw_exec_node { -public: - DECLARE_RALLOC_CXX_OPERATORS(ip_record) - - ip_record(int ip) - { - this->ip = ip; - } - - int ip; -}; - -bool -brw_generator::patch_halt_jumps() -{ - if (this->discard_halt_patches.is_empty()) - return false; - - int scale = brw_jump_scale(p->devinfo); - - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - brw_eu_inst *last_halt = brw_HALT(p); - brw_eu_inst_set_uip(p->devinfo, last_halt, 1 * scale); - brw_eu_inst_set_jip(p->devinfo, last_halt, 1 * scale); - - int ip = p->nr_insn; - - brw_foreach_in_list(ip_record, patch_ip, &discard_halt_patches) { - brw_eu_inst *patch = &p->store[patch_ip->ip]; - - assert(brw_eu_inst_opcode(p->isa, patch) == BRW_OPCODE_HALT); - /* HALT takes a half-instruction distance from the pre-incremented IP. */ - brw_eu_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); - } - - this->discard_halt_patches.make_empty(); - - return true; -} - void brw_generator::generate_send(brw_send_inst *inst, struct brw_reg dst, @@ -650,17 +603,6 @@ brw_generator::generate_ddy(const brw_inst *inst, } } -void -brw_generator::generate_halt(brw_inst *) -{ - /* This HALT will be patched up at FB write time to point UIP at the end of - * the program, and at brw_uip_jip() JIP will be set to the end of the - * current block (or the program). - */ - this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); - brw_HALT(p); -} - DEBUG_GET_ONCE_OPTION(shader_bin_override_path, "INTEL_SHADER_ASM_READ_PATH", NULL); @@ -755,6 +697,8 @@ brw_generator::generate_code(const brw_shader &s, brw_realign(p, 64); this->dispatch_width = dispatch_width; + this->final_halt_offset = -1; + this->needs_final_halt = false; int start_offset = p->next_insn_offset; @@ -1120,7 +1064,9 @@ brw_generator::generate_code(const brw_shader &s, break; case BRW_OPCODE_HALT: - generate_halt(inst); + /* This HALT will be patched by brw_set_uip_jip(). */ + this->needs_final_halt = true; + brw_HALT(p); break; case FS_OPCODE_SCHEDULING_FENCE: @@ -1236,11 +1182,23 @@ brw_generator::generate_code(const brw_shader &s, /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. */ - if (!patch_halt_jumps()) { - if (unlikely(annotate)) { - disasm_info->use_tail = true; - } - } else if (devinfo->ver >= 12) { + if (!this->needs_final_halt) { + disasm_info->use_tail = true; + break; + } + + /* HALT temporarily disables channels, and the same instruction + * is used to re-enable them: once all channels are + * disabled, then they are re-enabled again immediately. + * + * So put a HALT right before the "epilogue" of the shader to make + * sure all channels get HALTed, so that this last HALT will re-enable + * them again. + */ + final_halt_offset = p->next_insn_offset; + brw_HALT(p); + + if (devinfo->ver >= 12) { /* This works around synchronization issues consequence of the * HALT instruction not being considered a control flow * instruction by the back-end -- The fact that it doesn't @@ -1374,7 +1332,7 @@ brw_generator::generate_code(const brw_shader &s, } } - brw_set_uip_jip(p, start_offset); + brw_set_uip_jip(p, start_offset, final_halt_offset); /* end of program sentinel */ disasm_new_inst_group(disasm_info, p->next_insn_offset); @@ -1582,3 +1540,181 @@ void brw_prog_data_init(struct brw_stage_prog_data *prog_data, prog_data->total_scratch = 0; prog_data->total_shared = params->nir->info.shared_size; } + +/* After program generation, go back and update the UIP and JIP of + * BREAK, CONT, ENDIF and HALT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset) +{ + const struct intel_device_info *devinfo = p->devinfo; + const int end_offset = p->next_insn_offset; + brw_eu_inst *store = p->store; + + struct branch_info { + enum opcode opcode; + int offset; + + /* For loop headers. */ + int loop_end_offset; + }; + + /* Collect information about the control flow instructions and any + * instruction that are loop headers. There might be multiple entries + * for instructions that act as loop header for multiple loops and/or that + * are control flow instruction themselves (e.g. IF as the loop header). + */ + std::vector infos; + for (int offset = start_offset; offset < end_offset; offset += 16) { + brw_eu_inst *insn = store + (offset / 16); + assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0); + + const enum opcode opcode = brw_eu_inst_opcode(p->isa, insn); + switch (opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_HALT: + case BRW_OPCODE_BREAK: + case BRW_OPCODE_CONTINUE: + case BRW_OPCODE_WHILE: + infos.push_back({ + .opcode = opcode, + .offset = offset, + }); + if (opcode == BRW_OPCODE_WHILE) { + /* Also add an entry for the loop header. */ + const int jip = brw_eu_inst_jip(devinfo, insn); + assert(jip < 0); + infos.push_back({ + /* Use NOP to indicate this is a loop header entry. */ + .opcode = BRW_OPCODE_NOP, + .offset = offset + jip, + .loop_end_offset = offset, + }); + } + break; + + default: + /* Nothing to do. */ + break; + } + } + + /* Sort in scope order. */ + std::sort(infos.begin(), infos.end(), [](const auto &a, const auto &b) { + if (a.offset != b.offset) + return a.offset < b.offset; + /* Note the flipped comparison: want to see the largest scope first, + * since it contains the other. + */ + return a.loop_end_offset > b.loop_end_offset; + }); + + struct scope { + int end_offset; + + /* End of current loop if exists. */ + int loop_end_offset; + }; + + std::vector scopes; + scopes.push_back({-1, -1}); + + /* Walk backwards keeping track of the scopes. This make easy to + * get the innermost end of scope and the innermost end of loop. + */ + for (int i = infos.size() - 1; i >= 0; i--) { + const branch_info &info = infos[i]; + + brw_eu_inst *insn = store + (info.offset / 16); + + switch (info.opcode) { + case BRW_OPCODE_NOP: + case BRW_OPCODE_IF: + /* Pop the scope. NOP here is a stand in for loop headers. */ + scopes.pop_back(); + break; + + case BRW_OPCODE_ELSE: + /* For instructions before the ELSE in the conditional (i.e. the + * then-part of the loop), the scope ends here. + */ + scopes.back().end_offset = info.offset; + break; + + case BRW_OPCODE_ENDIF: { + const int innermost_end_offset = scopes.back().end_offset; + int jip_offset; + + if (innermost_end_offset != -1) + jip_offset = innermost_end_offset; + else if (final_halt_offset != -1) + jip_offset = final_halt_offset + 16; + else + jip_offset = info.offset + 16; + + brw_eu_inst_set_jip(devinfo, insn, jip_offset - info.offset); + + scopes.push_back({ + .end_offset = info.offset, + .loop_end_offset = scopes.back().loop_end_offset, + }); + break; + } + + case BRW_OPCODE_WHILE: + scopes.push_back({ + .end_offset = info.offset, + .loop_end_offset = info.offset, + }); + break; + + case BRW_OPCODE_BREAK: + case BRW_OPCODE_CONTINUE: { + const int innermost_end_offset = scopes.back().end_offset; + brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset); + + const int loop_end_offset = scopes.back().loop_end_offset; + assert(loop_end_offset != -1); + assert(loop_end_offset > info.offset); + brw_eu_inst_set_uip(devinfo, insn, loop_end_offset - info.offset); + break; + } + + case BRW_OPCODE_HALT: { + /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): + * + * "In case of the halt instruction not inside any conditional + * code block, the value of and should be the + * same. In case of the halt instruction inside conditional code + * block, the should be the end of the program, and the + * should be end of the most inner conditional code block." + */ + const int innermost_end_offset = scopes.back().end_offset; + + /* If present, use the final HALT to infer the "end of the program". + * + * See also SHADER_OPCODE_HALT_TARGET. + */ + if (final_halt_offset != -1) { + if (final_halt_offset == info.offset) + assert(innermost_end_offset == -1); + + const int uip_offset = final_halt_offset + 16; + brw_eu_inst_set_uip(devinfo, insn, uip_offset - info.offset); + } + + if (innermost_end_offset != -1) + brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset); + else + brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn)); + break; + } + + default: + /* Nothing to do. */ + break; + } + } +} diff --git a/src/intel/compiler/brw/brw_generator.h b/src/intel/compiler/brw/brw_generator.h index 753f47d9d09..3fcbe785259 100644 --- a/src/intel/compiler/brw/brw_generator.h +++ b/src/intel/compiler/brw/brw_generator.h @@ -40,8 +40,6 @@ private: void generate_scratch_header(brw_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_halt(brw_inst *inst); - void generate_mov_indirect(brw_inst *inst, struct brw_reg dst, struct brw_reg reg, @@ -68,7 +66,9 @@ private: unsigned dispatch_width; /**< 8, 16 or 32 */ - brw_exec_list discard_halt_patches; + int final_halt_offset; + bool needs_final_halt; + bool debug_flag; const char *shader_name; mesa_shader_stage stage;