mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-31 07:00:27 +01:00
brw: Rework UIP and JIP setting code
The current code walks the instructions, and when needed,
it will scan to find the next "end of scope" and sometimes
the next "end of block". It also has a separate patching
logic for HALTs.
The new code collects the necessary scope information up front,
then walks the instruction backwards, making avoiding the need
to scan for the end of scope. It will also walk only the
relevant instructions that were previously collected. It also
replaces the previous HALT-specific patching logic.
With this new change, many cases that were jumping to
intermediate HALTs, will now jump straight to the end of
scope (or the "end of the program" section). E.g. in
```
if
...
(...) HALT
...
(...) HALT
endif
```
both HALTs now will jump to the end of the scope, instead of the
first HALT jumping into the second one.
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38914>
This commit is contained in:
parent
aba079b3af
commit
dcefa0e6b3
4 changed files with 208 additions and 229 deletions
|
|
@ -1589,7 +1589,7 @@ brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc, bool gathe
|
|||
brw_set_desc_ex(p, insn, desc, 0, gather);
|
||||
}
|
||||
|
||||
void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
|
||||
void brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset);
|
||||
|
||||
enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
|
||||
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
|
||||
|
|
|
|||
|
|
@ -1411,163 +1411,6 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
brw_eu_inst_set_eot(devinfo, send, eot);
|
||||
}
|
||||
|
||||
static bool
|
||||
while_jumps_before_offset(const struct intel_device_info *devinfo,
|
||||
brw_eu_inst *insn, int while_offset, int start_offset)
|
||||
{
|
||||
int scale = 16 / brw_jump_scale(devinfo);
|
||||
int jip = brw_eu_inst_jip(devinfo, insn);
|
||||
assert(jip < 0);
|
||||
return while_offset + jip * scale <= start_offset;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
|
||||
{
|
||||
int offset;
|
||||
void *store = p->store;
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
|
||||
int depth = 0;
|
||||
|
||||
for (offset = next_offset(p, store, start_offset);
|
||||
offset < p->next_insn_offset;
|
||||
offset = next_offset(p, store, offset)) {
|
||||
brw_eu_inst *insn = store + offset;
|
||||
|
||||
switch (brw_eu_inst_opcode(p->isa, insn)) {
|
||||
case BRW_OPCODE_IF:
|
||||
depth++;
|
||||
break;
|
||||
case BRW_OPCODE_ENDIF:
|
||||
if (depth == 0)
|
||||
return offset;
|
||||
depth--;
|
||||
break;
|
||||
case BRW_OPCODE_WHILE:
|
||||
/* If the while doesn't jump before our instruction, it's the end
|
||||
* of a sibling do...while loop. Ignore it.
|
||||
*/
|
||||
if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
||||
continue;
|
||||
FALLTHROUGH;
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_HALT:
|
||||
if (depth == 0)
|
||||
return offset;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* There is no DO instruction on gfx6, so to find the end of the loop
|
||||
* we have to see if the loop is jumping back before our start
|
||||
* instruction.
|
||||
*/
|
||||
static int
|
||||
brw_find_loop_end(struct brw_codegen *p, int start_offset)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
int offset;
|
||||
void *store = p->store;
|
||||
|
||||
/* Always start after the instruction (such as a WHILE) we're trying to fix
|
||||
* up.
|
||||
*/
|
||||
for (offset = next_offset(p, store, start_offset);
|
||||
offset < p->next_insn_offset;
|
||||
offset = next_offset(p, store, offset)) {
|
||||
brw_eu_inst *insn = store + offset;
|
||||
|
||||
if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
|
||||
if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
UNREACHABLE("not reached");
|
||||
}
|
||||
|
||||
/* After program generation, go back and update the UIP and JIP of
|
||||
* BREAK, CONT, and HALT instructions to their correct locations.
|
||||
*/
|
||||
void
|
||||
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
int offset;
|
||||
int br = brw_jump_scale(devinfo);
|
||||
int scale = 16 / br;
|
||||
void *store = p->store;
|
||||
|
||||
for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
|
||||
brw_eu_inst *insn = store + offset;
|
||||
assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);
|
||||
|
||||
switch (brw_eu_inst_opcode(p->isa, insn)) {
|
||||
case BRW_OPCODE_BREAK: {
|
||||
int block_end_offset = brw_find_next_block_end(p, offset);
|
||||
assert(block_end_offset != 0);
|
||||
brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
||||
/* Gfx7 UIP points to WHILE; Gfx6 points just after it */
|
||||
brw_eu_inst_set_uip(devinfo, insn,
|
||||
(brw_find_loop_end(p, offset) - offset) / scale);
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_CONTINUE: {
|
||||
int block_end_offset = brw_find_next_block_end(p, offset);
|
||||
assert(block_end_offset != 0);
|
||||
brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
||||
brw_eu_inst_set_uip(devinfo, insn,
|
||||
(brw_find_loop_end(p, offset) - offset) / scale);
|
||||
|
||||
assert(brw_eu_inst_uip(devinfo, insn) != 0);
|
||||
assert(brw_eu_inst_jip(devinfo, insn) != 0);
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_ENDIF: {
|
||||
int block_end_offset = brw_find_next_block_end(p, offset);
|
||||
int32_t jump = (block_end_offset == 0) ?
|
||||
1 * br : (block_end_offset - offset) / scale;
|
||||
brw_eu_inst_set_jip(devinfo, insn, jump);
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_HALT: {
|
||||
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
|
||||
*
|
||||
* "In case of the halt instruction not inside any conditional
|
||||
* code block, the value of <JIP> and <UIP> should be the
|
||||
* same. In case of the halt instruction inside conditional code
|
||||
* block, the <UIP> should be the end of the program, and the
|
||||
* <JIP> should be end of the most inner conditional code block."
|
||||
*
|
||||
* The uip will have already been set by whoever set up the
|
||||
* instruction.
|
||||
*/
|
||||
int block_end_offset = brw_find_next_block_end(p, offset);
|
||||
if (block_end_offset == 0) {
|
||||
brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
|
||||
} else {
|
||||
brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
||||
}
|
||||
assert(brw_eu_inst_uip(devinfo, insn) != 0);
|
||||
assert(brw_eu_inst_jip(devinfo, insn) != 0);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_broadcast(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,9 @@
|
|||
* native instructions.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "brw_eu.h"
|
||||
#include "brw_disasm_info.h"
|
||||
#include "brw_shader.h"
|
||||
|
|
@ -110,56 +113,6 @@ brw_generator::~brw_generator()
|
|||
{
|
||||
}
|
||||
|
||||
class ip_record : public brw_exec_node {
|
||||
public:
|
||||
DECLARE_RALLOC_CXX_OPERATORS(ip_record)
|
||||
|
||||
ip_record(int ip)
|
||||
{
|
||||
this->ip = ip;
|
||||
}
|
||||
|
||||
int ip;
|
||||
};
|
||||
|
||||
bool
|
||||
brw_generator::patch_halt_jumps()
|
||||
{
|
||||
if (this->discard_halt_patches.is_empty())
|
||||
return false;
|
||||
|
||||
int scale = brw_jump_scale(p->devinfo);
|
||||
|
||||
/* There is a somewhat strange undocumented requirement of using
|
||||
* HALT, according to the simulator. If some channel has HALTed to
|
||||
* a particular UIP, then by the end of the program, every channel
|
||||
* must have HALTed to that UIP. Furthermore, the tracking is a
|
||||
* stack, so you can't do the final halt of a UIP after starting
|
||||
* halting to a new UIP.
|
||||
*
|
||||
* Symptoms of not emitting this instruction on actual hardware
|
||||
* included GPU hangs and sparkly rendering on the piglit discard
|
||||
* tests.
|
||||
*/
|
||||
brw_eu_inst *last_halt = brw_HALT(p);
|
||||
brw_eu_inst_set_uip(p->devinfo, last_halt, 1 * scale);
|
||||
brw_eu_inst_set_jip(p->devinfo, last_halt, 1 * scale);
|
||||
|
||||
int ip = p->nr_insn;
|
||||
|
||||
brw_foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
|
||||
brw_eu_inst *patch = &p->store[patch_ip->ip];
|
||||
|
||||
assert(brw_eu_inst_opcode(p->isa, patch) == BRW_OPCODE_HALT);
|
||||
/* HALT takes a half-instruction distance from the pre-incremented IP. */
|
||||
brw_eu_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
|
||||
}
|
||||
|
||||
this->discard_halt_patches.make_empty();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
brw_generator::generate_send(brw_send_inst *inst,
|
||||
struct brw_reg dst,
|
||||
|
|
@ -650,17 +603,6 @@ brw_generator::generate_ddy(const brw_inst *inst,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_generator::generate_halt(brw_inst *)
|
||||
{
|
||||
/* This HALT will be patched up at FB write time to point UIP at the end of
|
||||
* the program, and at brw_uip_jip() JIP will be set to the end of the
|
||||
* current block (or the program).
|
||||
*/
|
||||
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
|
||||
brw_HALT(p);
|
||||
}
|
||||
|
||||
DEBUG_GET_ONCE_OPTION(shader_bin_override_path, "INTEL_SHADER_ASM_READ_PATH",
|
||||
NULL);
|
||||
|
||||
|
|
@ -755,6 +697,8 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
brw_realign(p, 64);
|
||||
|
||||
this->dispatch_width = dispatch_width;
|
||||
this->final_halt_offset = -1;
|
||||
this->needs_final_halt = false;
|
||||
|
||||
int start_offset = p->next_insn_offset;
|
||||
|
||||
|
|
@ -1120,7 +1064,9 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
break;
|
||||
|
||||
case BRW_OPCODE_HALT:
|
||||
generate_halt(inst);
|
||||
/* This HALT will be patched by brw_set_uip_jip(). */
|
||||
this->needs_final_halt = true;
|
||||
brw_HALT(p);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_SCHEDULING_FENCE:
|
||||
|
|
@ -1236,11 +1182,23 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
/* This is the place where the final HALT needs to be inserted if
|
||||
* we've emitted any discards. If not, this will emit no code.
|
||||
*/
|
||||
if (!patch_halt_jumps()) {
|
||||
if (unlikely(annotate)) {
|
||||
disasm_info->use_tail = true;
|
||||
}
|
||||
} else if (devinfo->ver >= 12) {
|
||||
if (!this->needs_final_halt) {
|
||||
disasm_info->use_tail = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* HALT temporarily disables channels, and the same instruction
|
||||
* is used to re-enable them: once all channels are
|
||||
* disabled, then they are re-enabled again immediately.
|
||||
*
|
||||
* So put a HALT right before the "epilogue" of the shader to make
|
||||
* sure all channels get HALTed, so that this last HALT will re-enable
|
||||
* them again.
|
||||
*/
|
||||
final_halt_offset = p->next_insn_offset;
|
||||
brw_HALT(p);
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
/* This works around synchronization issues consequence of the
|
||||
* HALT instruction not being considered a control flow
|
||||
* instruction by the back-end -- The fact that it doesn't
|
||||
|
|
@ -1374,7 +1332,7 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
}
|
||||
}
|
||||
|
||||
brw_set_uip_jip(p, start_offset);
|
||||
brw_set_uip_jip(p, start_offset, final_halt_offset);
|
||||
|
||||
/* end of program sentinel */
|
||||
disasm_new_inst_group(disasm_info, p->next_insn_offset);
|
||||
|
|
@ -1582,3 +1540,181 @@ void brw_prog_data_init(struct brw_stage_prog_data *prog_data,
|
|||
prog_data->total_scratch = 0;
|
||||
prog_data->total_shared = params->nir->info.shared_size;
|
||||
}
|
||||
|
||||
/* After program generation, go back and update the UIP and JIP of
|
||||
* BREAK, CONT, ENDIF and HALT instructions to their correct locations.
|
||||
*/
|
||||
void
|
||||
brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
const int end_offset = p->next_insn_offset;
|
||||
brw_eu_inst *store = p->store;
|
||||
|
||||
struct branch_info {
|
||||
enum opcode opcode;
|
||||
int offset;
|
||||
|
||||
/* For loop headers. */
|
||||
int loop_end_offset;
|
||||
};
|
||||
|
||||
/* Collect information about the control flow instructions and any
|
||||
* instruction that are loop headers. There might be multiple entries
|
||||
* for instructions that act as loop header for multiple loops and/or that
|
||||
* are control flow instruction themselves (e.g. IF as the loop header).
|
||||
*/
|
||||
std::vector<branch_info> infos;
|
||||
for (int offset = start_offset; offset < end_offset; offset += 16) {
|
||||
brw_eu_inst *insn = store + (offset / 16);
|
||||
assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);
|
||||
|
||||
const enum opcode opcode = brw_eu_inst_opcode(p->isa, insn);
|
||||
switch (opcode) {
|
||||
case BRW_OPCODE_IF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_ENDIF:
|
||||
case BRW_OPCODE_HALT:
|
||||
case BRW_OPCODE_BREAK:
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
case BRW_OPCODE_WHILE:
|
||||
infos.push_back({
|
||||
.opcode = opcode,
|
||||
.offset = offset,
|
||||
});
|
||||
if (opcode == BRW_OPCODE_WHILE) {
|
||||
/* Also add an entry for the loop header. */
|
||||
const int jip = brw_eu_inst_jip(devinfo, insn);
|
||||
assert(jip < 0);
|
||||
infos.push_back({
|
||||
/* Use NOP to indicate this is a loop header entry. */
|
||||
.opcode = BRW_OPCODE_NOP,
|
||||
.offset = offset + jip,
|
||||
.loop_end_offset = offset,
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort in scope order. */
|
||||
std::sort(infos.begin(), infos.end(), [](const auto &a, const auto &b) {
|
||||
if (a.offset != b.offset)
|
||||
return a.offset < b.offset;
|
||||
/* Note the flipped comparison: want to see the largest scope first,
|
||||
* since it contains the other.
|
||||
*/
|
||||
return a.loop_end_offset > b.loop_end_offset;
|
||||
});
|
||||
|
||||
struct scope {
|
||||
int end_offset;
|
||||
|
||||
/* End of current loop if exists. */
|
||||
int loop_end_offset;
|
||||
};
|
||||
|
||||
std::vector<scope> scopes;
|
||||
scopes.push_back({-1, -1});
|
||||
|
||||
/* Walk backwards keeping track of the scopes. This make easy to
|
||||
* get the innermost end of scope and the innermost end of loop.
|
||||
*/
|
||||
for (int i = infos.size() - 1; i >= 0; i--) {
|
||||
const branch_info &info = infos[i];
|
||||
|
||||
brw_eu_inst *insn = store + (info.offset / 16);
|
||||
|
||||
switch (info.opcode) {
|
||||
case BRW_OPCODE_NOP:
|
||||
case BRW_OPCODE_IF:
|
||||
/* Pop the scope. NOP here is a stand in for loop headers. */
|
||||
scopes.pop_back();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ELSE:
|
||||
/* For instructions before the ELSE in the conditional (i.e. the
|
||||
* then-part of the loop), the scope ends here.
|
||||
*/
|
||||
scopes.back().end_offset = info.offset;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ENDIF: {
|
||||
const int innermost_end_offset = scopes.back().end_offset;
|
||||
int jip_offset;
|
||||
|
||||
if (innermost_end_offset != -1)
|
||||
jip_offset = innermost_end_offset;
|
||||
else if (final_halt_offset != -1)
|
||||
jip_offset = final_halt_offset + 16;
|
||||
else
|
||||
jip_offset = info.offset + 16;
|
||||
|
||||
brw_eu_inst_set_jip(devinfo, insn, jip_offset - info.offset);
|
||||
|
||||
scopes.push_back({
|
||||
.end_offset = info.offset,
|
||||
.loop_end_offset = scopes.back().loop_end_offset,
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_WHILE:
|
||||
scopes.push_back({
|
||||
.end_offset = info.offset,
|
||||
.loop_end_offset = info.offset,
|
||||
});
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BREAK:
|
||||
case BRW_OPCODE_CONTINUE: {
|
||||
const int innermost_end_offset = scopes.back().end_offset;
|
||||
brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset);
|
||||
|
||||
const int loop_end_offset = scopes.back().loop_end_offset;
|
||||
assert(loop_end_offset != -1);
|
||||
assert(loop_end_offset > info.offset);
|
||||
brw_eu_inst_set_uip(devinfo, insn, loop_end_offset - info.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_HALT: {
|
||||
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
|
||||
*
|
||||
* "In case of the halt instruction not inside any conditional
|
||||
* code block, the value of <JIP> and <UIP> should be the
|
||||
* same. In case of the halt instruction inside conditional code
|
||||
* block, the <UIP> should be the end of the program, and the
|
||||
* <JIP> should be end of the most inner conditional code block."
|
||||
*/
|
||||
const int innermost_end_offset = scopes.back().end_offset;
|
||||
|
||||
/* If present, use the final HALT to infer the "end of the program".
|
||||
*
|
||||
* See also SHADER_OPCODE_HALT_TARGET.
|
||||
*/
|
||||
if (final_halt_offset != -1) {
|
||||
if (final_halt_offset == info.offset)
|
||||
assert(innermost_end_offset == -1);
|
||||
|
||||
const int uip_offset = final_halt_offset + 16;
|
||||
brw_eu_inst_set_uip(devinfo, insn, uip_offset - info.offset);
|
||||
}
|
||||
|
||||
if (innermost_end_offset != -1)
|
||||
brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset);
|
||||
else
|
||||
brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,8 +40,6 @@ private:
|
|||
void generate_scratch_header(brw_inst *inst,
|
||||
struct brw_reg dst, struct brw_reg src);
|
||||
|
||||
void generate_halt(brw_inst *inst);
|
||||
|
||||
void generate_mov_indirect(brw_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg reg,
|
||||
|
|
@ -68,7 +66,9 @@ private:
|
|||
|
||||
unsigned dispatch_width; /**< 8, 16 or 32 */
|
||||
|
||||
brw_exec_list discard_halt_patches;
|
||||
int final_halt_offset;
|
||||
bool needs_final_halt;
|
||||
|
||||
bool debug_flag;
|
||||
const char *shader_name;
|
||||
mesa_shader_stage stage;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue