From c2ff8e3e5daa88c677548f59b8f5f9dc286699d5 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 17 Nov 2017 12:13:40 +0100 Subject: [PATCH] r600: Emit EOP for more CF instruction types So far on pre-cayman chipsets the CF instructions CF_OP_LOOP_END, CF_OP_CALL_FS, CF_OP_POP, and CF_OP_GDS an extra CF_NOP instruction was added to add the EOP flag, even though this is not actually needed, because all these instrutions support the EOP flag. This patch removes the fixup code, adds setting the EOP flag for the according instructions as well as others like CF_OP_TEX and CF_OP_VTX, and adds writing out EOP for this type of instruction in the disassembler. This also fixes a bug where shaders were created that didn't actually have the EOP flag set in the last CF instruction, which might have resulted in GPU lockups. [airlied: cleaned up a little] Signed-off-by: Gert Wollny Cc: Signed-off-by: Dave Airlie (cherry picked from commit 1d076aafbc05b0af299826ac0ee63b2fb28e944a) --- src/gallium/drivers/r600/eg_asm.c | 13 +++++++++---- src/gallium/drivers/r600/r600_asm.c | 5 ++++- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r700_asm.c | 3 ++- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 6840cf6f18a..536760de697 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -71,9 +71,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) } else if (cfop->flags & CF_CLAUSE) { /* CF_TEX/VTX (CF_ALU already handled above) */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | + bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); + id++; } else if (cfop->flags & CF_EXP) { /* EXPORT instructions */ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | @@ -111,12 +114,14 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) } else { /* other instructions */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)| + bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COND(cf->cond) | S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | - S_SQ_CF_WORD1_COUNT(cf->count) | - S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); + S_SQ_CF_WORD1_COUNT(cf->count); + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); + id++; } } return 0; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 9e00528c5c0..c5163cff225 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1629,7 +1629,8 @@ static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_byt *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) | S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)| + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } /* common for r600/r700 - eg in eg_asm.c */ @@ -2092,6 +2093,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) bc->bytecode[id + 1], cfop->name); fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr); fprintf(stderr, "\n"); + if (cf->end_of_program) + fprintf(stderr, "EOP "); } else if (cfop->flags & CF_EXP) { int o = 0; const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9d6543d910a..b67bd10f0ca 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3658,7 +3658,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, last = r600_isa_cf(ctx.bc->cf_last->op); /* alu clause instructions don't have EOP bit, so add NOP */ - if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) + if (!last || last->flags & CF_ALU) r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); ctx.bc->cf_last->end_of_program = 1; diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 04f8c6288f0..395059cfeaa 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -30,7 +30,8 @@ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_c *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(count) | - S_SQ_CF_WORD1_COUNT_3(count >> 3); + S_SQ_CF_WORD1_COUNT_3(count >> 3)| + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)