Emit the END_OF_PROGRAM bit on more CF instructions.

- eg_asm.c: CF_TEX/VTX clause words and the generic CF word carry
  cf->end_of_program only when chip_class is EVERGREEN (the in-code comment
  notes that Cayman has no EOP bit).
- r600_asm.c / r700_asm.c: the CF_TEX/VTX clause word carries
  cf->end_of_program.
- r600_asm.c (disasm): print "EOP" for clause instructions, on the same output
  line as the instruction it belongs to.
- r600_shader.c: a trailing NOP is now appended only when there is no last CF
  instruction or the last one is an ALU clause.

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 6840cf6f18a..536760de697 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -71,9 +71,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 		} else if (cfop->flags & CF_CLAUSE) {
 			/* CF_TEX/VTX (CF_ALU already handled above) */
 			bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
-			bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
+			bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+				bc->bytecode[id] |= S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
+			id++;
 		} else if (cfop->flags & CF_EXP) {
 			/* EXPORT instructions */
 			bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
@@ -111,12 +114,14 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 		} else {
 			/* other instructions */
 			bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
-			bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)|
+			bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COND(cf->cond) |
 					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
-					S_SQ_CF_WORD1_COUNT(cf->count) |
-					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
+					S_SQ_CF_WORD1_COUNT(cf->count);
+			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+				bc->bytecode[id] |= S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
+			id++;
 		}
 	}
 	return 0;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 9e00528c5c0..c5163cff225 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1629,7 +1629,8 @@ static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_byt
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
 			S_SQ_CF_WORD1_BARRIER(1) |
-			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 }
 
 /* common for r600/r700 - eg in eg_asm.c */
@@ -2092,6 +2093,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 						bc->bytecode[id + 1], cfop->name);
 				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			} else if (cfop->flags & CF_EXP) {
 				int o = 0;
 				const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 9d6543d910a..b67bd10f0ca 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3658,7 +3658,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	last = r600_isa_cf(ctx.bc->cf_last->op);
 
 	/* alu clause instructions don't have EOP bit, so add NOP */
-	if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
+	if (!last || last->flags & CF_ALU)
 		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
 
 	ctx.bc->cf_last->end_of_program = 1;
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 04f8c6288f0..395059cfeaa 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -30,7 +30,8 @@ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_c
 	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) |
 			S_SQ_CF_WORD1_BARRIER(1) |
 			S_SQ_CF_WORD1_COUNT(count) |
-			S_SQ_CF_WORD1_COUNT_3(count >> 3);
+			S_SQ_CF_WORD1_COUNT_3(count >> 3) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 }
 
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)