mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 08:40:11 +01:00
r600: Implement memoryBarrier() in the non-SFN path.
Previously we were just doing a group barrier for both membar and barrier. This sometimes worked out, because atomics and reads already waited for an ack, but writes did not. Use the need_wait_ack pattern that scratch writes used, with a little refactoring for reusability. The refactor also incidentally fixes the atomics path, which previously requested a wait only when the number of outstanding acks was > 1 instead of > 0.

Cc: mesa-stable
Fixes: #6028
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14429>
This commit is contained in:
parent
991a95a352
commit
b8324a7387
6 changed files with 86 additions and 37 deletions
|
|
@ -600,14 +600,10 @@ dEQP-GLES31.functional.stencil_texturing.format.depth32f_stencil8_2d_array,Fail
|
|||
dEQP-GLES31.functional.stencil_texturing.format.stencil_index8_2d,Fail
|
||||
dEQP-GLES31.functional.stencil_texturing.format.stencil_index8_2d_array,Fail
|
||||
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.image_alias_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.image_alias_write,Fail
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.ssbo_alias_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.ssbo_alias_write,Fail
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.ssbo_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.in_invocation.ssbo_write_read,Fail
|
||||
dEQP-GLES31.functional.synchronization.inter_invocation.image_alias_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.inter_invocation.image_alias_write,Fail
|
||||
dEQP-GLES31.functional.synchronization.inter_invocation.image_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.inter_invocation.ssbo_alias_overwrite,Fail
|
||||
dEQP-GLES31.functional.synchronization.inter_invocation.ssbo_alias_write,Fail
|
||||
|
|
@ -1046,8 +1042,6 @@ spec@arb_shader_image_load_store@invalid@imageLoad/unbound image test,Fail
|
|||
spec@arb_shader_image_load_store@max-images,Fail
|
||||
spec@arb_shader_image_load_store@max-images@Combined max image uniforms test,Fail
|
||||
spec@arb_shader_image_load_store@max-images@Fragment shader max image uniforms test,Fail
|
||||
spec@arb_shader_image_load_store@restrict,Fail
|
||||
spec@arb_shader_image_load_store@restrict@no qualifier image aliasing test,Fail
|
||||
|
||||
spec@arb_shader_storage_buffer_object@array-ssbo-auto-binding,Fail
|
||||
spec@arb_shader_storage_buffer_object@compiler@atomicmin-swizzle.vert,Fail
|
||||
|
|
|
|||
|
|
@ -165,7 +165,9 @@
|
|||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE 0x00000000
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND 0x00000001
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ 0x00000002
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK 0x00000002
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND 0x00000003
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK 0x00000003
|
||||
|
||||
#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((unsigned)(x) & 0x7F) << 15)
|
||||
#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) >> 15) & 0x7F)
|
||||
|
|
|
|||
|
|
@ -240,14 +240,48 @@ int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
|
||||
void
|
||||
r600_bytecode_add_ack(struct r600_bytecode *bc)
|
||||
{
|
||||
bc->need_wait_ack = need_wait_ack;
|
||||
bc->need_wait_ack = true;
|
||||
}
|
||||
|
||||
boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
|
||||
int
|
||||
r600_bytecode_wait_acks(struct r600_bytecode *bc)
|
||||
{
|
||||
return bc->need_wait_ack;
|
||||
/* Store acks are an R700+ feature. */
|
||||
if (bc->chip_class < R700)
|
||||
return 0;
|
||||
|
||||
if (!bc->need_wait_ack)
|
||||
return 0;
|
||||
|
||||
int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
struct r600_bytecode_cf *cf = bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
/* Request a wait if the number of outstanding acks is > 0 */
|
||||
cf->cf_addr = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect)
|
||||
{
|
||||
if (bc->chip_class >= R700) {
|
||||
if (indirect)
|
||||
return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG;
|
||||
else
|
||||
return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG;
|
||||
} else {
|
||||
if (indirect)
|
||||
return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
|
||||
else
|
||||
return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
|
||||
}
|
||||
}
|
||||
|
||||
/* alu instructions that can only exist once per group */
|
||||
|
|
@ -1536,10 +1570,8 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
|
|||
int r;
|
||||
|
||||
/* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
|
||||
if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
|
||||
bc->need_wait_ack = false;
|
||||
r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
|
||||
}
|
||||
if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH)
|
||||
r600_bytecode_wait_acks(bc);
|
||||
|
||||
r = r600_bytecode_add_cf(bc);
|
||||
if (r)
|
||||
|
|
|
|||
|
|
@ -313,8 +313,11 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
|
|||
const struct r600_bytecode_output *output);
|
||||
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
|
||||
const struct r600_bytecode_output *output);
|
||||
void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean needed);
|
||||
boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc);
|
||||
|
||||
void r600_bytecode_add_ack(struct r600_bytecode *bc);
|
||||
int r600_bytecode_wait_acks(struct r600_bytecode *bc);
|
||||
uint32_t r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect);
|
||||
|
||||
int r600_bytecode_build(struct r600_bytecode *bc);
|
||||
int r600_bytecode_add_cf(struct r600_bytecode *bc);
|
||||
int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
|
||||
|
|
|
|||
|
|
@ -978,9 +978,18 @@ static int tgsi_barrier(struct r600_shader_ctx *ctx)
|
|||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* XXX: Need to implement GWS ops to sync across wavefronts */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_membar(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
/* Wait for any SSBO/image stores to land. */
|
||||
return r600_bytecode_wait_acks(ctx->bc);
|
||||
}
|
||||
|
||||
static void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, unsigned *scratch_space_needed)
|
||||
{
|
||||
// pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays
|
||||
|
|
@ -1662,10 +1671,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
|
|||
else {
|
||||
struct r600_bytecode_vtx vtx;
|
||||
|
||||
if (r600_bytecode_get_need_wait_ack(ctx->bc)) {
|
||||
r600_bytecode_need_wait_ack(ctx->bc, false);
|
||||
r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
|
||||
}
|
||||
r600_bytecode_wait_acks(ctx->bc);
|
||||
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
vtx.op = FETCH_OP_READ_SCRATCH;
|
||||
|
|
@ -4475,7 +4481,7 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
|
|||
cf.op = CF_OP_MEM_SCRATCH;
|
||||
cf.elem_size = 3;
|
||||
cf.gpr = reg;
|
||||
cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
|
||||
cf.type = r600_bytecode_write_export_ack_type(ctx->bc, tgsi_dst->Register.Indirect);
|
||||
cf.mark = 1;
|
||||
cf.comp_mask = inst->Dst[0].Register.WriteMask;
|
||||
cf.swizzle_x = 0;
|
||||
|
|
@ -4485,10 +4491,6 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
|
|||
cf.burst_count = 1;
|
||||
|
||||
if (tgsi_dst->Register.Indirect) {
|
||||
if (ctx->bc->chip_class < R700)
|
||||
cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
|
||||
else
|
||||
cf.type = 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
|
||||
cf.index_gpr = ctx->bc->ar_reg;
|
||||
}
|
||||
else {
|
||||
|
|
@ -4500,8 +4502,8 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
|
|||
if (r)
|
||||
return;
|
||||
|
||||
if (ctx->bc->chip_class >= R700)
|
||||
r600_bytecode_need_wait_ack(ctx->bc, true);
|
||||
r600_bytecode_add_ack(ctx->bc);
|
||||
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -8952,9 +8954,8 @@ static int tgsi_load_rat(struct r600_shader_ctx *ctx)
|
|||
cf->mark = 1;
|
||||
cf->output.elem_size = 0;
|
||||
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
r600_bytecode_add_ack(ctx->bc);
|
||||
r600_bytecode_wait_acks(ctx->bc);
|
||||
|
||||
desc = util_format_description(inst->Memory.Format);
|
||||
r600_vertex_data_type(inst->Memory.Format,
|
||||
|
|
@ -9055,6 +9056,7 @@ static int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx)
|
|||
return r;
|
||||
}
|
||||
|
||||
cf = NULL;
|
||||
lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
for (i = 0; i <= lasti; i++) {
|
||||
struct r600_bytecode_alu alu;
|
||||
|
|
@ -9095,6 +9097,14 @@ static int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx)
|
|||
cf->barrier = 1;
|
||||
cf->output.elem_size = 0;
|
||||
}
|
||||
|
||||
/* Request an ack from the last write emitted. */
|
||||
if (cf) {
|
||||
cf->mark = true;
|
||||
cf->output.type = r600_bytecode_write_export_ack_type(ctx->bc, true);
|
||||
r600_bytecode_add_ack(ctx->bc);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -9144,7 +9154,7 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx)
|
|||
cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
|
||||
cf->rat.inst = V_RAT_INST_STORE_TYPED;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
|
||||
cf->output.type = r600_bytecode_write_export_ack_type(ctx->bc, true);
|
||||
cf->output.gpr = val_gpr;
|
||||
cf->output.index_gpr = idx_gpr;
|
||||
cf->output.comp_mask = 0xf;
|
||||
|
|
@ -9152,6 +9162,10 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx)
|
|||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
cf->output.elem_size = 0;
|
||||
cf->mark = 1;
|
||||
|
||||
r600_bytecode_add_ack(ctx->bc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -9324,10 +9338,9 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
|
|||
cf->barrier = 1;
|
||||
cf->mark = 1;
|
||||
cf->output.elem_size = 0;
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
cf->cf_addr = 1;
|
||||
|
||||
r600_bytecode_add_ack(ctx->bc);
|
||||
r600_bytecode_wait_acks(ctx->bc);
|
||||
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
|
||||
|
|
@ -12084,7 +12097,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_membar},
|
||||
[113] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[114] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[115] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
|
@ -12311,7 +12324,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_membar},
|
||||
[113] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[114] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[115] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
|
|
|||
|
|
@ -118,6 +118,11 @@
|
|||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND 0x00000001
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ 0x00000002
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND 0x00000003
|
||||
|
||||
/* R700+-only */
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG 0x00000002
|
||||
#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG 0x00000003
|
||||
|
||||
#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((unsigned)(x) & 0x7F) << 15)
|
||||
#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) >> 15) & 0x7F)
|
||||
#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR 0xFFC07FFF
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue