i965/gen7: Handle atomic instructions from the VEC4 back-end.

This can handle all 15 of the 32-bit untyped atomic operations the
hardware supports, but only INC and PREDEC will be exposed through
the API for now.
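
For context: GLSL's atomicCounterIncrement() returns the counter value
before the increment, while atomicCounterDecrement() returns the value
after the decrement, which is why the hardware opcode used here is
PREDEC rather than DEC.  A minimal C++ model of these semantics,
illustrative only and not part of the patch:

#include <atomic>
#include <cstdint>

/* Reference semantics for the two operations exposed through the API:
 * INC returns the pre-increment value, PREDEC the post-decrement one. */
uint32_t counter_increment(std::atomic<uint32_t> &c)
{
   return c.fetch_add(1);        /* old value */
}

uint32_t counter_predecrement(std::atomic<uint32_t> &c)
{
   return c.fetch_sub(1) - 1;    /* new value */
}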

v2: Represent atomics as GLSL intrinsics.  Add support for variably
    indexed atomic counter arrays.
v3: Add comment on why we don't need to assign uniform storage for
    atomic counters.

Reviewed-by: Paul Berry <stereotype441@gmail.com>
Francisco Jerez 2013-09-25 16:31:35 -07:00
parent 764f40d92e
commit ba885c30c7
2 changed files with 110 additions and 2 deletions

src/mesa/drivers/dri/i965/brw_vec4.h

@@ -489,6 +489,13 @@ public:
    void emit_shader_time_write(enum shader_time_shader_type type,
                                src_reg value);
 
+   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                            dst_reg dst, src_reg offset, src_reg src0,
+                            src_reg src1);
+
+   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                  src_reg offset);
+
    src_reg get_scratch_offset(vec4_instruction *inst,
                               src_reg *reladdr, int reg_offset);
    src_reg get_pull_constant_offset(vec4_instruction *inst,
@@ -514,6 +521,8 @@ public:
    void dump_instruction(backend_instruction *inst);
 
+   void visit_atomic_counter_intrinsic(ir_call *ir);
+
 protected:
    void emit_vertex();
    void lower_attributes_to_hw_regs(const int *attribute_map,

src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

@@ -974,8 +974,11 @@ vec4_visitor::visit(ir_variable *ir)
       /* Thanks to the lower_ubo_reference pass, we will see only
        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
        * variables, so no need for them to be in variable_ht.
+       *
+       * Atomic counters take no uniform storage, no need to do
+       * anything here.
        */
-      if (ir->is_in_uniform_block())
+      if (ir->is_in_uniform_block() || ir->type->contains_atomic())
         return;
 
      /* Track how big the whole uniform variable is, in case we need to put a
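
For context, the check above works because atomic counters are not push
constants: they live in an atomic counter buffer reached through the
binding table, so the uniform layout pass can skip them entirely.  A
hypothetical sketch of that layout rule, with all names invented for
illustration:

#include <cstddef>
#include <vector>

/* Sketch: variables in UBOs and variables whose type contains an atomic
 * counter consume no slots in the uniform file; counters are addressed
 * through an ABO surface in the binding table instead. */
struct VarInfo {
   size_t components;       /* slots the type would otherwise occupy */
   bool in_uniform_block;   /* cf. ir->is_in_uniform_block() */
   bool contains_atomic;    /* cf. ir->type->contains_atomic() */
};

size_t assign_uniform_slots(const std::vector<VarInfo> &vars)
{
   size_t next_slot = 0;
   for (const VarInfo &v : vars) {
      if (v.in_uniform_block || v.contains_atomic)
         continue;          /* takes no uniform storage */
      next_slot += v.components;
   }
   return next_slot;        /* total uniform slots assigned */
}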
@@ -2160,10 +2163,57 @@ vec4_visitor::visit(ir_constant *ir)
    emit_constant_values(&dst, ir);
 }
 
+void
+vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+   ir_dereference *deref = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   ir_variable *location = deref->variable_referenced();
+   unsigned surf_index = (prog_data->base.binding_table.abo_start +
+                          location->atomic.buffer_index);
+
+   /* Calculate the surface offset */
+   src_reg offset(this, glsl_type::uint_type);
+   ir_dereference_array *deref_array = deref->as_dereference_array();
+
+   if (deref_array) {
+      deref_array->array_index->accept(this);
+      src_reg tmp(this, glsl_type::uint_type);
+      emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
+      emit(ADD(dst_reg(offset), tmp, location->atomic.offset));
+   } else {
+      offset = location->atomic.offset;
+   }
+
+   /* Emit the appropriate machine instruction */
+   const char *callee = ir->callee->function_name();
+   dst_reg dst = get_assignment_lhs(ir->return_deref, this);
+
+   if (!strcmp("__intrinsic_atomic_read", callee)) {
+      emit_untyped_surface_read(surf_index, dst, offset);
+
+   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+      emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+
+   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+   }
+}
+
 void
 vec4_visitor::visit(ir_call *ir)
 {
-   assert(!"not reached");
+   const char *callee = ir->callee->function_name();
+
+   if (!strcmp("__intrinsic_atomic_read", callee) ||
+       !strcmp("__intrinsic_atomic_increment", callee) ||
+       !strcmp("__intrinsic_atomic_predecrement", callee)) {
+      visit_atomic_counter_intrinsic(ir);
+   } else {
+      assert(!"Unsupported intrinsic.");
+   }
 }
 
 void
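
The surface offset computed by visit_atomic_counter_intrinsic() reduces
to a base offset plus the array index scaled by the counter size.  A
scalar sketch of that math, assuming ATOMIC_COUNTER_SIZE is 4 bytes (one
32-bit counter) and with the helper name invented for illustration:

#include <cstdint>

/* For "layout(binding = B, offset = O) uniform atomic_uint a[N];",
 * element a[i] lives at byte O + i * 4 in the buffer bound to ABO
 * binding point B; a non-array counter uses O directly. */
uint32_t counter_byte_offset(uint32_t base_offset, uint32_t array_index)
{
   const uint32_t counter_size = 4;   /* assumed ATOMIC_COUNTER_SIZE */
   return base_offset + array_index * counter_size;
}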
@@ -2557,6 +2607,55 @@ vec4_visitor::visit(ir_end_primitive *)
    assert(!"not reached");
 }
 
+void
+vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                                  dst_reg dst, src_reg offset,
+                                  src_reg src0, src_reg src1)
+{
+   unsigned mlen = 0;
+
+   /* Set the atomic operation offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
+   mlen++;
+
+   /* Set the atomic operation arguments. */
+   if (src0.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
+      mlen++;
+   }
+
+   if (src1.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
+      mlen++;
+   }
+
+   /* Emit the instruction.  Note that this maps to the normal SIMD8
+    * untyped atomic message on Ivy Bridge, but that's OK because
+    * unused channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+                                 src_reg(atomic_op), src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = mlen;
+}
+
+void
+vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                        src_reg offset)
+{
+   /* Set the surface read offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+
+   /* Emit the instruction.  Note that this maps to the normal SIMD8
+    * untyped surface read message, but that's OK because unused
+    * channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
+                                 dst, src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = 1;
+}
+
 void
 vec4_visitor::emit_ndc_computation()
 {
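
A note on the message payloads built above: the dword offset always goes
in the first MRF, and each present source operand appends one more, so
mlen directly encodes the operand count.  A standalone sketch of that
accounting, assuming the same BAD_FILE convention for absent sources:

/* Mirror of the mlen accounting in emit_untyped_atomic(): MRF 0 carries
 * the offset in its X channel and each present source adds one MRF.
 * INC and PREDEC take no sources, so they use mlen == 1, the same as an
 * untyped surface read; a two-source operation such as compare/exchange
 * would use mlen == 3. */
unsigned untyped_atomic_mlen(bool has_src0, bool has_src1)
{
   unsigned mlen = 1;      /* MRF 0: offset */
   if (has_src0)
      mlen++;              /* MRF 1: first operand */
   if (has_src1)
      mlen++;              /* MRF 2: second operand */
   return mlen;
}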