mirror of https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 17:00:09 +01:00
spirv: Add support for lowering workgroup access to offsets
Previously, we always left workgroup variables as shared nir_variables
and let the driver call nir_lower_io.  This adds an option to do the
lowering directly in spirv_to_nir.  To do this, we implicitly assign the
variables a std430 layout and then treat them like a UBO or SSBO and
immediately lower all the way to an offset.  As a side effect,
spirv_to_nir now handles variable pointers for workgroup variables.

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
parent f6eb5ce39c
commit ae54a4f84f

4 changed files with 190 additions and 19 deletions
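For context, a driver opts in through the new flag in spirv_to_nir_options
(first hunk below). A minimal sketch of a call site, assuming the
spirv_to_nir() entry point of this era; the wrapper function and the
hard-coded stage/entry-point name are hypothetical:

   /* Hypothetical driver helper: compile compute SPIR-V with workgroup
    * access lowered to byte offsets up-front. */
   static nir_function *
   compile_cs_spirv(const uint32_t *words, size_t word_count,
                    const nir_shader_compiler_options *nir_opts)
   {
      const struct spirv_to_nir_options spirv_options = {
         .lower_workgroup_access_to_offsets = true,
      };

      /* Workgroup variables come back as load_shared/store_shared on
       * byte offsets instead of shared nir_variables, so the driver no
       * longer needs nir_lower_io for them. */
      return spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
                          "main", &spirv_options, nir_opts);
   }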
src/compiler/spirv/nir_spirv.h

@@ -49,6 +49,14 @@ enum nir_spirv_debug_level {
 };
 
 struct spirv_to_nir_options {
+   /* Whether or not to lower all workgroup variable access to offsets
+    * up-front.  This means you will get _shared intrinsics instead of
+    * _var intrinsics for workgroup data access.
+    *
+    * This is currently required for full variable pointers support.
+    */
+   bool lower_workgroup_access_to_offsets;
+
    struct {
       bool float64;
       bool image_ms_array;
src/compiler/spirv/spirv_to_nir.c

@@ -809,6 +809,64 @@ translate_image_format(struct vtn_builder *b, SpvImageFormat format)
    }
 }
 
+static struct vtn_type *
+vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
+                       uint32_t *size_out, uint32_t *align_out)
+{
+   switch (type->base_type) {
+   case vtn_base_type_scalar: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      *size_out = comp_size;
+      *align_out = comp_size;
+      return type;
+   }
+
+   case vtn_base_type_vector: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      assert(type->length > 0 && type->length <= 4);
+      unsigned align_comps = type->length == 3 ? 4 : type->length;
+      *size_out = comp_size * type->length;
+      *align_out = comp_size * align_comps;
+      return type;
+   }
+
+   case vtn_base_type_matrix:
+   case vtn_base_type_array: {
+      /* We're going to add an array stride */
+      type = vtn_type_copy(b, type);
+      uint32_t elem_size, elem_align;
+      type->array_element = vtn_type_layout_std430(b, type->array_element,
+                                                   &elem_size, &elem_align);
+      type->stride = vtn_align_u32(elem_size, elem_align);
+      *size_out = type->stride * type->length;
+      *align_out = elem_align;
+      return type;
+   }
+
+   case vtn_base_type_struct: {
+      /* We're going to add member offsets */
+      type = vtn_type_copy(b, type);
+      uint32_t offset = 0;
+      uint32_t align = 0;
+      for (unsigned i = 0; i < type->length; i++) {
+         uint32_t mem_size, mem_align;
+         type->members[i] = vtn_type_layout_std430(b, type->members[i],
+                                                   &mem_size, &mem_align);
+         offset = vtn_align_u32(offset, mem_align);
+         type->offsets[i] = offset;
+         offset += mem_size;
+         align = MAX2(align, mem_align);
+      }
+      *size_out = offset;
+      *align_out = align;
+      return type;
+   }
+
+   default:
+      unreachable("Invalid SPIR-V type for std430");
+   }
+}
+
 static void
 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                 const uint32_t *w, unsigned count)
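To make the std430 rules above concrete, here is a worked example (the
struct itself is hypothetical; GLSL-style type names used for readability):

   /* struct { float a; vec3 b; float c[2]; } under vtn_type_layout_std430:
    *
    *   a: scalar float   size 4,  align 4            -> offset 0
    *   b: vec3           size 12, align 16 (3 -> 4)  -> offset 16
    *   c: float[2]       stride align(4, 4) = 4,
    *                     size 4 * 2 = 8              -> offset 28
    *
    *   *size_out  = 36 (the final offset; note it is not padded up to
    *                    the struct alignment)
    *   *align_out = MAX2(4, 16, 4) = 16
    */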
@@ -958,6 +1016,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           */
          val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
       }
+
+      if (storage_class == SpvStorageClassWorkgroup &&
+          b->options->lower_workgroup_access_to_offsets) {
+         uint32_t size, align;
+         val->type->deref = vtn_type_layout_std430(b, val->type->deref,
+                                                   &size, &align);
+         val->type->length = size;
+         val->type->align = align;
+         /* These can actually be stored to nir_variables and used as SSA
+          * values so they need a real glsl_type.
+          */
+         val->type->type = glsl_uint_type();
+      }
       break;
    }
 
@@ -2181,6 +2252,32 @@ get_ssbo_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
    }
 }
 
+static nir_intrinsic_op
+get_shared_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
+{
+   switch (opcode) {
+   case SpvOpAtomicLoad:   return nir_intrinsic_load_shared;
+   case SpvOpAtomicStore:  return nir_intrinsic_store_shared;
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_shared_##N;
+   OP(AtomicExchange,        atomic_exchange)
+   OP(AtomicCompareExchange, atomic_comp_swap)
+   OP(AtomicIIncrement,      atomic_add)
+   OP(AtomicIDecrement,      atomic_add)
+   OP(AtomicIAdd,            atomic_add)
+   OP(AtomicISub,            atomic_add)
+   OP(AtomicSMin,            atomic_imin)
+   OP(AtomicUMin,            atomic_umin)
+   OP(AtomicSMax,            atomic_imax)
+   OP(AtomicUMax,            atomic_umax)
+   OP(AtomicAnd,             atomic_and)
+   OP(AtomicOr,              atomic_or)
+   OP(AtomicXor,             atomic_xor)
+#undef OP
+   default:
+      vtn_fail("Invalid shared atomic");
+   }
+}
+
 static nir_intrinsic_op
 get_var_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
 {
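Note that OpAtomicIIncrement, OpAtomicIDecrement, and OpAtomicISub all map
to shared atomic_add: only the data source differs. The adjustment happens
when the sources are filled in; a sketch of the assumed behavior, since
fill_common_atomic_sources is not part of this diff (illustrative only):

   /* Illustrative: why three SPIR-V opcodes can share one add intrinsic.
    * For these atomics, w[6] is the value operand. */
   nir_ssa_def *data;
   switch (opcode) {
   case SpvOpAtomicIIncrement: data = nir_imm_int(&b->nb,  1); break;
   case SpvOpAtomicIDecrement: data = nir_imm_int(&b->nb, -1); break;
   case SpvOpAtomicISub:
      data = nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def);
      break;
   default:
      data = vtn_ssa_value(b, w[6])->def;
      break;
   }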
@@ -2246,7 +2343,8 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
    SpvMemorySemanticsMask semantics = w[5];
    */
 
-   if (ptr->mode == vtn_variable_mode_workgroup) {
+   if (ptr->mode == vtn_variable_mode_workgroup &&
+       !b->options->lower_workgroup_access_to_offsets) {
       nir_deref_var *deref = vtn_pointer_to_deref(b, ptr);
       const struct glsl_type *deref_type = nir_deref_tail(&deref->deref)->type;
       nir_intrinsic_op op = get_var_nir_atomic_op(b, opcode);
@@ -2286,27 +2384,36 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       }
    } else {
-      vtn_assert(ptr->mode == vtn_variable_mode_ssbo);
       nir_ssa_def *offset, *index;
       offset = vtn_pointer_to_offset(b, ptr, &index, NULL);
 
-      nir_intrinsic_op op = get_ssbo_nir_atomic_op(b, opcode);
+      nir_intrinsic_op op;
+      if (ptr->mode == vtn_variable_mode_ssbo) {
+         op = get_ssbo_nir_atomic_op(b, opcode);
+      } else {
+         vtn_assert(ptr->mode == vtn_variable_mode_workgroup &&
+                    b->options->lower_workgroup_access_to_offsets);
+         op = get_shared_nir_atomic_op(b, opcode);
+      }
 
       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
 
+      int src = 0;
       switch (opcode) {
       case SpvOpAtomicLoad:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicStore:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
         nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
-         atomic->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
-         atomic->src[1] = nir_src_for_ssa(index);
-         atomic->src[2] = nir_src_for_ssa(offset);
+         atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicExchange:
@@ -2323,9 +2430,10 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       case SpvOpAtomicAnd:
       case SpvOpAtomicOr:
       case SpvOpAtomicXor:
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
-         fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
+         fill_common_atomic_sources(b, opcode, w, &atomic->src[src]);
          break;
 
       default:
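The running src index in the two hunks above means shared intrinsics simply
drop the block-index source that SSBO intrinsics carry. Summarized from the
code:

   /* Atomic source layouts after this change:
    *
    *   SSBO   load:  (block_index, offset)
    *   shared load:  (offset)
    *   SSBO   store: (data, block_index, offset)
    *   shared store: (data, offset)
    *   SSBO   RMW:   (block_index, offset, data...)
    *   shared RMW:   (offset, data...)
    */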
src/compiler/spirv/vtn_private.h

@@ -276,7 +276,10 @@ struct vtn_type {
    /* The value that declares this type.  Used for finding decorations */
    struct vtn_value *val;
 
-   /* Specifies the length of complex types. */
+   /* Specifies the length of complex types.
+    *
+    * For Workgroup pointers, this is the size of the referenced type.
+    */
    unsigned length;
 
    /* for arrays, matrices and pointers, the array stride */
@@ -327,6 +330,9 @@ struct vtn_type {
 
       /* Storage class for pointers */
       SpvStorageClass storage_class;
+
+      /* Required alignment for pointers */
+      uint32_t align;
    };
 
    /* Members for image types */
@@ -441,6 +447,8 @@ struct vtn_variable {
    nir_variable *var;
    nir_variable **members;
 
+   int shared_location;
+
    /**
     * In some early released versions of GLSLang, it implemented all function
     * calls by making copies of all parameters into temporary variables and
@@ -686,6 +694,13 @@ void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
                                     const uint32_t *words, unsigned count);
 
+static inline uint32_t
+vtn_align_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0 && a == (a & -a));
+   return (v + a - 1) & ~(a - 1);
+}
+
 static inline uint64_t
 vtn_u64_literal(const uint32_t *w)
 {
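The assert in vtn_align_u32() requires a to be a nonzero power of two:
a & -a isolates the lowest set bit, so the equality only holds when a has
a single bit set. A standalone sanity check of the helper:

   #include <assert.h>
   #include <stdint.h>

   static inline uint32_t
   vtn_align_u32(uint32_t v, uint32_t a)
   {
      assert(a != 0 && a == (a & -a));   /* nonzero power of two */
      return (v + a - 1) & ~(a - 1);     /* round v up to a multiple of a */
   }

   int main(void)
   {
      assert(vtn_align_u32(13, 16) == 16);   /* rounds up */
      assert(vtn_align_u32(32, 16) == 32);   /* already aligned */
      assert(vtn_align_u32(0,  8) ==  0);    /* zero stays zero */
      return 0;
   }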
src/compiler/spirv/vtn_variables.c

@@ -62,7 +62,9 @@ vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
                             struct vtn_pointer *ptr)
 {
    return ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_ssbo;
+          ptr->mode == vtn_variable_mode_ssbo ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 static bool
@@ -71,7 +73,9 @@ vtn_pointer_is_external_block(struct vtn_builder *b,
 {
    return ptr->mode == vtn_variable_mode_ssbo ||
           ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_push_constant;
+          ptr->mode == vtn_variable_mode_push_constant ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 /* Dereference the given base pointer by the access chain */
@@ -167,7 +171,8 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
    /* We need ptr_type for the stride */
    vtn_assert(base->ptr_type);
    /* This must be a pointer to an actual element somewhere */
-   vtn_assert(block_index && offset);
+   vtn_assert(offset);
+   vtn_assert(block_index || base->mode == vtn_variable_mode_workgroup);
    /* We need at least one element in the chain */
    vtn_assert(deref_chain->length >= 1);
@@ -183,6 +188,7 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       vtn_assert(!block_index);
 
       vtn_assert(base->var);
+      vtn_assert(base->ptr_type);
       switch (base->mode) {
       case vtn_variable_mode_ubo:
       case vtn_variable_mode_ssbo:
@@ -201,6 +207,22 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
          offset = nir_imm_int(&b->nb, 0);
          break;
 
+      case vtn_variable_mode_workgroup:
+         /* Assign location on first use so that we don't end up bloating SLM
+          * address space for variables which are never statically used.
+          */
+         if (base->var->shared_location < 0) {
+            assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
+            b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
+                                                  base->ptr_type->align);
+            base->var->shared_location = b->shader->num_shared;
+            b->shader->num_shared += base->ptr_type->length;
+         }
+
+         block_index = NULL;
+         offset = nir_imm_int(&b->nb, base->var->shared_location);
+         break;
+
       default:
          vtn_fail("Invalid offset pointer mode");
       }
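The first-use assignment above is a bump allocator over
nir_shader::num_shared. A minimal model of the arithmetic (the standalone
structs here are hypothetical; vtn_align_u32 as defined in vtn_private.h):

   /* E.g. with num_shared = 36, a variable with align = 16 and length = 8
    * lands at offset 48 and bumps the total to 56.  Variables that are
    * never dereferenced keep shared_location == -1 and consume no
    * shared-memory space at all. */
   struct model_shader { uint32_t num_shared; };
   struct model_var    { int shared_location; uint32_t length, align; };

   static void
   model_assign_location(struct model_shader *s, struct model_var *v)
   {
      if (v->shared_location < 0) {
         s->num_shared = vtn_align_u32(s->num_shared, v->align);
         v->shared_location = (int)s->num_shared;
         s->num_shared += v->length;
      }
   }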
@@ -837,6 +859,9 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
       vtn_access_chain_get_offset_size(b, src->chain, src->var->type,
                                        &access_offset, &access_size);
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -861,6 +886,9 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
    case vtn_variable_mode_ssbo:
       op = nir_intrinsic_store_ssbo;
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -946,7 +974,8 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                    struct vtn_pointer *dest)
 {
    if (vtn_pointer_is_external_block(b, dest)) {
-      vtn_assert(dest->mode == vtn_variable_mode_ssbo);
+      vtn_assert(dest->mode == vtn_variable_mode_ssbo ||
+                 dest->mode == vtn_variable_mode_workgroup);
       vtn_block_store(b, src, dest);
    } else {
       _vtn_variable_load_store(b, false, dest, &src);
@@ -1526,7 +1555,7 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
                  ptr->mode == vtn_variable_mode_ssbo);
       return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
    } else {
-      vtn_fail("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
+      return ptr->offset;
    }
 }
@@ -1555,7 +1584,7 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
       ptr->offset = nir_channel(&b->nb, ssa, 1);
    } else {
       vtn_assert(ssa->num_components == 1);
-      unreachable("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
+      ptr->block_index = NULL;
+      ptr->offset = ssa;
    }
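These two hunks define how lowered pointers round-trip through SSA values,
which is what enables variable pointers for workgroup storage. Summarized
from the code:

   /* SSA form of an offset-based pointer:
    *
    *   UBO/SSBO:   vec2(block_index, byte_offset)  -- two components
    *   Workgroup:  byte_offset                     -- one component
    */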
@@ -1630,7 +1659,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
    case vtn_variable_mode_global:
    case vtn_variable_mode_image:
    case vtn_variable_mode_sampler:
-   case vtn_variable_mode_workgroup:
       /* For these, we create the variable normally */
       var->var = rzalloc(b->shader, nir_variable);
       var->var->name = ralloc_strdup(var->var, val->name);
@@ -1648,6 +1676,18 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       }
       break;
 
+   case vtn_variable_mode_workgroup:
+      if (b->options->lower_workgroup_access_to_offsets) {
+         var->shared_location = -1;
+      } else {
+         /* Create the variable normally */
+         var->var = rzalloc(b->shader, nir_variable);
+         var->var->name = ralloc_strdup(var->var, val->name);
+         var->var->type = var->type->type;
+         var->var->data.mode = nir_var_shared;
+      }
+      break;
+
    case vtn_variable_mode_input:
    case vtn_variable_mode_output: {
       /* In order to know whether or not we're a per-vertex inout, we need