gallivm: add support for payload access

mesh shading has a payload to pass between task and mesh shaders,
this acts like shared memory as well, so we use the standard memory
hooks to access it.

This commit adds the payload after the 12-byte header, which will
contain the x/y/z grid sizes.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23066>
This commit is contained in:
Dave Airlie 2023-05-17 11:35:18 +10:00
parent f3e6582f32
commit 97a2d0aded
4 changed files with 80 additions and 15 deletions

View file

@ -1557,7 +1557,7 @@ visit_load_ssbo(struct lp_build_nir_context *bld_base,
nir_src_is_always_uniform(instr->src[1]);
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
nir_dest_bit_size(instr->dest),
index_and_offset_are_uniform, idx, offset, result);
index_and_offset_are_uniform, false, idx, offset, result);
}
@ -1576,7 +1576,7 @@ visit_store_ssbo(struct lp_build_nir_context *bld_base,
int nc = nir_src_num_components(instr->src[0]);
int bitsize = nir_src_bit_size(instr->src[0]);
bld_base->store_mem(bld_base, writemask, nc, bitsize,
index_and_offset_are_uniform, idx, offset, val);
index_and_offset_are_uniform, false, idx, offset, val);
}
@ -1606,7 +1606,7 @@ visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
val2 = get_src(bld_base, instr->src[3]);
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, idx,
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, false, idx,
offset, val, val2, &result[0]);
}
@ -1802,7 +1802,7 @@ visit_shared_load(struct lp_build_nir_context *bld_base,
bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
nir_dest_bit_size(instr->dest),
offset_is_uniform, NULL, offset, result);
offset_is_uniform, false, NULL, offset, result);
}
@ -1817,7 +1817,7 @@ visit_shared_store(struct lp_build_nir_context *bld_base,
int nc = nir_src_num_components(instr->src[0]);
int bitsize = nir_src_bit_size(instr->src[0]);
bld_base->store_mem(bld_base, writemask, nc, bitsize,
offset_is_uniform, NULL, offset, val);
offset_is_uniform, false, NULL, offset, val);
}
@ -1833,7 +1833,7 @@ visit_shared_atomic(struct lp_build_nir_context *bld_base,
if (instr->intrinsic == nir_intrinsic_shared_atomic_swap)
val2 = get_src(bld_base, instr->src[2]);
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, NULL,
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, false, NULL,
offset, val, val2, &result[0]);
}
@ -1993,6 +1993,47 @@ visit_store_scratch(struct lp_build_nir_context *bld_base,
bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
}
/* Load from the task/mesh payload: routed through the shared-memory
 * load hook with payload=true and no SSBO index.
 */
static void
visit_payload_load(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr,
                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   bool uniform_offset = nir_src_is_always_uniform(instr->src[0]);
   LLVMValueRef off = get_src(bld_base, instr->src[0]);

   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      uniform_offset, true, NULL, off, result);
}
/* Store to the task/mesh payload: routed through the shared-memory
 * store hook with payload=true and no SSBO index.
 */
static void
visit_payload_store(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef off = get_src(bld_base, instr->src[1]);
   bool uniform_offset = nir_src_is_always_uniform(instr->src[1]);
   /* const_index[1] holds the write mask for this store intrinsic. */
   int mask = instr->const_index[1];
   int num_comps = nir_src_num_components(instr->src[0]);
   int bits = nir_src_bit_size(instr->src[0]);

   bld_base->store_mem(bld_base, mask, num_comps, bits,
                       uniform_offset, true, NULL, off, value);
}
/* Atomic op on the task/mesh payload: routed through the shared-memory
 * atomic hook with payload=true. Swap variants carry the second operand
 * in src[2]; for all other ops it stays NULL.
 */
static void
visit_payload_atomic(struct lp_build_nir_context *bld_base,
                     nir_intrinsic_instr *instr,
                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef off = get_src(bld_base, instr->src[0]);
   LLVMValueRef value = get_src(bld_base, instr->src[1]);
   int bits = nir_src_bit_size(instr->src[1]);
   LLVMValueRef value2 =
      instr->intrinsic == nir_intrinsic_task_payload_atomic_swap ?
         get_src(bld_base, instr->src[2]) : NULL;

   bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bits, true, NULL,
                        off, value, value2, &result[0]);
}
static void
visit_intrinsic(struct lp_build_nir_context *bld_base,
@ -2163,6 +2204,16 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_shader_clock:
bld_base->clock(bld_base, result);
break;
case nir_intrinsic_load_task_payload:
visit_payload_load(bld_base, instr, result);
break;
case nir_intrinsic_store_task_payload:
visit_payload_store(bld_base, instr);
break;
case nir_intrinsic_task_payload_atomic:
case nir_intrinsic_task_payload_atomic_swap:
visit_payload_atomic(bld_base, instr, result);
break;
default:
fprintf(stderr, "Unsupported intrinsic: ");
nir_print_instr(&instr->instr, stderr);

View file

@ -111,16 +111,17 @@ struct lp_build_nir_context
/* for SSBO and shared memory */
void (*load_mem)(struct lp_build_nir_context *bld_base,
unsigned nc, unsigned bit_size,
bool index_and_offset_are_uniform,
bool index_and_offset_are_uniform, bool payload,
LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
void (*store_mem)(struct lp_build_nir_context *bld_base,
unsigned writemask, unsigned nc, unsigned bit_size,
bool index_and_offset_are_uniform,
bool index_and_offset_are_uniform, bool payload,
LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst);
void (*atomic_mem)(struct lp_build_nir_context *bld_base,
nir_atomic_op op,
unsigned bit_size,
bool payload,
LLVMValueRef index, LLVMValueRef offset,
LLVMValueRef val, LLVMValueRef val2,
LLVMValueRef *result);
@ -254,6 +255,7 @@ struct lp_build_nir_soa_context
LLVMValueRef ssbo_ptr;
LLVMValueRef shared_ptr;
LLVMValueRef payload_ptr;
LLVMValueRef scratch_ptr;
unsigned scratch_size;

View file

@ -1206,7 +1206,7 @@ ssbo_base_pointer(struct lp_build_nir_context *bld_base,
static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context *bld_base,
struct lp_build_context *mem_bld,
unsigned bit_size,
unsigned bit_size, bool payload,
LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
@ -1216,7 +1216,14 @@ mem_access_base_pointer(struct lp_build_nir_context *bld_base,
if (index) {
ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
} else {
ptr = bld->shared_ptr;
if (payload) {
ptr = bld->payload_ptr;
ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
}
else
ptr = bld->shared_ptr;
*bounds = NULL;
}
@ -1231,6 +1238,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
unsigned nc,
unsigned bit_size,
bool index_and_offset_are_uniform,
bool payload,
LLVMValueRef index,
LLVMValueRef offset,
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
@ -1255,7 +1263,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
LLVMValueRef ssbo_limit;
LLVMValueRef first_active = first_active_invocation(bld_base);
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
first_active, &ssbo_limit);
offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
@ -1302,7 +1310,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit);
for (unsigned c = 0; c < nc; c++) {
@ -1345,6 +1353,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
unsigned nc,
unsigned bit_size,
bool index_and_offset_are_uniform,
bool payload,
LLVMValueRef index,
LLVMValueRef offset,
LLVMValueRef dst)
@ -1366,7 +1375,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
*/
if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
lp_build_const_int32(gallivm, 0), &ssbo_limit);
offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
@ -1407,7 +1416,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit);
for (unsigned c = 0; c < nc; c++) {
@ -1444,6 +1453,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
nir_atomic_op nir_op,
uint32_t bit_size,
bool payload,
LLVMValueRef index, LLVMValueRef offset,
LLVMValueRef val, LLVMValueRef val2,
LLVMValueRef *result)
@ -1471,7 +1481,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index,
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit);
LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
@ -2743,6 +2753,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
bld.bld_base.aniso_filter_table = params->aniso_filter_table;
bld.image = params->image;
bld.shared_ptr = params->shared_ptr;
bld.payload_ptr = params->payload_ptr;
bld.coro = params->coro;
bld.kernel_args_ptr = params->kernel_args;
bld.indirects = 0;

View file

@ -282,6 +282,7 @@ struct lp_build_tgsi_params {
LLVMValueRef ssbo_sizes_ptr;
const struct lp_build_image_soa *image;
LLVMValueRef shared_ptr;
LLVMValueRef payload_ptr;
const struct lp_build_coro_suspend_info *coro;
LLVMValueRef kernel_args;
const struct lp_build_fs_iface *fs_iface;