From 97a2d0aded72fc0842278afd88d485c25f270e52 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 17 May 2023 11:35:18 +1000 Subject: [PATCH] gallivm: add support for payload access mesh shading has a payload to pass between task and mesh shaders, this acts like shared memory as well, so we use the standard memory hooks to access it. This currently adds the payload after the 12-byte header which will contain the x/y/z grid sizes. Reviewed-by: Roland Scheidegger Part-of: --- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 63 +++++++++++++++++-- src/gallium/auxiliary/gallivm/lp_bld_nir.h | 6 +- .../auxiliary/gallivm/lp_bld_nir_soa.c | 25 +++++--- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 + 4 files changed, 80 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 24e7f4e683d..f24761c6fdb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1557,7 +1557,7 @@ visit_load_ssbo(struct lp_build_nir_context *bld_base, nir_src_is_always_uniform(instr->src[1]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), - index_and_offset_are_uniform, idx, offset, result); + index_and_offset_are_uniform, false, idx, offset, result); } @@ -1576,7 +1576,7 @@ visit_store_ssbo(struct lp_build_nir_context *bld_base, int nc = nir_src_num_components(instr->src[0]); int bitsize = nir_src_bit_size(instr->src[0]); bld_base->store_mem(bld_base, writemask, nc, bitsize, - index_and_offset_are_uniform, idx, offset, val); + index_and_offset_are_uniform, false, idx, offset, val); } @@ -1606,7 +1606,7 @@ visit_ssbo_atomic(struct lp_build_nir_context *bld_base, if (instr->intrinsic == nir_intrinsic_ssbo_atomic_swap) val2 = get_src(bld_base, instr->src[3]); - bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, idx, + bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, false, 
idx, offset, val, val2, &result[0]); } @@ -1802,7 +1802,7 @@ visit_shared_load(struct lp_build_nir_context *bld_base, bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), - offset_is_uniform, NULL, offset, result); + offset_is_uniform, false, NULL, offset, result); } @@ -1817,7 +1817,7 @@ visit_shared_store(struct lp_build_nir_context *bld_base, int nc = nir_src_num_components(instr->src[0]); int bitsize = nir_src_bit_size(instr->src[0]); bld_base->store_mem(bld_base, writemask, nc, bitsize, - offset_is_uniform, NULL, offset, val); + offset_is_uniform, false, NULL, offset, val); } @@ -1833,7 +1833,7 @@ visit_shared_atomic(struct lp_build_nir_context *bld_base, if (instr->intrinsic == nir_intrinsic_shared_atomic_swap) val2 = get_src(bld_base, instr->src[2]); - bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, NULL, + bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, false, NULL, offset, val, val2, &result[0]); } @@ -1993,6 +1993,47 @@ visit_store_scratch(struct lp_build_nir_context *bld_base, bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val); } +static void +visit_payload_load(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) +{ + LLVMValueRef offset = get_src(bld_base, instr->src[0]); + bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]); + bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), + nir_dest_bit_size(instr->dest), + offset_is_uniform, true, NULL, offset, result); +} + +static void +visit_payload_store(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef val = get_src(bld_base, instr->src[0]); + LLVMValueRef offset = get_src(bld_base, instr->src[1]); + bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]); + int writemask = 
instr->const_index[1]; + int nc = nir_src_num_components(instr->src[0]); + int bitsize = nir_src_bit_size(instr->src[0]); + bld_base->store_mem(bld_base, writemask, nc, bitsize, + offset_is_uniform, true, NULL, offset, val); +} + +static void +visit_payload_atomic(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) +{ + LLVMValueRef offset = get_src(bld_base, instr->src[0]); + LLVMValueRef val = get_src(bld_base, instr->src[1]); + LLVMValueRef val2 = NULL; + int bitsize = nir_src_bit_size(instr->src[1]); + if (instr->intrinsic == nir_intrinsic_task_payload_atomic_swap) + val2 = get_src(bld_base, instr->src[2]); + + bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, true, NULL, + offset, val, val2, &result[0]); +} static void visit_intrinsic(struct lp_build_nir_context *bld_base, @@ -2163,6 +2204,16 @@ visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_shader_clock: bld_base->clock(bld_base, result); break; + case nir_intrinsic_load_task_payload: + visit_payload_load(bld_base, instr, result); + break; + case nir_intrinsic_store_task_payload: + visit_payload_store(bld_base, instr); + break; + case nir_intrinsic_task_payload_atomic: + case nir_intrinsic_task_payload_atomic_swap: + visit_payload_atomic(bld_base, instr, result); + break; default: fprintf(stderr, "Unsupported intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index ae8ca7f2e55..59b096ca985 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -111,16 +111,17 @@ struct lp_build_nir_context /* for SSBO and shared memory */ void (*load_mem)(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, - bool index_and_offset_are_uniform, + bool index_and_offset_are_uniform, bool payload, LLVMValueRef index, LLVMValueRef offset, 
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]); void (*store_mem)(struct lp_build_nir_context *bld_base, unsigned writemask, unsigned nc, unsigned bit_size, - bool index_and_offset_are_uniform, + bool index_and_offset_are_uniform, bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst); void (*atomic_mem)(struct lp_build_nir_context *bld_base, nir_atomic_op op, unsigned bit_size, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef val, LLVMValueRef val2, LLVMValueRef *result); @@ -254,6 +255,7 @@ struct lp_build_nir_soa_context LLVMValueRef ssbo_ptr; LLVMValueRef shared_ptr; + LLVMValueRef payload_ptr; LLVMValueRef scratch_ptr; unsigned scratch_size; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 75ce70a2ce8..a138da6c7e1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -1206,7 +1206,7 @@ ssbo_base_pointer(struct lp_build_nir_context *bld_base, static LLVMValueRef mem_access_base_pointer(struct lp_build_nir_context *bld_base, struct lp_build_context *mem_bld, - unsigned bit_size, + unsigned bit_size, bool payload, LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds) { struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1216,7 +1216,14 @@ mem_access_base_pointer(struct lp_build_nir_context *bld_base, if (index) { ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds); } else { - ptr = bld->shared_ptr; + if (payload) { + ptr = bld->payload_ptr; + ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, ""); + ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), ""); + ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + } + else + ptr = bld->shared_ptr; *bounds = NULL; } @@ -1231,6 +1238,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, 
unsigned nc, unsigned bit_size, bool index_and_offset_are_uniform, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) @@ -1255,7 +1263,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) { LLVMValueRef ssbo_limit; LLVMValueRef first_active = first_active_invocation(bld_base); - LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index, + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index, first_active, &ssbo_limit); offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, ""); @@ -1302,7 +1310,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, lp_build_if(&exec_ifthen, gallivm, loop_cond); LLVMValueRef ssbo_limit; - LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index, + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index, loop_state.counter, &ssbo_limit); for (unsigned c = 0; c < nc; c++) { @@ -1345,6 +1353,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, bool index_and_offset_are_uniform, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst) @@ -1366,7 +1375,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, */ if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) { LLVMValueRef ssbo_limit; - LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index, + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index, lp_build_const_int32(gallivm, 0), &ssbo_limit); offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), ""); @@ -1407,7 +1416,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, lp_build_if(&exec_ifthen, 
gallivm, loop_cond); LLVMValueRef ssbo_limit; - LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index, + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index, loop_state.counter, &ssbo_limit); for (unsigned c = 0; c < nc; c++) { @@ -1444,6 +1453,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, static void emit_atomic_mem(struct lp_build_nir_context *bld_base, nir_atomic_op nir_op, uint32_t bit_size, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef val, LLVMValueRef val2, LLVMValueRef *result) @@ -1471,7 +1481,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, lp_build_if(&exec_ifthen, gallivm, loop_cond); LLVMValueRef ssbo_limit; - LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index, + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index, loop_state.counter, &ssbo_limit); LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); @@ -2743,6 +2753,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.bld_base.aniso_filter_table = params->aniso_filter_table; bld.image = params->image; bld.shared_ptr = params->shared_ptr; + bld.payload_ptr = params->payload_ptr; bld.coro = params->coro; bld.kernel_args_ptr = params->kernel_args; bld.indirects = 0; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 37418fd122a..3b4e8347800 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -282,6 +282,7 @@ struct lp_build_tgsi_params { LLVMValueRef ssbo_sizes_ptr; const struct lp_build_image_soa *image; LLVMValueRef shared_ptr; + LLVMValueRef payload_ptr; const struct lp_build_coro_suspend_info *coro; LLVMValueRef kernel_args; const struct lp_build_fs_iface *fs_iface;