mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 08:40:11 +01:00
gallivm: add load/store scratch support.
Scratch space is per-thread space, so allocate the scratch size * vector width, and add a per-thread base offset to each load/store. This is needed for the OpenCL private memory space. Reviewed-by: Roland Scheidegger <sroland@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7304>
This commit is contained in:
parent
9c1f6ed804
commit
fb56fb02a1
4 changed files with 188 additions and 54 deletions
|
|
@ -18,8 +18,6 @@ program/execute/amdgcn-callee-saved-registers: skip
|
|||
program/execute/amdgcn-f16-inline-immediates: skip
|
||||
program/execute/amdgcn-f32-inline-immediates/add integer 64: fail
|
||||
program/execute/amdgcn-i16-inline-immediates: skip
|
||||
program/execute/amdgcn-mubuf-negative-vaddr: crash
|
||||
program/execute/amdgcn.sign_extend_inreg: crash
|
||||
program/execute/atomic_int64_add-global: skip
|
||||
program/execute/atomic_int64_add-global-return: skip
|
||||
program/execute/atomic_int64_add-local: skip
|
||||
|
|
@ -65,8 +63,7 @@ program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char4: fail
|
|||
program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char8: fail
|
||||
program/execute/builtin/builtin-char-popcount-1.2.generated: skip
|
||||
program/execute/builtin/builtin-char-rotate-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-cos-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-fma-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-cos-1.0.generated: timeout
|
||||
program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float1: fail
|
||||
program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float16: fail
|
||||
program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float2: fail
|
||||
|
|
@ -85,13 +82,9 @@ program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float16: fail
|
|||
program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float2: fail
|
||||
program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float4: fail
|
||||
program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float8: fail
|
||||
program/execute/builtin/builtin-float-fract-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-frexp-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-isfinite-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-isnormal-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-ldexp-1.0.generated: fail
|
||||
program/execute/builtin/builtin-float-lgamma-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-lgamma_r-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float1: fail
|
||||
program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float16: fail
|
||||
program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float2: fail
|
||||
|
|
@ -103,12 +96,9 @@ program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float2: fail
|
|||
program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float4: fail
|
||||
program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float8: fail
|
||||
program/execute/builtin/builtin-float-mix-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-modf-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-remquo-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-sin-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-sin-1.0.generated: timeout
|
||||
program/execute/builtin/builtin-float-sincos-1.0.generated: timeout
|
||||
program/execute/builtin/builtin-float-tan-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-tgamma-1.0.generated: crash
|
||||
program/execute/builtin/builtin-float-tan-1.0.generated: timeout
|
||||
program/execute/builtin/builtin-int-popcount-1.2.generated: skip
|
||||
program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long1: fail
|
||||
program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long16: fail
|
||||
|
|
@ -178,10 +168,9 @@ program/execute/builtin/builtin-ushort-popcount-1.2.generated: skip
|
|||
program/execute/builtin/builtin-ushort-upsample-1.0.generated: crash
|
||||
program/execute/call-clobbers-amdgcn: skip
|
||||
program/execute/calls-large-struct: crash
|
||||
program/execute/calls-struct: crash
|
||||
program/execute/gegl-rgb-gamma-u8-to-ragabaf: crash
|
||||
program/execute/calls-struct/regs struct: fail
|
||||
program/execute/calls-struct/small struct in regs: fail
|
||||
program/execute/global-offset/3d, input dependent: fail
|
||||
program/execute/i32-stack-array: crash
|
||||
program/execute/image-attributes: crash
|
||||
program/execute/image-read-2d/read float from cl_float cl_rgba image.: fail
|
||||
program/execute/image-read-2d/read signed integer from cl_signed_int8 cl_rgba image.: fail
|
||||
|
|
@ -192,66 +181,33 @@ program/execute/load-hi16-generic: skip
|
|||
program/execute/load-lo16: crash
|
||||
program/execute/load-lo16-generic: skip
|
||||
program/execute/mad-mix: skip
|
||||
program/execute/multiple-stack-objects: crash
|
||||
program/execute/negative-private-base-pointer: crash
|
||||
program/execute/program-tester-check-local-size-test-should-skip/this test should skip: skip
|
||||
program/execute/pyrit-wpa-psk: crash
|
||||
program/execute/realign-stack: crash
|
||||
program/execute/reference: crash
|
||||
program/execute/sampler/read from image using clamp_to_edge addressing mode: fail
|
||||
program/execute/sampler/read from image using linear filtering and normalized coords: fail
|
||||
program/execute/sampler/read from image using linear filtering and unnormalized coords: fail
|
||||
program/execute/scalar-logical-float: skip
|
||||
program/execute/store-hi16-generic: skip
|
||||
program/execute/v2i32-stack: crash
|
||||
program/execute/v3i32-stack: crash
|
||||
program/execute/v3i32-stack-array: crash
|
||||
program/execute/v4i32-stack: crash
|
||||
program/execute/vload/vload-char-private: crash
|
||||
program/execute/vload/vload-double-private: crash
|
||||
program/execute/vload/vload-float-private: crash
|
||||
program/execute/vload/vload-half-constant: skip
|
||||
program/execute/vload/vload-half-global: skip
|
||||
program/execute/vload/vload-half-local: skip
|
||||
program/execute/vload/vload-half-private: skip
|
||||
program/execute/vload/vload-int-private: crash
|
||||
program/execute/vload/vload-long-private: crash
|
||||
program/execute/vload/vload-short-private: crash
|
||||
program/execute/vload/vload-uchar-private: crash
|
||||
program/execute/vload/vload-uint-private: crash
|
||||
program/execute/vload/vload-ulong-private: crash
|
||||
program/execute/vload/vload-ushort-private: crash
|
||||
program/execute/vload/vload_half-float-private: crash
|
||||
program/execute/vload/vloada_half-float-private: crash
|
||||
program/execute/vstore/vstore-char-private: crash
|
||||
program/execute/vstore/vstore-double-private: crash
|
||||
program/execute/vstore/vstore-float-private: crash
|
||||
program/execute/vstore/vstore-half-global: skip
|
||||
program/execute/vstore/vstore-half-local: skip
|
||||
program/execute/vstore/vstore-half-private: skip
|
||||
program/execute/vstore/vstore-int-private: crash
|
||||
program/execute/vstore/vstore-long-private: crash
|
||||
program/execute/vstore/vstore-short-private: crash
|
||||
program/execute/vstore/vstore-uchar-private: crash
|
||||
program/execute/vstore/vstore-uint-private: crash
|
||||
program/execute/vstore/vstore-ulong-private: crash
|
||||
program/execute/vstore/vstore-ushort-private: crash
|
||||
program/execute/vstore/vstore_half-double-global: crash
|
||||
program/execute/vstore/vstore_half-double-local: crash
|
||||
program/execute/vstore/vstore_half-double-private: crash
|
||||
program/execute/vstore/vstore_half-float-private: crash
|
||||
program/execute/vstore/vstorea_half-double-global: crash
|
||||
program/execute/vstore/vstorea_half-double-local: crash
|
||||
program/execute/vstore/vstorea_half-double-private: crash
|
||||
program/execute/vstore/vstorea_half-float-private: crash
|
||||
summary:
|
||||
name: results
|
||||
---- --------
|
||||
pass: 2969
|
||||
fail: 105
|
||||
crash: 67
|
||||
pass: 3560
|
||||
fail: 107
|
||||
crash: 18
|
||||
skip: 73
|
||||
timeout: 1
|
||||
timeout: 4
|
||||
warn: 0
|
||||
incomplete: 0
|
||||
dmesg-warn: 0
|
||||
|
|
@ -259,4 +215,4 @@ summary:
|
|||
changes: 0
|
||||
fixes: 0
|
||||
regressions: 0
|
||||
total: 3215
|
||||
total: 3762
|
||||
|
|
|
|||
|
|
@ -1485,6 +1485,28 @@ static void visit_interp(struct lp_build_nir_context *bld_base,
|
|||
bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
|
||||
}
|
||||
|
||||
static void visit_load_scratch(struct lp_build_nir_context *bld_base,
|
||||
nir_intrinsic_instr *instr,
|
||||
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
LLVMValueRef offset = get_src(bld_base, instr->src[0]);
|
||||
|
||||
bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
|
||||
nir_dest_bit_size(instr->dest), offset, result);
|
||||
}
|
||||
|
||||
static void visit_store_scratch(struct lp_build_nir_context *bld_base,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
LLVMValueRef val = get_src(bld_base, instr->src[0]);
|
||||
LLVMValueRef offset = get_src(bld_base, instr->src[1]);
|
||||
int writemask = instr->const_index[2];
|
||||
int nc = nir_src_num_components(instr->src[0]);
|
||||
int bitsize = nir_src_bit_size(instr->src[0]);
|
||||
bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
|
||||
}
|
||||
|
||||
|
||||
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
|
|
@ -1648,6 +1670,12 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
|
|||
case nir_intrinsic_interp_deref_at_sample:
|
||||
visit_interp(bld_base, instr, result);
|
||||
break;
|
||||
case nir_intrinsic_load_scratch:
|
||||
visit_load_scratch(bld_base, instr, result);
|
||||
break;
|
||||
case nir_intrinsic_store_scratch:
|
||||
visit_store_scratch(bld_base, instr);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unsupported intrinsic: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
|
|
|||
|
|
@ -147,6 +147,15 @@ struct lp_build_nir_context
|
|||
LLVMValueRef reg_storage,
|
||||
LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]);
|
||||
|
||||
void (*load_scratch)(struct lp_build_nir_context *bld_base,
|
||||
unsigned nc, unsigned bit_size,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
|
||||
void (*store_scratch)(struct lp_build_nir_context *bld_base,
|
||||
unsigned writemask, unsigned nc,
|
||||
unsigned bit_size, LLVMValueRef offset,
|
||||
LLVMValueRef val);
|
||||
|
||||
void (*emit_var_decl)(struct lp_build_nir_context *bld_base,
|
||||
nir_variable *var);
|
||||
|
||||
|
|
@ -209,6 +218,8 @@ struct lp_build_nir_soa_context
|
|||
LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS];
|
||||
|
||||
LLVMValueRef shared_ptr;
|
||||
LLVMValueRef scratch_ptr;
|
||||
unsigned scratch_size;
|
||||
|
||||
const struct lp_build_coro_suspend_info *coro;
|
||||
|
||||
|
|
|
|||
|
|
@ -1833,6 +1833,136 @@ emit_interp_at(struct lp_build_nir_context *bld_base,
|
|||
}
|
||||
}
|
||||
|
||||
static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
|
||||
struct lp_type type,
|
||||
unsigned scratch_size)
|
||||
{
|
||||
LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
|
||||
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i;
|
||||
|
||||
if (type.length == 1)
|
||||
return LLVMConstInt(elem_type, 0, 0);
|
||||
|
||||
for (i = 0; i < type.length; ++i)
|
||||
elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
|
||||
|
||||
return LLVMConstVector(elems, type.length);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_load_scratch(struct lp_build_nir_context *bld_base,
|
||||
unsigned nc, unsigned bit_size,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
struct gallivm_state * gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
|
||||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||
struct lp_build_context *load_bld;
|
||||
LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
|
||||
uint32_t shift_val = bit_size_to_shift_size(bit_size);
|
||||
|
||||
load_bld = get_int_bld(bld_base, true, bit_size);
|
||||
|
||||
offset = lp_build_add(uint_bld, offset, thread_offsets);
|
||||
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
|
||||
for (unsigned c = 0; c < nc; c++) {
|
||||
LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
|
||||
LLVMValueRef exec_mask = mask_vec(bld_base);
|
||||
|
||||
LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
|
||||
struct lp_build_loop_state loop_state;
|
||||
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
|
||||
|
||||
struct lp_build_if_state ifthen;
|
||||
LLVMValueRef cond, temp_res;
|
||||
|
||||
loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
|
||||
loop_state.counter, "");
|
||||
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
|
||||
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
|
||||
|
||||
lp_build_if(&ifthen, gallivm, cond);
|
||||
LLVMValueRef scalar;
|
||||
LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
|
||||
scalar = lp_build_pointer_get(builder, ptr2, loop_index);
|
||||
|
||||
temp_res = LLVMBuildLoad(builder, result, "");
|
||||
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
|
||||
LLVMBuildStore(builder, temp_res, result);
|
||||
lp_build_else(&ifthen);
|
||||
temp_res = LLVMBuildLoad(builder, result, "");
|
||||
LLVMValueRef zero;
|
||||
if (bit_size == 64)
|
||||
zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
|
||||
else if (bit_size == 16)
|
||||
zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
|
||||
else if (bit_size == 8)
|
||||
zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
|
||||
else
|
||||
zero = lp_build_const_int32(gallivm, 0);
|
||||
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
||||
LLVMBuildStore(builder, temp_res, result);
|
||||
lp_build_endif(&ifthen);
|
||||
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
|
||||
NULL, LLVMIntUGE);
|
||||
outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_store_scratch(struct lp_build_nir_context *bld_base,
|
||||
unsigned writemask, unsigned nc,
|
||||
unsigned bit_size, LLVMValueRef offset,
|
||||
LLVMValueRef dst)
|
||||
{
|
||||
struct gallivm_state * gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
|
||||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||
struct lp_build_context *store_bld;
|
||||
LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
|
||||
uint32_t shift_val = bit_size_to_shift_size(bit_size);
|
||||
store_bld = get_int_bld(bld_base, true, bit_size);
|
||||
|
||||
LLVMValueRef exec_mask = mask_vec(bld_base);
|
||||
offset = lp_build_add(uint_bld, offset, thread_offsets);
|
||||
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
|
||||
|
||||
for (unsigned c = 0; c < nc; c++) {
|
||||
if (!(writemask & (1u << c)))
|
||||
continue;
|
||||
LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
|
||||
LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
|
||||
|
||||
struct lp_build_loop_state loop_state;
|
||||
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
|
||||
|
||||
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
|
||||
loop_state.counter, "");
|
||||
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
|
||||
|
||||
struct lp_build_if_state ifthen;
|
||||
LLVMValueRef cond;
|
||||
|
||||
loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
|
||||
loop_state.counter, "");
|
||||
|
||||
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
|
||||
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
|
||||
lp_build_if(&ifthen, gallivm, cond);
|
||||
|
||||
LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
|
||||
lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
|
||||
|
||||
lp_build_endif(&ifthen);
|
||||
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
|
||||
NULL, LLVMIntUGE);
|
||||
}
|
||||
}
|
||||
|
||||
void lp_build_nir_soa(struct gallivm_state *gallivm,
|
||||
struct nir_shader *shader,
|
||||
const struct lp_build_tgsi_params *params,
|
||||
|
|
@ -1930,6 +2060,8 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
|
|||
bld.bld_base.vote = emit_vote;
|
||||
bld.bld_base.helper_invocation = emit_helper_invocation;
|
||||
bld.bld_base.interp_at = emit_interp_at;
|
||||
bld.bld_base.load_scratch = emit_load_scratch;
|
||||
bld.bld_base.store_scratch = emit_store_scratch;
|
||||
|
||||
bld.mask = params->mask;
|
||||
bld.inputs = params->inputs;
|
||||
|
|
@ -1976,6 +2108,13 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
|
|||
|
||||
bld.bld_base.shader = shader;
|
||||
|
||||
if (shader->scratch_size) {
|
||||
bld.scratch_ptr = lp_build_array_alloca(gallivm,
|
||||
LLVMInt8TypeInContext(gallivm->context),
|
||||
lp_build_const_int32(gallivm, shader->scratch_size * type.length),
|
||||
"scratch");
|
||||
}
|
||||
bld.scratch_size = shader->scratch_size;
|
||||
emit_prologue(&bld);
|
||||
lp_build_nir_llvm(&bld.bld_base, shader);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue