diff --git a/.gitlab-ci/piglit/cl.txt b/.gitlab-ci/piglit/cl.txt index c3aff40ddc7..ecd8d6e6698 100644 --- a/.gitlab-ci/piglit/cl.txt +++ b/.gitlab-ci/piglit/cl.txt @@ -18,8 +18,6 @@ program/execute/amdgcn-callee-saved-registers: skip program/execute/amdgcn-f16-inline-immediates: skip program/execute/amdgcn-f32-inline-immediates/add integer 64: fail program/execute/amdgcn-i16-inline-immediates: skip -program/execute/amdgcn-mubuf-negative-vaddr: crash -program/execute/amdgcn.sign_extend_inreg: crash program/execute/atomic_int64_add-global: skip program/execute/atomic_int64_add-global-return: skip program/execute/atomic_int64_add-local: skip @@ -65,8 +63,7 @@ program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char4: fail program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char8: fail program/execute/builtin/builtin-char-popcount-1.2.generated: skip program/execute/builtin/builtin-char-rotate-1.0.generated: crash -program/execute/builtin/builtin-float-cos-1.0.generated: crash -program/execute/builtin/builtin-float-fma-1.0.generated: crash +program/execute/builtin/builtin-float-cos-1.0.generated: timeout program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float1: fail program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float16: fail program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float2: fail @@ -85,13 +82,9 @@ program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float16: fail program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float2: fail program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float4: fail program/execute/builtin/builtin-float-fmin-1.0.generated/tss_fmin float8: fail -program/execute/builtin/builtin-float-fract-1.0.generated: crash -program/execute/builtin/builtin-float-frexp-1.0.generated: crash program/execute/builtin/builtin-float-isfinite-1.0.generated: crash program/execute/builtin/builtin-float-isnormal-1.0.generated: crash 
program/execute/builtin/builtin-float-ldexp-1.0.generated: fail -program/execute/builtin/builtin-float-lgamma-1.0.generated: crash -program/execute/builtin/builtin-float-lgamma_r-1.0.generated: crash program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float1: fail program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float16: fail program/execute/builtin/builtin-float-maxmag-1.1.generated/maxmag float2: fail @@ -103,12 +96,9 @@ program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float2: fail program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float4: fail program/execute/builtin/builtin-float-minmag-1.1.generated/minmag float8: fail program/execute/builtin/builtin-float-mix-1.0.generated: crash -program/execute/builtin/builtin-float-modf-1.0.generated: crash -program/execute/builtin/builtin-float-remquo-1.0.generated: crash -program/execute/builtin/builtin-float-sin-1.0.generated: crash +program/execute/builtin/builtin-float-sin-1.0.generated: timeout program/execute/builtin/builtin-float-sincos-1.0.generated: timeout -program/execute/builtin/builtin-float-tan-1.0.generated: crash -program/execute/builtin/builtin-float-tgamma-1.0.generated: crash +program/execute/builtin/builtin-float-tan-1.0.generated: timeout program/execute/builtin/builtin-int-popcount-1.2.generated: skip program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long1: fail program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long16: fail @@ -178,10 +168,9 @@ program/execute/builtin/builtin-ushort-popcount-1.2.generated: skip program/execute/builtin/builtin-ushort-upsample-1.0.generated: crash program/execute/call-clobbers-amdgcn: skip program/execute/calls-large-struct: crash -program/execute/calls-struct: crash -program/execute/gegl-rgb-gamma-u8-to-ragabaf: crash +program/execute/calls-struct/regs struct: fail +program/execute/calls-struct/small struct in regs: fail program/execute/global-offset/3d, input dependent: fail 
-program/execute/i32-stack-array: crash program/execute/image-attributes: crash program/execute/image-read-2d/read float from cl_float cl_rgba image.: fail program/execute/image-read-2d/read signed integer from cl_signed_int8 cl_rgba image.: fail @@ -192,66 +181,33 @@ program/execute/load-hi16-generic: skip program/execute/load-lo16: crash program/execute/load-lo16-generic: skip program/execute/mad-mix: skip -program/execute/multiple-stack-objects: crash -program/execute/negative-private-base-pointer: crash program/execute/program-tester-check-local-size-test-should-skip/this test should skip: skip -program/execute/pyrit-wpa-psk: crash -program/execute/realign-stack: crash -program/execute/reference: crash program/execute/sampler/read from image using clamp_to_edge addressing mode: fail program/execute/sampler/read from image using linear filtering and normalized coords: fail program/execute/sampler/read from image using linear filtering and unnormalized coords: fail program/execute/scalar-logical-float: skip program/execute/store-hi16-generic: skip -program/execute/v2i32-stack: crash -program/execute/v3i32-stack: crash -program/execute/v3i32-stack-array: crash -program/execute/v4i32-stack: crash -program/execute/vload/vload-char-private: crash -program/execute/vload/vload-double-private: crash -program/execute/vload/vload-float-private: crash program/execute/vload/vload-half-constant: skip program/execute/vload/vload-half-global: skip program/execute/vload/vload-half-local: skip program/execute/vload/vload-half-private: skip -program/execute/vload/vload-int-private: crash -program/execute/vload/vload-long-private: crash -program/execute/vload/vload-short-private: crash -program/execute/vload/vload-uchar-private: crash -program/execute/vload/vload-uint-private: crash -program/execute/vload/vload-ulong-private: crash -program/execute/vload/vload-ushort-private: crash -program/execute/vload/vload_half-float-private: crash 
-program/execute/vload/vloada_half-float-private: crash -program/execute/vstore/vstore-char-private: crash -program/execute/vstore/vstore-double-private: crash -program/execute/vstore/vstore-float-private: crash program/execute/vstore/vstore-half-global: skip program/execute/vstore/vstore-half-local: skip program/execute/vstore/vstore-half-private: skip -program/execute/vstore/vstore-int-private: crash -program/execute/vstore/vstore-long-private: crash -program/execute/vstore/vstore-short-private: crash -program/execute/vstore/vstore-uchar-private: crash -program/execute/vstore/vstore-uint-private: crash -program/execute/vstore/vstore-ulong-private: crash -program/execute/vstore/vstore-ushort-private: crash program/execute/vstore/vstore_half-double-global: crash program/execute/vstore/vstore_half-double-local: crash program/execute/vstore/vstore_half-double-private: crash -program/execute/vstore/vstore_half-float-private: crash program/execute/vstore/vstorea_half-double-global: crash program/execute/vstore/vstorea_half-double-local: crash program/execute/vstore/vstorea_half-double-private: crash -program/execute/vstore/vstorea_half-float-private: crash summary: name: results ---- -------- - pass: 2969 - fail: 105 - crash: 67 + pass: 3560 + fail: 107 + crash: 18 skip: 73 - timeout: 1 + timeout: 4 warn: 0 incomplete: 0 dmesg-warn: 0 @@ -259,4 +215,4 @@ summary: changes: 0 fixes: 0 regressions: 0 - total: 3215 + total: 3762
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 2980227ebd7..2805434f16a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -1485,6 +1485,31 @@ static void visit_interp(struct lp_build_nir_context *bld_base,
    bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
 }
 
+/* Load from scratch memory; src[0] supplies the offset. */
+static void visit_load_scratch(struct lp_build_nir_context *bld_base,
+                               nir_intrinsic_instr *instr,
+                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+
+   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
+                          nir_dest_bit_size(instr->dest), offset, result);
+}
+
+/* Store to scratch memory; src[0] is the value, src[1] the offset. */
+static void visit_store_scratch(struct lp_build_nir_context *bld_base,
+                                nir_intrinsic_instr *instr)
+{
+   LLVMValueRef val = get_src(bld_base, instr->src[0]);
+   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
+   /* Use the accessor rather than a hard-coded const_index[] slot. */
+   unsigned writemask = nir_intrinsic_write_mask(instr);
+   unsigned nc = nir_src_num_components(instr->src[0]);
+   unsigned bitsize = nir_src_bit_size(instr->src[0]);
+
+   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
+}
+
 static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
 {
@@ -1648,6 +1673,12 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_interp_deref_at_sample:
       visit_interp(bld_base, instr, result);
       break;
+   case nir_intrinsic_load_scratch:
+      visit_load_scratch(bld_base, instr, result);
+      break;
+   case nir_intrinsic_store_scratch:
+      visit_store_scratch(bld_base, instr);
+      break;
    default:
       fprintf(stderr, "Unsupported intrinsic: ");
       nir_print_instr(&instr->instr, stderr);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index 65b574fda01..27268672170 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -147,6 +147,15 @@ struct lp_build_nir_context
                     LLVMValueRef reg_storage,
                     LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]);
 
+   void (*load_scratch)(struct lp_build_nir_context *bld_base,
+                        unsigned nc, unsigned bit_size,
+                        LLVMValueRef offset,
+                        LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
+   void (*store_scratch)(struct lp_build_nir_context *bld_base,
+                         unsigned writemask, unsigned nc,
+                         unsigned bit_size, LLVMValueRef offset,
+                         LLVMValueRef val);
+
    void (*emit_var_decl)(struct lp_build_nir_context *bld_base,
                          nir_variable *var);
 
@@ -209,6 +218,8 @@ struct lp_build_nir_soa_context
    LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS];
 
    LLVMValueRef shared_ptr;
+   LLVMValueRef scratch_ptr;
+   unsigned scratch_size;
 
    const struct lp_build_coro_suspend_info *coro;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index 2f27c9a2552..501f01faa53 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -1833,6 +1833,148 @@ emit_interp_at(struct lp_build_nir_context *bld_base,
    }
 }
 
+/*
+ * Build the offset of each SIMD lane's slice of the scratch allocation:
+ * lane i starts at scratch_size * i.  A single-lane type gets a scalar 0.
+ */
+static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
+                                               struct lp_type type,
+                                               unsigned scratch_size)
+{
+   LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   if (type.length == 1)
+      return LLVMConstInt(elem_type, 0, 0);
+
+   for (i = 0; i < type.length; ++i)
+      elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
+
+   return LLVMConstVector(elems, type.length);
+}
+
+/*
+ * Load nc components of bit_size bits each from scratch memory.
+ *
+ * offset is combined with the per-lane slice offsets and then shifted down
+ * by bit_size_to_shift_size() to index scratch_ptr as an array of
+ * bit_size-wide integers.  Lanes are loaded one at a time; a lane whose
+ * mask_vec() entry is zero gets zero instead of touching memory.
+ */
+static void
+emit_load_scratch(struct lp_build_nir_context *bld_base,
+                  unsigned nc, unsigned bit_size,
+                  LLVMValueRef offset,
+                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
+   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
+   uint32_t shift_val = bit_size_to_shift_size(bit_size);
+   LLVMValueRef zero = LLVMConstInt(load_bld->elem_type, 0, 0);
+
+   offset = lp_build_add(uint_bld, offset, thread_offsets);
+   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
+
+   for (unsigned c = 0; c < nc; c++) {
+      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
+      LLVMValueRef exec_mask = mask_vec(bld_base);
+      LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
+      struct lp_build_loop_state loop_state;
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond, temp_res;
+
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      loop_index = LLVMBuildExtractElement(builder, loop_index,
+                                           loop_state.counter, "");
+      cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+
+      lp_build_if(&ifthen, gallivm, cond);
+      {
+         LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
+         LLVMValueRef scalar = lp_build_pointer_get(builder, ptr2, loop_index);
+
+         temp_res = LLVMBuildLoad(builder, result, "");
+         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+         LLVMBuildStore(builder, temp_res, result);
+      }
+      lp_build_else(&ifthen);
+      {
+         temp_res = LLVMBuildLoad(builder, result, "");
+         temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
+         LLVMBuildStore(builder, temp_res, result);
+      }
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+      outval[c] = LLVMBuildLoad(builder, result, "");
+   }
+}
+
+/*
+ * Store the write-masked components of dst to scratch memory.
+ *
+ * Addressing matches emit_load_scratch().  Lanes are written one at a
+ * time, and only lanes whose mask_vec() entry is non-zero are stored.
+ */
+static void
+emit_store_scratch(struct lp_build_nir_context *bld_base,
+                   unsigned writemask, unsigned nc,
+                   unsigned bit_size, LLVMValueRef offset,
+                   LLVMValueRef dst)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   struct lp_build_context *store_bld = get_int_bld(bld_base, true, bit_size);
+   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
+   uint32_t shift_val = bit_size_to_shift_size(bit_size);
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+
+   offset = lp_build_add(uint_bld, offset, thread_offsets);
+   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
+
+   for (unsigned c = 0; c < nc; c++) {
+      if (!(writemask & (1u << c)))
+         continue;
+
+      /* With more than one component, dst is an aggregate; pick out component c. */
+      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
+      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
+      struct lp_build_loop_state loop_state;
+      struct lp_build_if_state ifthen;
+      LLVMValueRef lane_val, cond;
+
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      /* This is the lane's value (not a pointer), cast to the store integer type. */
+      lane_val = LLVMBuildExtractElement(builder, val, loop_state.counter, "");
+      lane_val = LLVMBuildBitCast(builder, lane_val, store_bld->elem_type, "");
+
+      loop_index = LLVMBuildExtractElement(builder, loop_index,
+                                           loop_state.counter, "");
+
+      cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+      {
+         LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
+
+         lp_build_pointer_set(builder, ptr2, loop_index, lane_val);
+      }
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+   }
+}
+
 void lp_build_nir_soa(struct gallivm_state *gallivm,
                       struct nir_shader *shader,
                       const struct lp_build_tgsi_params *params,
@@ -1930,6 +2072,8 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.bld_base.vote = emit_vote;
    bld.bld_base.helper_invocation = emit_helper_invocation;
    bld.bld_base.interp_at = emit_interp_at;
+   bld.bld_base.load_scratch = emit_load_scratch;
+   bld.bld_base.store_scratch = emit_store_scratch;
 
    bld.mask = params->mask;
    bld.inputs = params->inputs;
@@ -1976,6 +2120,13 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
 
    bld.bld_base.shader = shader;
 
+   if (shader->scratch_size) {
+      bld.scratch_ptr = lp_build_array_alloca(gallivm,
+                                              LLVMInt8TypeInContext(gallivm->context),
+                                              lp_build_const_int32(gallivm, shader->scratch_size * type.length),
+                                              "scratch");
+   }
+   bld.scratch_size = shader->scratch_size;
    emit_prologue(&bld);
    lp_build_nir_llvm(&bld.bld_base, shader);
 