ac/nir,radv: Optimize bounds check for 64 bit CAS.

Skip the bounds check when the application does not ask for robust buffer access.

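The robustBufferAccess feature check is only implemented in radv; radeonsi always sets robust_buffer_access and so keeps the bounds check.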

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen 2019-08-02 12:40:17 +02:00
parent 74baeacafc
commit 72e7b7a00b
8 changed files with 37 additions and 17 deletions

View file

@ -1644,14 +1644,17 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
LLVMValueRef compare,
LLVMValueRef exchange)
{
LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
LLVMBasicBlockRef start_block = NULL, then_block = NULL;
if (ctx->abi->robust_buffer_access) {
LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
start_block = LLVMGetInsertBlock(ctx->ac.builder);
ac_build_ifcc(&ctx->ac, cond, -1);
ac_build_ifcc(&ctx->ac, cond, -1);
LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
then_block = LLVMGetInsertBlock(ctx->ac.builder);
}
LLVMValueRef ptr_parts[2] = {
ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
@ -1673,20 +1676,24 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
ac_build_endif(&ctx->ac, -1);
if (ctx->abi->robust_buffer_access) {
ac_build_endif(&ctx->ac, -1);
LLVMBasicBlockRef incoming_blocks[2] = {
start_block,
then_block,
};
LLVMBasicBlockRef incoming_blocks[2] = {
start_block,
then_block,
};
LLVMValueRef incoming_values[2] = {
LLVMConstInt(ctx->ac.i64, 0, 0),
result,
};
LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
return ret;
LLVMValueRef incoming_values[2] = {
LLVMConstInt(ctx->ac.i64, 0, 0),
result,
};
LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
return ret;
} else {
return result;
}
}
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,

View file

@ -208,6 +208,9 @@ struct ac_shader_abi {
* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
bool gfx9_stride_size_workaround;
bool gfx9_stride_size_workaround_for_atomic;
/* Whether bounds checks are required */
bool robust_buffer_access;
};
#endif /* AC_SHADER_ABI_H */

View file

@ -1886,6 +1886,9 @@ VkResult radv_CreateDevice(
device->enabled_extensions.EXT_descriptor_indexing ||
device->enabled_extensions.EXT_buffer_device_address;
device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
pCreateInfo->pEnabledFeatures->robustBufferAccess;
mtx_init(&device->shader_slab_mutex, mtx_plain);
list_inithead(&device->shader_slabs);

View file

@ -4389,6 +4389,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
ctx.abi.robust_buffer_access = options->robust_buffer_access;
/* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
* we fallback to the old intrinsics for atomic buffer image operations

View file

@ -750,6 +750,9 @@ struct radv_device {
struct radv_device_extension_table enabled_extensions;
/* Whether the app has enabled the robustBufferAccess feature. */
bool robust_buffer_access;
/* Whether the driver uses a global BO list. */
bool use_global_bo_list;

View file

@ -1217,6 +1217,7 @@ radv_shader_variant_compile(struct radv_device *device,
options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
options.supports_spill = true;
options.robust_buffer_access = device->robust_buffer_access;
return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
&options, false, binary_out);

View file

@ -120,6 +120,7 @@ struct radv_nir_compiler_options {
bool unsafe_math;
bool supports_spill;
bool clamp_shadow_reference;
bool robust_buffer_access;
bool dump_shader;
bool dump_preoptir;
bool record_llvm_ir;

View file

@ -1248,6 +1248,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
ctx->abi.inputs = &ctx->inputs[0];
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
ctx->abi.clamp_shadow_reference = true;
ctx->abi.robust_buffer_access = true;
ctx->num_samplers = util_last_bit(info->samplers_declared);
ctx->num_images = util_last_bit(info->images_declared);