mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-18 17:18:30 +02:00
ac/nir: Add workaround for GFX9 buffer views.
On GFX9 whether the buffer size is interpreted as elements or bytes depends on whether IDXEN is enabled in the instruction. If the index is a constant zero, LLVM optimizes IDXEN to 0. Now the size in elements is interpreted in bytes which of course results in out of bounds accesses. The correct fix is most likely to disable the LLVM optimization, but we need something to work with LLVM <= 6.0. radeonsi does the max between stride and element count on the CPU but that results in the size intrinsics returning the wrong size for the buffer. This would cause CTS errors for radv. v2: Also include the store changes. Fixes:e38685cc62'Revert "radv: disable support for VEGA for now."' Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit4503ff760c) [Juan A. Suarez: partially backported from 908a0cd1dbe5, a backport for 17.3 stable branch; resolved trivial conflicts] Conflicts: src/amd/common/ac_nir_to_llvm.c src/amd/vulkan/radv_nir_to_llvm.c
This commit is contained in:
parent
1550c67a3a
commit
df6c2bef90
4 changed files with 62 additions and 6 deletions
|
|
@ -1028,6 +1028,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
|||
ac_get_load_intr_attribs(can_speculate));
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef rsrc,
|
||||
LLVMValueRef vindex,
|
||||
LLVMValueRef voffset,
|
||||
bool can_speculate)
|
||||
{
|
||||
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
|
||||
stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
|
||||
|
||||
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
|
||||
LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
|
||||
elem_count, stride, "");
|
||||
|
||||
LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
|
||||
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
|
||||
return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set range metadata on an instruction. This can only be used on load and
|
||||
* call instructions. If you know an instruction can only produce the values
|
||||
|
|
|
|||
|
|
@ -225,6 +225,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
|||
LLVMValueRef voffset,
|
||||
bool can_speculate);
|
||||
|
||||
/* load_format that handles the stride & element count better if idxen is
|
||||
* disabled by LLVM. */
|
||||
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef rsrc,
|
||||
LLVMValueRef vindex,
|
||||
LLVMValueRef voffset,
|
||||
bool can_speculate);
|
||||
|
||||
LLVMValueRef
|
||||
ac_get_thread_id(struct ac_llvm_context *ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -2308,11 +2308,19 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
|
|||
struct ac_image_args *args)
|
||||
{
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
return ac_build_buffer_load_format(&ctx->ac,
|
||||
args->resource,
|
||||
args->addr,
|
||||
ctx->ac.i32_0,
|
||||
true);
|
||||
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||
return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
|
||||
args->resource,
|
||||
args->addr,
|
||||
ctx->ac.i32_0,
|
||||
true);
|
||||
} else {
|
||||
return ac_build_buffer_load_format(&ctx->ac,
|
||||
args->resource,
|
||||
args->addr,
|
||||
ctx->ac.i32_0,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
args->opcode = ac_image_sample;
|
||||
|
|
@ -3685,8 +3693,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
|
|||
glc = ctx->ac.i1true;
|
||||
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||
LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
|
||||
|
||||
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
|
||||
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
|
||||
|
||||
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
|
||||
elem_count, stride, "");
|
||||
|
||||
rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
|
||||
LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
}
|
||||
|
||||
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
|
||||
params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
|
||||
params[1] = rsrc;
|
||||
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||
ctx->ac.i32_0, ""); /* vindex */
|
||||
params[3] = ctx->ac.i32_0; /* voffset */
|
||||
|
|
@ -6829,6 +6852,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
|
|||
ctx.abi.load_ssbo = radv_load_ssbo;
|
||||
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
||||
ctx.abi.clamp_shadow_reference = false;
|
||||
ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
|
||||
|
||||
if (shader_count >= 2)
|
||||
ac_init_exec_full_mask(&ctx.ac);
|
||||
|
|
|
|||
|
|
@ -145,6 +145,10 @@ struct ac_shader_abi {
|
|||
/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
|
||||
* uses it due to promoting D16 to D32, but radv needs it off. */
|
||||
bool clamp_shadow_reference;
|
||||
|
||||
/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
|
||||
* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
|
||||
bool gfx9_stride_size_workaround;
|
||||
};
|
||||
|
||||
#endif /* AC_SHADER_ABI_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue