Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-05-09 02:28:10 +02:00)
radeonsi: don't use emit_data->args in store_emit
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
a2c18bfbe3
commit
8fb34050b5
1 changed file with 71 additions and 92 deletions
|
|
@ -347,12 +347,21 @@ static void buffer_append_args(
|
||||||
|
|
||||||
static unsigned get_cache_policy(struct si_shader_context *ctx,
|
static unsigned get_cache_policy(struct si_shader_context *ctx,
|
||||||
const struct tgsi_full_instruction *inst,
|
const struct tgsi_full_instruction *inst,
|
||||||
bool atomic, bool force_glc)
|
bool atomic, bool may_store_unaligned,
|
||||||
|
bool writeonly_memory)
|
||||||
{
|
{
|
||||||
unsigned cache_policy = 0;
|
unsigned cache_policy = 0;
|
||||||
|
|
||||||
if (!atomic &&
|
if (!atomic &&
|
||||||
(force_glc ||
|
/* SI has a TC L1 bug causing corruption of 8bit/16bit stores.
|
||||||
|
* All store opcodes not aligned to a dword are affected.
|
||||||
|
* The only way to get unaligned stores in radeonsi is through
|
||||||
|
* shader images. */
|
||||||
|
((may_store_unaligned && ctx->screen->info.chip_class == SI) ||
|
||||||
|
/* If this is write-only, don't keep data in L1 to prevent
|
||||||
|
* evicting L1 cache lines that may be needed by other
|
||||||
|
* instructions. */
|
||||||
|
writeonly_memory ||
|
||||||
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)))
|
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)))
|
||||||
cache_policy |= ac_glc;
|
cache_policy |= ac_glc;
|
||||||
|
|
||||||
|
|
@ -588,30 +597,22 @@ static void load_emit(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void store_emit_buffer(
|
static void store_emit_buffer(struct si_shader_context *ctx,
|
||||||
struct si_shader_context *ctx,
|
LLVMValueRef resource,
|
||||||
struct lp_build_emit_data *emit_data,
|
unsigned writemask,
|
||||||
bool writeonly_memory)
|
LLVMValueRef value,
|
||||||
|
LLVMValueRef voffset,
|
||||||
|
unsigned cache_policy,
|
||||||
|
bool writeonly_memory)
|
||||||
{
|
{
|
||||||
const struct tgsi_full_instruction *inst = emit_data->inst;
|
|
||||||
LLVMBuilderRef builder = ctx->ac.builder;
|
LLVMBuilderRef builder = ctx->ac.builder;
|
||||||
LLVMValueRef base_data = emit_data->args[0];
|
LLVMValueRef base_data = value;
|
||||||
LLVMValueRef base_offset = emit_data->args[3];
|
LLVMValueRef base_offset = voffset;
|
||||||
unsigned writemask = inst->Dst[0].Register.WriteMask;
|
|
||||||
|
|
||||||
/* If this is write-only, don't keep data in L1 to prevent
|
|
||||||
* evicting L1 cache lines that may be needed by other
|
|
||||||
* instructions.
|
|
||||||
*/
|
|
||||||
if (writeonly_memory)
|
|
||||||
emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
|
|
||||||
|
|
||||||
while (writemask) {
|
while (writemask) {
|
||||||
int start, count;
|
int start, count;
|
||||||
const char *intrinsic_name;
|
const char *intrinsic_name;
|
||||||
LLVMValueRef data;
|
LLVMValueRef data, voff, tmp;
|
||||||
LLVMValueRef offset;
|
|
||||||
LLVMValueRef tmp;
|
|
||||||
|
|
||||||
u_bit_scan_consecutive_range(&writemask, &start, &count);
|
u_bit_scan_consecutive_range(&writemask, &start, &count);
|
||||||
|
|
||||||
|
|
@ -650,20 +651,23 @@ static void store_emit_buffer(
|
||||||
intrinsic_name = "llvm.amdgcn.buffer.store.f32";
|
intrinsic_name = "llvm.amdgcn.buffer.store.f32";
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = base_offset;
|
voff = base_offset;
|
||||||
if (start != 0) {
|
if (start != 0) {
|
||||||
offset = LLVMBuildAdd(
|
voff = LLVMBuildAdd(
|
||||||
builder, offset,
|
builder, voff,
|
||||||
LLVMConstInt(ctx->i32, start * 4, 0), "");
|
LLVMConstInt(ctx->i32, start * 4, 0), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
emit_data->args[0] = data;
|
LLVMValueRef args[] = {
|
||||||
emit_data->args[3] = offset;
|
data,
|
||||||
|
resource,
|
||||||
ac_build_intrinsic(
|
ctx->i32_0, /* vindex */
|
||||||
&ctx->ac, intrinsic_name, ctx->voidt,
|
voff,
|
||||||
emit_data->args, emit_data->arg_count,
|
LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0),
|
||||||
ac_get_store_intr_attribs(writeonly_memory));
|
LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0),
|
||||||
|
};
|
||||||
|
ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, args, 6,
|
||||||
|
ac_get_store_intr_attribs(writeonly_memory));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -701,8 +705,17 @@ static void store_emit(
|
||||||
struct tgsi_full_src_register resource_reg =
|
struct tgsi_full_src_register resource_reg =
|
||||||
tgsi_full_src_register_from_dst(&inst->Dst[0]);
|
tgsi_full_src_register_from_dst(&inst->Dst[0]);
|
||||||
unsigned target = inst->Memory.Texture;
|
unsigned target = inst->Memory.Texture;
|
||||||
bool writeonly_memory = false;
|
bool writeonly_memory = is_oneway_access_only(inst, info,
|
||||||
LLVMValueRef chans[4], rsrc;
|
info->shader_buffers_load |
|
||||||
|
info->shader_buffers_atomic,
|
||||||
|
info->images_load |
|
||||||
|
info->images_atomic);
|
||||||
|
bool is_image = inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
|
||||||
|
tgsi_is_bindless_image_file(inst->Dst[0].Register.File);
|
||||||
|
LLVMValueRef chans[4], value;
|
||||||
|
LLVMValueRef vindex = ctx->i32_0;
|
||||||
|
LLVMValueRef voffset = ctx->i32_0;
|
||||||
|
struct ac_image_args args = {};
|
||||||
|
|
||||||
if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
|
if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
|
||||||
store_emit_memory(ctx, emit_data);
|
store_emit_memory(ctx, emit_data);
|
||||||
|
|
@ -712,88 +725,54 @@ static void store_emit(
|
||||||
for (unsigned chan = 0; chan < 4; ++chan)
|
for (unsigned chan = 0; chan < 4; ++chan)
|
||||||
chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
|
chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
|
||||||
|
|
||||||
emit_data->args[emit_data->arg_count++] =
|
value = ac_build_gather_values(&ctx->ac, chans, 4);
|
||||||
ac_build_gather_values(&ctx->ac, chans, 4);
|
|
||||||
|
|
||||||
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
|
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
|
||||||
LLVMValueRef offset, tmp;
|
args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
|
||||||
|
voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0));
|
||||||
rsrc = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
|
} else if (is_image) {
|
||||||
|
image_fetch_rsrc(bld_base, &resource_reg, true, target, &args.resource);
|
||||||
tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
|
image_fetch_coords(bld_base, inst, 0, args.resource, args.coords);
|
||||||
offset = ac_to_integer(&ctx->ac, tmp);
|
vindex = args.coords[0]; /* for buffers only */
|
||||||
|
} else {
|
||||||
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
|
unreachable("unexpected register file");
|
||||||
offset, false, false);
|
|
||||||
} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
|
|
||||||
tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
|
|
||||||
/* 8bit/16bit TC L1 write corruption bug on SI.
|
|
||||||
* All store opcodes not aligned to a dword are affected.
|
|
||||||
*
|
|
||||||
* The only way to get unaligned stores in radeonsi is through
|
|
||||||
* shader images.
|
|
||||||
*/
|
|
||||||
bool force_glc = ctx->screen->info.chip_class == SI;
|
|
||||||
|
|
||||||
image_fetch_rsrc(bld_base, &resource_reg, true, target, &rsrc);
|
|
||||||
image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
|
|
||||||
|
|
||||||
if (target == TGSI_TEXTURE_BUFFER) {
|
|
||||||
buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
|
|
||||||
ctx->i32_0, false, force_glc);
|
|
||||||
} else {
|
|
||||||
emit_data->args[1] = rsrc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
||||||
ac_build_waitcnt(&ctx->ac, VM_CNT);
|
ac_build_waitcnt(&ctx->ac, VM_CNT);
|
||||||
|
|
||||||
writeonly_memory = is_oneway_access_only(inst, info,
|
args.cache_policy = get_cache_policy(ctx, inst,
|
||||||
info->shader_buffers_load |
|
false, /* atomic */
|
||||||
info->shader_buffers_atomic,
|
is_image, /* may_store_unaligned */
|
||||||
info->images_load |
|
writeonly_memory);
|
||||||
info->images_atomic);
|
|
||||||
|
|
||||||
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
|
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
|
||||||
store_emit_buffer(ctx, emit_data, writeonly_memory);
|
store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
|
||||||
|
value, voffset, args.cache_policy, writeonly_memory);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (target == TGSI_TEXTURE_BUFFER) {
|
if (target == TGSI_TEXTURE_BUFFER) {
|
||||||
/* If this is write-only, don't keep data in L1 to prevent
|
LLVMValueRef buf_args[] = {
|
||||||
* evicting L1 cache lines that may be needed by other
|
value,
|
||||||
* instructions.
|
args.resource,
|
||||||
*/
|
vindex,
|
||||||
if (writeonly_memory)
|
ctx->i32_0, /* voffset */
|
||||||
emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
|
LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0),
|
||||||
|
LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0),
|
||||||
|
};
|
||||||
|
|
||||||
emit_data->output[emit_data->chan] = ac_build_intrinsic(
|
emit_data->output[emit_data->chan] = ac_build_intrinsic(
|
||||||
&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
|
&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
|
||||||
ctx->voidt, emit_data->args,
|
ctx->voidt, buf_args, 6,
|
||||||
emit_data->arg_count,
|
|
||||||
ac_get_store_intr_attribs(writeonly_memory));
|
ac_get_store_intr_attribs(writeonly_memory));
|
||||||
} else {
|
} else {
|
||||||
struct ac_image_args args = {};
|
|
||||||
args.opcode = ac_image_store;
|
args.opcode = ac_image_store;
|
||||||
args.data[0] = emit_data->args[0];
|
args.data[0] = value;
|
||||||
args.resource = emit_data->args[1];
|
|
||||||
memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
|
|
||||||
args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
|
args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
|
||||||
args.attributes = ac_get_store_intr_attribs(writeonly_memory);
|
args.attributes = ac_get_store_intr_attribs(writeonly_memory);
|
||||||
args.dmask = 0xf;
|
args.dmask = 0xf;
|
||||||
|
|
||||||
/* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
|
|
||||||
* All store opcodes not aligned to a dword are affected.
|
|
||||||
*/
|
|
||||||
if (ctx->screen->info.chip_class == SI ||
|
|
||||||
/* If this is write-only, don't keep data in L1 to prevent
|
|
||||||
* evicting L1 cache lines that may be needed by other
|
|
||||||
* instructions. */
|
|
||||||
writeonly_memory ||
|
|
||||||
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
|
|
||||||
args.cache_policy = ac_glc;
|
|
||||||
|
|
||||||
emit_data->output[emit_data->chan] =
|
emit_data->output[emit_data->chan] =
|
||||||
ac_build_image_opcode(&ctx->ac, &args);
|
ac_build_image_opcode(&ctx->ac, &args);
|
||||||
}
|
}
|
||||||
|
|
@ -893,7 +872,7 @@ static void atomic_emit(
|
||||||
|
|
||||||
args.data[num_data++] =
|
args.data[num_data++] =
|
||||||
ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
|
ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
|
||||||
args.cache_policy = get_cache_policy(ctx, inst, true, false);
|
args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
|
||||||
|
|
||||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
|
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
|
||||||
args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
|
args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue