radeonsi: replace llvm resource code with nir lower

Port from ac_nir_to_llvm.c and si_shader_llvm_resource.c.

Because the LLVM backend needs a waterfall loop for divergent indices,
we can't load the bound-texture descriptor directly in NIR. So the
load_sampler_desc ABI callback is kept only to turn a bound-texture
index into a descriptor.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18666>
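
A standalone C sketch (not part of the commit) of the split described above, mirroring load_deref_sampler_desc and si_llvm_load_sampler_desc in the diff below: for a bound (non-bindless) texture the NIR pass now emits only an i32 slot index into the combined samplers_and_images list, and the LLVM backend resolves that index into a descriptor, wrapping the load in a waterfall loop when the index is divergent. The SI_NUM_IMAGE_SLOTS value is a placeholder for the real constant in si_shader.h.

#include <stdio.h>

#define SI_NUM_IMAGE_SLOTS 32   /* placeholder; the real value lives in si_shader.h */

/* What the NIR pass emits for a bound texture: just a slot index into the
 * combined samplers_and_images list (textures follow the image slots).
 * Converting this index into an actual descriptor is left to the LLVM
 * backend's load_sampler_desc callback, which can be wrapped in a waterfall
 * loop when the index is not dynamically uniform. */
static int bound_texture_slot(int texture_index)
{
   return SI_NUM_IMAGE_SLOTS / 2 + texture_index;
}

int main(void)
{
   printf("texture 3 -> combined slot %d\n", bound_texture_slot(3));
   return 0;
}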
Qiang Yu 2022-09-08 18:06:56 +08:00
parent e85c5d8779
commit d4fdeaa820
7 changed files with 495 additions and 304 deletions


@ -2485,7 +2485,6 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
LLVMConstInt(ctx->ac.i32, 2, false),
LLVMConstInt(ctx->ac.i32, 3, false),
};
LLVMValueRef sample_index = NULL;
int count;
ASSERTED bool add_frag_pos =
@ -2495,25 +2494,6 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
assert(!add_frag_pos && "Input attachments should be lowered by this point.");
count = image_type_to_components_count(dim, is_array);
if (ctx->ac.gfx_level < GFX11 &&
is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
instr->intrinsic == nir_intrinsic_bindless_image_load ||
instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
instr->intrinsic == nir_intrinsic_bindless_image_sparse_load)) {
LLVMValueRef fmask_load_address[3];
fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
if (is_array)
fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
else
fmask_load_address[2] = NULL;
sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
sample_index = adjust_sample_index_using_fmask(
&ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
sample_index, get_image_descriptor(ctx, instr, dynamic_desc_index, AC_DESC_FMASK, false));
}
if (count == 1 && !gfx9_1d) {
if (instr->src[1].ssa->num_components)
args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
@ -2577,9 +2557,8 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
}
if (is_ms) {
if (!sample_index)
sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
args->coords[count] = sample_index;
/* sample index */
args->coords[count] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
count++;
}
}
@ -2647,7 +2626,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
res = ac_to_integer(&ctx->ac, res);
} else if (instr->intrinsic == nir_intrinsic_image_deref_samples_identical) {
} else if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
assert(ctx->ac.gfx_level < GFX11);
args.opcode = ac_image_load;
@ -2659,8 +2638,6 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;
res = ac_build_image_opcode(&ctx->ac, &args);
res = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");
res = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, res, ctx->ac.i32_0, "");
} else {
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
@ -3823,6 +3800,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
break;
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_fragment_mask_load_amd:
result = visit_image_load(ctx, instr, true);
break;
case nir_intrinsic_image_deref_load:
@ -4611,6 +4589,8 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr,
bool divergent)
{
bool texture_handle_divergent = false;
bool sampler_handle_divergent = false;
LLVMValueRef texture_dynamic_handle = NULL;
LLVMValueRef sampler_dynamic_handle = NULL;
nir_deref_instr *texture_deref_instr = NULL;
@ -4637,10 +4617,14 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
else
*samp_ptr = val;
} else {
if (instr->src[i].src_type == nir_tex_src_texture_handle)
bool divergent = instr->src[i].src.ssa->divergent;
if (instr->src[i].src_type == nir_tex_src_texture_handle) {
texture_dynamic_handle = val;
else
texture_handle_divergent = divergent;
} else {
sampler_dynamic_handle = val;
sampler_handle_divergent = divergent;
}
}
break;
}
@ -4671,11 +4655,23 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
}
if (texture_dynamic_handle || sampler_dynamic_handle) {
/* instr->sampler_non_uniform and texture_non_uniform are always false in GLSL,
* but this can lead to unexpected behavior if texture/sampler index come from
* a vertex attribute.
* For instance, 2 consecutive draws using 2 different index values,
* could be squashed together by the hw - producing a single draw with
* non-dynamically uniform index.
* To avoid this, detect divergent indexing, and use enter_waterfall.
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253.
*/
/* descriptor handles given through nir_tex_src_{texture,sampler}_handle */
if (instr->texture_non_uniform)
if (instr->texture_non_uniform ||
(ctx->abi->use_waterfall_for_divergent_tex_samplers && texture_handle_divergent))
texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, divergent);
if (instr->sampler_non_uniform)
if (instr->sampler_non_uniform ||
(ctx->abi->use_waterfall_for_divergent_tex_samplers && sampler_handle_divergent))
sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, divergent);
if (texture_dynamic_handle)


@ -56,7 +56,6 @@ files_libradeonsi = files(
'si_shader_llvm.c',
'si_shader_llvm_gs.c',
'si_shader_llvm_ps.c',
'si_shader_llvm_resources.c',
'si_shader_llvm_tess.c',
'si_shader_llvm_vs.c',
'si_shader_nir.c',


@ -115,6 +115,168 @@ static nir_ssa_def *load_ssbo_desc(nir_builder *b, nir_src *index,
return nir_load_smem_amd(b, 4, addr, offset);
}
static nir_ssa_def *fixup_image_desc(nir_builder *b, nir_ssa_def *rsrc, bool uses_store,
struct lower_resource_state *s)
{
struct si_shader_selector *sel = s->shader->selector;
struct si_screen *screen = sel->screen;
/**
* Given a 256-bit resource descriptor, force the DCC enable bit to off.
*
* At least on Tonga, executing image stores on images with non-trivial DCC
* enabled can eventually lead to lockups. This can occur when an
* application binds an image as read-only but then uses a shader that writes
* to it. The OpenGL spec allows almost arbitrarily bad behavior (including
* program termination) in this case, but it doesn't cost much to be a bit
* nicer: disabling DCC in the shader still leads to undefined results but
* avoids the lockup.
*/
if (uses_store &&
screen->info.gfx_level <= GFX9 &&
screen->info.gfx_level >= GFX8) {
nir_ssa_def *tmp = nir_channel(b, rsrc, 6);
tmp = nir_iand_imm(b, tmp, C_008F28_COMPRESSION_EN);
rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
}
if (!uses_store &&
screen->info.has_image_load_dcc_bug &&
screen->always_allow_dcc_stores) {
nir_ssa_def *tmp = nir_channel(b, rsrc, 6);
tmp = nir_iand_imm(b, tmp, C_00A018_WRITE_COMPRESS_ENABLE);
rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
}
return rsrc;
}
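
A standalone sketch of the bit-clear pattern used above, assuming the C_* macros from sid.h are complement masks (every bit set except the field), so AND-ing dword 6 of the descriptor with them clears that field; the bit position below is a made-up placeholder.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_FIELD        (1u << 12)        /* hypothetical field bit        */
#define EXAMPLE_FIELD_CLEAR  (~EXAMPLE_FIELD)  /* shape of a C_xxx clear mask   */

int main(void)
{
   uint32_t dword6 = 0xffffffffu;
   dword6 &= EXAMPLE_FIELD_CLEAR;   /* same shape as nir_iand_imm(b, tmp, C_xxx) */
   printf("0x%08x\n", dword6);      /* field cleared, all other bits preserved   */
   return 0;
}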
/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
* adjust "index" to point to FMASK.
*/
static nir_ssa_def *load_image_desc(nir_builder *b, nir_ssa_def *list, nir_ssa_def *index,
enum ac_descriptor_type desc_type, bool uses_store,
struct lower_resource_state *s)
{
/* index is in uvec8 (8-dword) units; convert it to a byte offset */
nir_ssa_def *offset = nir_ishl_imm(b, index, 5);
unsigned num_channels;
if (desc_type == AC_DESC_BUFFER) {
offset = nir_iadd_imm(b, offset, 16);
num_channels = 4;
} else {
assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
num_channels = 8;
}
nir_ssa_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset);
if (desc_type == AC_DESC_IMAGE)
rsrc = fixup_image_desc(b, rsrc, uses_store, s);
return rsrc;
}
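
A standalone sketch of the offset arithmetic above: image and FMASK slots are 8 dwords (32 bytes), hence the shift by 5, and a buffer view occupies the last 4 dwords of the slot, hence the extra 16 bytes.

#include <stdio.h>

/* Byte offset of a descriptor in the image list, mirroring load_image_desc(). */
static unsigned image_desc_offset(unsigned index, int is_buffer)
{
   unsigned offset = index << 5;   /* 8 dwords * 4 bytes per slot */
   if (is_buffer)
      offset += 16;                /* buffer view = last 4 dwords of the slot */
   return offset;
}

int main(void)
{
   printf("slot 3: image at %u, buffer view at %u\n",
          image_desc_offset(3, 0), image_desc_offset(3, 1));
   return 0;
}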
static nir_ssa_def *deref_to_index(nir_builder *b,
nir_deref_instr *deref,
unsigned max_slots,
nir_ssa_def **dynamic_index_ret,
unsigned *const_index_ret)
{
unsigned const_index = 0;
nir_ssa_def *dynamic_index = NULL;
while (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
if (nir_src_is_const(deref->arr.index)) {
const_index += array_size * nir_src_as_uint(deref->arr.index);
} else {
nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
dynamic_index = dynamic_index ? nir_iadd(b, dynamic_index, tmp) : tmp;
}
deref = nir_deref_instr_parent(deref);
}
unsigned base_index = deref->var->data.binding;
const_index += base_index;
/* Redirect invalid resource indices to the first array element. */
if (const_index >= max_slots)
const_index = base_index;
nir_ssa_def *index = nir_imm_int(b, const_index);
if (dynamic_index) {
index = nir_iadd(b, dynamic_index, index);
/* From the GL_ARB_shader_image_load_store extension spec:
*
* If a shader performs an image load, store, or atomic
* operation using an image variable declared as an array,
* and if the index used to select an individual element is
* negative or greater than or equal to the size of the
* array, the results of the operation are undefined but may
* not lead to termination.
*/
index = clamp_index(b, index, max_slots);
}
if (dynamic_index_ret)
*dynamic_index_ret = dynamic_index;
if (const_index_ret)
*const_index_ret = const_index;
return index;
}
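
A worked example of the flattening above, assuming a hypothetical declaration image2D imgs[3][4] with binding 2 and the access imgs[a][1]: the innermost deref contributes a constant 1 (its element holds one image), the outer deref contributes a*4 (each outer element holds four images), and the variable's binding is added last.

#include <stdio.h>

/* Flattened index for imgs[a][1] with binding == 2 and an inner array size
 * of 4 -- purely illustrative, mirroring the loop in deref_to_index(). */
static int flattened_index(int a)
{
   int binding = 2;
   int const_index = 1;        /* innermost [1]: element aoa size is 1   */
   int dynamic_index = a * 4;  /* outer [a]: each element holds 4 images */
   return binding + const_index + dynamic_index;
}

int main(void)
{
   printf("imgs[2][1] -> index %d\n", flattened_index(2));  /* 2 + 1 + 8 = 11 */
   return 0;
}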
static nir_ssa_def *load_deref_image_desc(nir_builder *b, nir_deref_instr *deref,
enum ac_descriptor_type desc_type, bool is_load,
struct lower_resource_state *s)
{
unsigned const_index;
nir_ssa_def *dynamic_index;
nir_ssa_def *index = deref_to_index(b, deref, s->shader->selector->info.base.num_images,
&dynamic_index, &const_index);
nir_ssa_def *desc;
if (!dynamic_index && desc_type != AC_DESC_FMASK &&
const_index < s->shader->selector->cs_num_images_in_user_sgprs) {
/* Fast path if the image is in user SGPRs. */
desc = ac_nir_load_arg(b, &s->args->ac, s->args->cs_image[const_index]);
if (desc_type == AC_DESC_IMAGE)
desc = fixup_image_desc(b, desc, !is_load, s);
} else {
/* FMASKs are separate from images. */
if (desc_type == AC_DESC_FMASK)
index = nir_iadd_imm(b, index, SI_NUM_IMAGES);
index = nir_isub(b, nir_imm_int(b, SI_NUM_IMAGE_SLOTS - 1), index);
nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
desc = load_image_desc(b, list, index, desc_type, !is_load, s);
}
return desc;
}
static nir_ssa_def *load_bindless_image_desc(nir_builder *b, nir_ssa_def *index,
enum ac_descriptor_type desc_type, bool is_load,
struct lower_resource_state *s)
{
/* Bindless image descriptors use 16-dword slots. */
index = nir_ishl_imm(b, index, 1);
/* FMASK is right after the image. */
if (desc_type == AC_DESC_FMASK)
index = nir_iadd_imm(b, index, 1);
nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);
return load_image_desc(b, list, index, desc_type, !is_load, s);
}
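
A standalone sketch of the bindless index math above: each bindless slot is 16 dwords, i.e. two 8-dword entries, so the handle is doubled and the FMASK sits in the entry directly after the image.

#include <stdio.h>

/* Entry index (in 8-dword units) for a bindless handle, mirroring
 * load_bindless_image_desc(). */
static unsigned bindless_entry(unsigned handle, int want_fmask)
{
   unsigned entry = handle << 1;   /* one 16-dword slot = two 8-dword entries */
   return want_fmask ? entry + 1 : entry;
}

int main(void)
{
   printf("handle 5: image entry %u, fmask entry %u\n",
          bindless_entry(5, 0), bindless_entry(5, 1));
   return 0;
}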
static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
struct lower_resource_state *s)
{
@ -161,6 +323,103 @@ static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin
nir_instr_remove(&intrin->instr);
break;
}
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_fragment_mask_load_amd:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_image_deref_atomic_umin:
case nir_intrinsic_image_deref_atomic_fmin:
case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_image_deref_atomic_fmax:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_atomic_fadd:
case nir_intrinsic_image_deref_atomic_inc_wrap:
case nir_intrinsic_image_deref_atomic_dec_wrap:
case nir_intrinsic_image_deref_descriptor_amd: {
assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
enum ac_descriptor_type desc_type;
if (intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd) {
desc_type = AC_DESC_FMASK;
} else {
enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
}
bool is_load =
intrin->intrinsic == nir_intrinsic_image_deref_load ||
intrin->intrinsic == nir_intrinsic_image_deref_sparse_load ||
intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd ||
intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd;
nir_ssa_def *desc = load_deref_image_desc(b, deref, desc_type, is_load, s);
if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
nir_instr_remove(&intrin->instr);
} else {
nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
nir_rewrite_image_intrinsic(intrin, desc, true);
}
break;
}
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_fragment_mask_load_amd:
case nir_intrinsic_bindless_image_store:
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_fmin:
case nir_intrinsic_bindless_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_fmax:
case nir_intrinsic_bindless_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_fadd:
case nir_intrinsic_bindless_image_atomic_inc_wrap:
case nir_intrinsic_bindless_image_atomic_dec_wrap: {
assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));
enum ac_descriptor_type desc_type;
if (intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
desc_type = AC_DESC_FMASK;
} else {
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intrin);
desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
}
bool is_load =
intrin->intrinsic == nir_intrinsic_bindless_image_load ||
intrin->intrinsic == nir_intrinsic_bindless_image_sparse_load ||
intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd ||
intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd;
nir_ssa_def *index = nir_u2u32(b, intrin->src[0].ssa);
nir_ssa_def *desc = load_bindless_image_desc(b, index, desc_type, is_load, s);
if (intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd) {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
nir_instr_remove(&intrin->instr);
} else {
nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], nir_src_for_ssa(desc));
}
break;
}
default:
return false;
}
@ -168,6 +427,148 @@ static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin
return true;
}
static nir_ssa_def *load_sampler_desc(nir_builder *b, nir_ssa_def *list, nir_ssa_def *index,
enum ac_descriptor_type desc_type)
{
/* index is in 16-dword units; convert it to a byte offset */
nir_ssa_def *offset = nir_ishl_imm(b, index, 6);
unsigned num_channels = 0;
switch (desc_type) {
case AC_DESC_IMAGE:
/* The image is at [0:7]. */
num_channels = 8;
break;
case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
offset = nir_iadd_imm(b, offset, 16);
num_channels = 4;
break;
case AC_DESC_FMASK:
/* The FMASK is at [8:15]. */
offset = nir_iadd_imm(b, offset, 32);
num_channels = 8;
break;
case AC_DESC_SAMPLER:
/* The sampler state is at [12:15]. */
offset = nir_iadd_imm(b, offset, 48);
num_channels = 4;
break;
default:
unreachable("invalid desc type");
break;
}
return nir_load_smem_amd(b, num_channels, list, offset);
}
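
A standalone sketch of the slot layout encoded above: each combined sampler+image slot is 16 dwords (64 bytes, hence the shift by 6), with the image view at dwords [0:7], the buffer view at [4:7], the FMASK at [8:15] and the sampler state at [12:15], giving byte offsets 0, 16, 32 and 48.

#include <stdio.h>

enum desc_kind { DESC_IMAGE, DESC_BUFFER, DESC_FMASK, DESC_SAMPLER };

/* Byte offset of a descriptor in the combined list, mirroring load_sampler_desc(). */
static unsigned combined_desc_offset(unsigned index, enum desc_kind kind)
{
   unsigned offset = index << 6;            /* 16 dwords * 4 bytes per slot */
   switch (kind) {
   case DESC_IMAGE:   return offset;        /* dwords [0:7]   */
   case DESC_BUFFER:  return offset + 16;   /* dwords [4:7]   */
   case DESC_FMASK:   return offset + 32;   /* dwords [8:15]  */
   case DESC_SAMPLER: return offset + 48;   /* dwords [12:15] */
   }
   return offset;
}

int main(void)
{
   printf("slot 1: image %u, sampler %u\n",
          combined_desc_offset(1, DESC_IMAGE), combined_desc_offset(1, DESC_SAMPLER));
   return 0;
}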
static nir_ssa_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref,
enum ac_descriptor_type desc_type,
struct lower_resource_state *s,
bool return_descriptor)
{
unsigned max_slots = BITSET_LAST_BIT(b->shader->info.textures_used);
nir_ssa_def *index = deref_to_index(b, deref, max_slots, NULL, NULL);
index = nir_iadd_imm(b, index, SI_NUM_IMAGE_SLOTS / 2);
/* return actual desc when required by caller */
if (return_descriptor) {
nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
return load_sampler_desc(b, list, index, desc_type);
}
/* Otherwise return just the index and let the NIR-to-LLVM backend translate
 * it into the actual descriptor, because a waterfall loop may be needed
 * there to handle a non-dynamically-uniform index.
 */
return index;
}
static nir_ssa_def *load_bindless_sampler_desc(nir_builder *b, nir_ssa_def *index,
enum ac_descriptor_type desc_type,
struct lower_resource_state *s)
{
nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);
/* 64 bit to 32 bit */
index = nir_u2u32(b, index);
return load_sampler_desc(b, list, index, desc_type);
}
static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
struct lower_resource_state *s)
{
assert(!tex->texture_non_uniform && !tex->sampler_non_uniform);
nir_deref_instr *texture_deref = NULL;
nir_deref_instr *sampler_deref = NULL;
nir_ssa_def *texture_handle = NULL;
nir_ssa_def *sampler_handle = NULL;
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_texture_deref:
texture_deref = nir_src_as_deref(tex->src[i].src);
break;
case nir_tex_src_sampler_deref:
sampler_deref = nir_src_as_deref(tex->src[i].src);
break;
case nir_tex_src_texture_handle:
texture_handle = tex->src[i].src.ssa;
break;
case nir_tex_src_sampler_handle:
sampler_handle = tex->src[i].src.ssa;
break;
default:
break;
}
}
enum ac_descriptor_type desc_type;
if (tex->op == nir_texop_fragment_mask_fetch_amd)
desc_type = AC_DESC_FMASK;
else
desc_type = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
bool is_descriptor_op = tex->op == nir_texop_descriptor_amd;
nir_ssa_def *image = texture_deref ?
load_deref_sampler_desc(b, texture_deref, desc_type, s, is_descriptor_op) :
load_bindless_sampler_desc(b, texture_handle, desc_type, s);
nir_ssa_def *sampler = NULL;
if (sampler_deref)
sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, false);
else if (sampler_handle)
sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);
if (is_descriptor_op) {
nir_ssa_def_rewrite_uses(&tex->dest.ssa, image);
nir_instr_remove(&tex->instr);
} else {
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_texture_deref:
tex->src[i].src_type = nir_tex_src_texture_handle;
FALLTHROUGH;
case nir_tex_src_texture_handle:
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image);
break;
case nir_tex_src_sampler_deref:
tex->src[i].src_type = nir_tex_src_sampler_handle;
FALLTHROUGH;
case nir_tex_src_sampler_handle:
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler);
break;
default:
break;
}
}
}
return true;
}
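
A small standalone sketch of the rewrite performed above: deref sources are retyped into handle sources, where the handle carries either a full descriptor (bindless case) or, for bound textures and samplers, just an i32 index that the LLVM backend resolves later.

#include <stdio.h>

enum tex_src_kind { SRC_TEXTURE_DEREF, SRC_TEXTURE_HANDLE,
                    SRC_SAMPLER_DEREF, SRC_SAMPLER_HANDLE };

/* Mirrors the source-type rewrite in lower_resource_tex(): derefs become
 * handles; existing handle sources only get their value replaced. */
static enum tex_src_kind lowered_src_kind(enum tex_src_kind kind)
{
   switch (kind) {
   case SRC_TEXTURE_DEREF: return SRC_TEXTURE_HANDLE;
   case SRC_SAMPLER_DEREF: return SRC_SAMPLER_HANDLE;
   default:                return kind;
   }
}

int main(void)
{
   printf("texture_deref -> %d (texture_handle)\n",
          lowered_src_kind(SRC_TEXTURE_DEREF));
   return 0;
}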
static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
{
struct lower_resource_state *s = (struct lower_resource_state *)state;
@ -179,6 +580,10 @@ static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
return lower_resource_intrinsic(b, intrin, s);
}
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
return lower_resource_tex(b, tex, s);
}
default:
return false;
}


@ -257,9 +257,6 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
void si_llvm_ps_build_end(struct si_shader_context *ctx);
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_resources.c */
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_vs.c */
void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4]);


@ -724,6 +724,64 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
}
}
static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set,
unsigned base_index, unsigned constant_index,
LLVMValueRef dynamic_index,
enum ac_descriptor_type desc_type, bool image,
bool write, bool bindless)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
LLVMBuilderRef builder = ctx->ac.builder;
/* always 0 for OpenGL */
assert(!descriptor_set);
/* All images and textures have been lowered to the bindless (handle) form in NIR. */
assert(bindless);
if (dynamic_index && LLVMTypeOf(dynamic_index) == ctx->ac.i32) {
/* Image descriptors are fully lowered in NIR; only bound texture/sampler indices reach this point. */
assert(!image);
bool is_vec4 = false;
LLVMValueRef index = dynamic_index;
switch (desc_type) {
case AC_DESC_IMAGE:
/* The image is at [0:7]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
break;
case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
is_vec4 = true;
break;
case AC_DESC_FMASK:
/* The FMASK is at [8:15]. */
assert(ctx->screen->info.gfx_level < GFX11);
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
break;
case AC_DESC_SAMPLER:
/* The sampler state is at [12:15]. */
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0),
LLVMConstInt(ctx->ac.i32, 3, 0));
is_vec4 = true;
break;
default:
unreachable("invalid desc");
}
struct ac_llvm_pointer list = {
.value = ac_get_arg(&ctx->ac, ctx->args->samplers_and_images),
.pointee_type = is_vec4 ? ctx->ac.v4i32 : ctx->ac.v8i32,
};
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
return dynamic_index;
}
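
A standalone sketch of the index arithmetic above: the same 16-dword slot is addressed either as 8-dword entries (image at 2*i, FMASK at 2*i+1) or as 4-dword entries (buffer view at 4*i+1, sampler state at 4*i+3).

#include <stdio.h>

int main(void)
{
   unsigned slot = 2;
   printf("image   entry: %u\n", slot * 2);      /* v8i32, dwords [0:7]   */
   printf("fmask   entry: %u\n", slot * 2 + 1);  /* v8i32, dwords [8:15]  */
   printf("buffer  entry: %u\n", slot * 4 + 1);  /* v4i32, dwords [4:7]   */
   printf("sampler entry: %u\n", slot * 4 + 3);  /* v4i32, dwords [12:15] */
   return 0;
}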
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
struct nir_shader *nir, bool free_nir)
{
@ -741,8 +799,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
ctx->abi.export_vertex = gfx10_ngg_export_vertex;
ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc;
si_llvm_init_resource_callbacks(ctx);
si_llvm_create_main_func(ctx);
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
@ -967,6 +1025,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
ctx->abi.disable_aniso_single_level = true;
unsigned num_outputs = info->num_outputs;
/* Need an extra output to hold the primitive ID added by the NIR NGG lowering. */


@ -1,267 +0,0 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"
/**
* Return a value that is equal to the given i32 \p index if it lies in [0,num)
* or an undefined value in the same interval otherwise.
*/
static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValueRef index,
unsigned num)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef c_max = LLVMConstInt(ctx->ac.i32, num - 1, 0);
LLVMValueRef cc;
if (util_is_power_of_two_or_zero(num)) {
index = LLVMBuildAnd(builder, index, c_max, "");
} else {
/* In theory, this MAX pattern should result in code that is
* as good as the bit-wise AND above.
*
* In practice, LLVM generates worse code (at the time of
* writing), because its value tracking is not strong enough.
*/
cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
index = LLVMBuildSelect(builder, cc, index, c_max, "");
}
return index;
}
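
A standalone sketch of the clamp this function implements: a power-of-two size lets the index be masked, anything else falls back to a compare+select, i.e. a MIN against the last valid element.

#include <stdio.h>

/* Keep index inside [0, num), mirroring si_llvm_bound_index(). */
static unsigned bound_index(unsigned index, unsigned num)
{
   if ((num & (num - 1)) == 0)                   /* power of two (or zero): mask */
      return index & (num - 1);
   return index <= num - 1 ? index : num - 1;    /* otherwise clamp with a MIN   */
}

int main(void)
{
   printf("%u %u\n", bound_index(9, 8), bound_index(9, 6));  /* prints 1 5 */
   return 0;
}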
/**
* Given a 256-bit resource descriptor, force the DCC enable bit to off.
*
* At least on Tonga, executing image stores on images with non-trivial DCC
* enabled can eventually lead to lockups. This can occur when an
* application binds an image as read-only but then uses a shader that writes
* to it. The OpenGL spec allows almost arbitrarily bad behavior (including
* program termination) in this case, but it doesn't cost much to be a bit
* nicer: disabling DCC in the shader still leads to undefined results but
* avoids the lockup.
*/
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
{
if (ctx->screen->info.gfx_level <= GFX7) {
return rsrc;
} else {
LLVMValueRef i32_6 = LLVMConstInt(ctx->ac.i32, 6, 0);
LLVMValueRef i32_C = LLVMConstInt(ctx->ac.i32, C_008F28_COMPRESSION_EN, 0);
LLVMValueRef tmp;
tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
}
}
static LLVMValueRef force_write_compress_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
{
LLVMValueRef i32_6 = LLVMConstInt(ctx->ac.i32, 6, 0);
LLVMValueRef i32_C = LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, 0);
LLVMValueRef tmp;
tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
}
static LLVMValueRef fixup_image_desc(struct si_shader_context *ctx, LLVMValueRef rsrc,
bool uses_store)
{
if (uses_store && ctx->ac.gfx_level <= GFX9)
rsrc = force_dcc_off(ctx, rsrc);
if (!uses_store && ctx->screen->info.has_image_load_dcc_bug &&
ctx->screen->always_allow_dcc_stores)
rsrc = force_write_compress_off(ctx, rsrc);
return rsrc;
}
/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
* adjust "index" to point to FMASK. */
static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, struct ac_llvm_pointer list,
LLVMValueRef index, enum ac_descriptor_type desc_type,
bool uses_store, bool bindless)
{
LLVMValueRef rsrc;
if (desc_type == AC_DESC_BUFFER) {
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
list.pointee_type = ctx->ac.v4i32;
} else {
assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
}
if (bindless)
rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
else
rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
if (desc_type == AC_DESC_IMAGE)
rsrc = fixup_image_desc(ctx, rsrc, uses_store);
return rsrc;
}
/**
* Load an image view, FMASK view, or sampler state descriptor.
*/
static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, struct ac_llvm_pointer list,
LLVMValueRef index, enum ac_descriptor_type type)
{
LLVMBuilderRef builder = ctx->ac.builder;
switch (type) {
case AC_DESC_IMAGE:
/* The image is at [0:7]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
break;
case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
list.pointee_type = ctx->ac.v4i32;
break;
case AC_DESC_FMASK:
/* The FMASK is at [8:15]. */
assert(ctx->screen->info.gfx_level < GFX11);
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
break;
case AC_DESC_SAMPLER:
/* The sampler state is at [12:15]. */
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0),
LLVMConstInt(ctx->ac.i32, 3, 0));
list.pointee_type = ctx->ac.v4i32;
break;
case AC_DESC_PLANE_0:
case AC_DESC_PLANE_1:
case AC_DESC_PLANE_2:
/* Only used for the multiplane image support for Vulkan. Should
* never be reached in radeonsi.
*/
unreachable("Plane descriptor requested in radeonsi.");
}
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set,
unsigned base_index, unsigned constant_index,
LLVMValueRef dynamic_index,
enum ac_descriptor_type desc_type, bool image,
bool write, bool bindless)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
LLVMBuilderRef builder = ctx->ac.builder;
unsigned const_index = base_index + constant_index;
assert(!descriptor_set);
assert(desc_type <= AC_DESC_BUFFER);
if (bindless) {
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->bindless_samplers_and_images);
/* dynamic_index is the bindless handle */
if (image) {
/* Bindless image descriptors use 16-dword slots. */
dynamic_index =
LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
/* FMASK is right after the image. */
if (desc_type == AC_DESC_FMASK) {
dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, ctx->ac.i32_1, "");
}
return si_load_image_desc(ctx, list, dynamic_index, desc_type, write, true);
}
/* Since bindless handle arithmetic can contain an unsigned integer
* wraparound and si_load_sampler_desc assumes there isn't any,
* use GEP without "inbounds" (inside ac_build_pointer_add)
* to prevent incorrect code generation and hangs.
*/
dynamic_index =
LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
list.v = ac_build_pointer_add(&ctx->ac, ctx->ac.v8i32, list.v, dynamic_index);
return si_load_sampler_desc(ctx, list, ctx->ac.i32_0, desc_type);
}
unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
/* Redirect invalid resource indices to the first array element. */
if (const_index >= num_slots)
const_index = base_index;
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->samplers_and_images);
LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
if (dynamic_index) {
index = LLVMBuildAdd(builder, index, dynamic_index, "");
/* From the GL_ARB_shader_image_load_store extension spec:
*
* If a shader performs an image load, store, or atomic
* operation using an image variable declared as an array,
* and if the index used to select an individual element is
* negative or greater than or equal to the size of the
* array, the results of the operation are undefined but may
* not lead to termination.
*/
index = si_llvm_bound_index(ctx, index, num_slots);
}
if (image) {
/* Fast path if the image is in user SGPRs. */
if (!dynamic_index &&
const_index < ctx->shader->selector->cs_num_images_in_user_sgprs &&
(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER)) {
LLVMValueRef rsrc = ac_get_arg(&ctx->ac, ctx->args->cs_image[const_index]);
if (desc_type == AC_DESC_IMAGE)
rsrc = fixup_image_desc(ctx, rsrc, write);
return rsrc;
}
/* FMASKs are separate from images. */
if (desc_type == AC_DESC_FMASK) {
index =
LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGES, 0), "");
}
index = LLVMBuildSub(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS - 1, 0),
index, "");
return si_load_image_desc(ctx, list, index, desc_type, write, false);
}
index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
return si_load_sampler_desc(ctx, list, index, desc_type);
}
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
}


@ -264,16 +264,18 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
* and copy-propagated
*/
static const struct nir_lower_tex_options lower_tex_options = {
const struct nir_lower_tex_options lower_tex_options = {
.lower_txp = ~0u,
.lower_txs_cube_array = true,
.lower_invalid_implicit_lod = true,
.lower_tg4_offsets = true,
.lower_to_fragment_fetch_amd = sscreen->info.gfx_level < GFX11,
};
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
static const struct nir_lower_image_options lower_image_options = {
const struct nir_lower_image_options lower_image_options = {
.lower_cube_size = true,
.lower_to_fragment_mask_load_amd = sscreen->info.gfx_level < GFX11,
};
NIR_PASS_V(nir, nir_lower_image, &lower_image_options);