radeonsi: replace llvm resource code with nir lower
Port from ac_nir_to_llvm.c and si_shader_llvm_resources.c. Because the LLVM backend needs a waterfall loop for divergent handles, we cannot resolve bound-texture descriptors directly in NIR, so the load_sampler_desc ABI is kept only for translating a bound-texture index into a descriptor.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18666>
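For orientation: the lowering added here is a per-instruction NIR pass, with lower_resource_instr (in the diff below) rewriting resource derefs and bindless handles into descriptor loads. The following is only a sketch of how such a callback is typically driven in Mesa; the wrapper name is hypothetical, and only lower_resource_instr and struct lower_resource_state actually appear in this commit.

/* Sketch only: si_nir_lower_resources_example is a made-up name;
 * nir_shader_instructions_pass is the standard NIR helper for
 * per-instruction lowering passes, and the state fields mirror
 * the s->shader / s->args accesses visible in the diff. */
#include "nir_builder.h"

bool si_nir_lower_resources_example(nir_shader *nir, struct si_shader *shader,
                                    struct si_shader_args *args)
{
   struct lower_resource_state state = {
      .shader = shader,
      .args = args,
   };

   /* The callback returns true whenever it rewrote an instruction. */
   return nir_shader_instructions_pass(nir, lower_resource_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       &state);
}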
This commit is contained in:
parent e85c5d8779
commit d4fdeaa820
7 changed files with 495 additions and 304 deletions
@@ -2485,7 +2485,6 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
      LLVMConstInt(ctx->ac.i32, 2, false),
      LLVMConstInt(ctx->ac.i32, 3, false),
   };
   LLVMValueRef sample_index = NULL;

   int count;
   ASSERTED bool add_frag_pos =
@@ -2495,25 +2494,6 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
   assert(!add_frag_pos && "Input attachments should be lowered by this point.");
   count = image_type_to_components_count(dim, is_array);

   if (ctx->ac.gfx_level < GFX11 &&
       is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
                 instr->intrinsic == nir_intrinsic_bindless_image_load ||
                 instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                 instr->intrinsic == nir_intrinsic_bindless_image_sparse_load)) {
      LLVMValueRef fmask_load_address[3];

      fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
      fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
      if (is_array)
         fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
      else
         fmask_load_address[2] = NULL;

      sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
      sample_index = adjust_sample_index_using_fmask(
         &ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
         sample_index, get_image_descriptor(ctx, instr, dynamic_desc_index, AC_DESC_FMASK, false));
   }
   if (count == 1 && !gfx9_1d) {
      if (instr->src[1].ssa->num_components)
         args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
@@ -2577,9 +2557,8 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
   }

   if (is_ms) {
      if (!sample_index)
         sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
      args->coords[count] = sample_index;
      /* sample index */
      args->coords[count] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
      count++;
   }
}
@@ -2647,7 +2626,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
      res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
      res = ac_to_integer(&ctx->ac, res);
   } else if (instr->intrinsic == nir_intrinsic_image_deref_samples_identical) {
   } else if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
      assert(ctx->ac.gfx_level < GFX11);

      args.opcode = ac_image_load;
@@ -2659,8 +2638,6 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
      args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;

      res = ac_build_image_opcode(&ctx->ac, &args);
      res = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");
      res = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, res, ctx->ac.i32_0, "");
   } else {
      bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
@@ -3823,6 +3800,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
      break;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_sparse_load:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
      result = visit_image_load(ctx, instr, true);
      break;
   case nir_intrinsic_image_deref_load:
@@ -4611,6 +4589,8 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
                           LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr,
                           bool divergent)
{
   bool texture_handle_divergent = false;
   bool sampler_handle_divergent = false;
   LLVMValueRef texture_dynamic_handle = NULL;
   LLVMValueRef sampler_dynamic_handle = NULL;
   nir_deref_instr *texture_deref_instr = NULL;
@@ -4637,10 +4617,14 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
         else
            *samp_ptr = val;
      } else {
         if (instr->src[i].src_type == nir_tex_src_texture_handle)
         bool divergent = instr->src[i].src.ssa->divergent;
         if (instr->src[i].src_type == nir_tex_src_texture_handle) {
            texture_dynamic_handle = val;
         else
            texture_handle_divergent = divergent;
         } else {
            sampler_dynamic_handle = val;
            sampler_handle_divergent = divergent;
         }
      }
      break;
   }
@@ -4671,11 +4655,23 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
   }

   if (texture_dynamic_handle || sampler_dynamic_handle) {
      /* instr->sampler_non_uniform and texture_non_uniform are always false in GLSL,
       * but this can lead to unexpected behavior if texture/sampler index come from
       * a vertex attribute.
       * For instance, 2 consecutive draws using 2 different index values,
       * could be squashed together by the hw - producing a single draw with
       * non-dynamically uniform index.
       * To avoid this, detect divergent indexing, and use enter_waterfall.
       * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253.
       */

      /* descriptor handles given through nir_tex_src_{texture,sampler}_handle */
      if (instr->texture_non_uniform)
      if (instr->texture_non_uniform ||
          (ctx->abi->use_waterfall_for_divergent_tex_samplers && texture_handle_divergent))
         texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, divergent);

      if (instr->sampler_non_uniform)
      if (instr->sampler_non_uniform ||
          (ctx->abi->use_waterfall_for_divergent_tex_samplers && sampler_handle_divergent))
         sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, divergent);

      if (texture_dynamic_handle)
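The waterfall comment above is the reason the texture path still goes through the LLVM ABI: a divergent descriptor index has to be scalarized by looping until every lane has been served with a uniform value. Below is only a toy, CPU-side illustration of that idea; it is hypothetical code written for this page and not part of Mesa.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of a waterfall loop: each iteration picks the index held by the
 * first unhandled lane ("readfirstlane") and lets every lane holding that
 * same, now-uniform index proceed, until all lanes are done.
 * Assumes at most 64 lanes (one wave). */
static void waterfall_example(const unsigned *lane_index, unsigned num_lanes)
{
   bool done[64] = { false };
   unsigned handled = 0;

   while (handled < num_lanes) {
      unsigned uniform = 0;
      for (unsigned i = 0; i < num_lanes; i++) {
         if (!done[i]) {
            uniform = lane_index[i]; /* scalar candidate for this iteration */
            break;
         }
      }
      for (unsigned i = 0; i < num_lanes; i++) {
         if (!done[i] && lane_index[i] == uniform) {
            printf("lane %u uses descriptor index %u\n", i, uniform);
            done[i] = true;
            handled++;
         }
      }
   }
}

int main(void)
{
   const unsigned idx[4] = { 3, 1, 3, 1 }; /* divergent across lanes */
   waterfall_example(idx, 4);
   return 0;
}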
@@ -56,7 +56,6 @@ files_libradeonsi = files(
  'si_shader_llvm.c',
  'si_shader_llvm_gs.c',
  'si_shader_llvm_ps.c',
  'si_shader_llvm_resources.c',
  'si_shader_llvm_tess.c',
  'si_shader_llvm_vs.c',
  'si_shader_nir.c',
@@ -115,6 +115,168 @@ static nir_ssa_def *load_ssbo_desc(nir_builder *b, nir_src *index,
   return nir_load_smem_amd(b, 4, addr, offset);
}

static nir_ssa_def *fixup_image_desc(nir_builder *b, nir_ssa_def *rsrc, bool uses_store,
                                     struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;
   struct si_screen *screen = sel->screen;

   /**
    * Given a 256-bit resource descriptor, force the DCC enable bit to off.
    *
    * At least on Tonga, executing image stores on images with DCC enabled and
    * non-trivial can eventually lead to lockups. This can occur when an
    * application binds an image as read-only but then uses a shader that writes
    * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
    * program termination) in this case, but it doesn't cost much to be a bit
    * nicer: disabling DCC in the shader still leads to undefined results but
    * avoids the lockup.
    */
   if (uses_store &&
       screen->info.gfx_level <= GFX9 &&
       screen->info.gfx_level >= GFX8) {
      nir_ssa_def *tmp = nir_channel(b, rsrc, 6);
      tmp = nir_iand_imm(b, tmp, C_008F28_COMPRESSION_EN);
      rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
   }

   if (!uses_store &&
       screen->info.has_image_load_dcc_bug &&
       screen->always_allow_dcc_stores) {
      nir_ssa_def *tmp = nir_channel(b, rsrc, 6);
      tmp = nir_iand_imm(b, tmp, C_00A018_WRITE_COMPRESS_ENABLE);
      rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
   }

   return rsrc;
}

/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
 * adjust "index" to point to FMASK.
 */
static nir_ssa_def *load_image_desc(nir_builder *b, nir_ssa_def *list, nir_ssa_def *index,
                                    enum ac_descriptor_type desc_type, bool uses_store,
                                    struct lower_resource_state *s)
{
   /* index is in uvec8 unit, convert to offset in bytes */
   nir_ssa_def *offset = nir_ishl_imm(b, index, 5);

   unsigned num_channels;
   if (desc_type == AC_DESC_BUFFER) {
      offset = nir_iadd_imm(b, offset, 16);
      num_channels = 4;
   } else {
      assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
      num_channels = 8;
   }

   nir_ssa_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset);

   if (desc_type == AC_DESC_IMAGE)
      rsrc = fixup_image_desc(b, rsrc, uses_store, s);

   return rsrc;
}

static nir_ssa_def *deref_to_index(nir_builder *b,
                                   nir_deref_instr *deref,
                                   unsigned max_slots,
                                   nir_ssa_def **dynamic_index_ret,
                                   unsigned *const_index_ret)
{
   unsigned const_index = 0;
   nir_ssa_def *dynamic_index = NULL;
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);

      if (nir_src_is_const(deref->arr.index)) {
         const_index += array_size * nir_src_as_uint(deref->arr.index);
      } else {
         nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
         dynamic_index = dynamic_index ? nir_iadd(b, dynamic_index, tmp) : tmp;
      }

      deref = nir_deref_instr_parent(deref);
   }

   unsigned base_index = deref->var->data.binding;
   const_index += base_index;

   /* Redirect invalid resource indices to the first array element. */
   if (const_index >= max_slots)
      const_index = base_index;

   nir_ssa_def *index = nir_imm_int(b, const_index);
   if (dynamic_index) {
      index = nir_iadd(b, dynamic_index, index);

      /* From the GL_ARB_shader_image_load_store extension spec:
       *
       *    If a shader performs an image load, store, or atomic
       *    operation using an image variable declared as an array,
       *    and if the index used to select an individual element is
       *    negative or greater than or equal to the size of the
       *    array, the results of the operation are undefined but may
       *    not lead to termination.
       */
      index = clamp_index(b, index, max_slots);
   }

   if (dynamic_index_ret)
      *dynamic_index_ret = dynamic_index;
   if (const_index_ret)
      *const_index_ret = const_index;

   return index;
}

static nir_ssa_def *load_deref_image_desc(nir_builder *b, nir_deref_instr *deref,
                                          enum ac_descriptor_type desc_type, bool is_load,
                                          struct lower_resource_state *s)
{
   unsigned const_index;
   nir_ssa_def *dynamic_index;
   nir_ssa_def *index = deref_to_index(b, deref, s->shader->selector->info.base.num_images,
                                       &dynamic_index, &const_index);

   nir_ssa_def *desc;
   if (!dynamic_index && desc_type != AC_DESC_FMASK &&
       const_index < s->shader->selector->cs_num_images_in_user_sgprs) {
      /* Fast path if the image is in user SGPRs. */
      desc = ac_nir_load_arg(b, &s->args->ac, s->args->cs_image[const_index]);

      if (desc_type == AC_DESC_IMAGE)
         desc = fixup_image_desc(b, desc, !is_load, s);
   } else {
      /* FMASKs are separate from images. */
      if (desc_type == AC_DESC_FMASK)
         index = nir_iadd_imm(b, index, SI_NUM_IMAGES);

      index = nir_isub(b, nir_imm_int(b, SI_NUM_IMAGE_SLOTS - 1), index);

      nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
      desc = load_image_desc(b, list, index, desc_type, !is_load, s);
   }

   return desc;
}

static nir_ssa_def *load_bindless_image_desc(nir_builder *b, nir_ssa_def *index,
                                             enum ac_descriptor_type desc_type, bool is_load,
                                             struct lower_resource_state *s)
{
   /* Bindless image descriptors use 16-dword slots. */
   index = nir_ishl_imm(b, index, 1);

   /* FMASK is right after the image. */
   if (desc_type == AC_DESC_FMASK)
      index = nir_iadd_imm(b, index, 1);

   nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);
   return load_image_desc(b, list, index, desc_type, !is_load, s);
}

static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                     struct lower_resource_state *s)
{
@@ -161,6 +323,103 @@ static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin
      nir_instr_remove(&intrin->instr);
      break;
   }
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_descriptor_amd: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      enum ac_descriptor_type desc_type;
      if (intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd) {
         desc_type = AC_DESC_FMASK;
      } else {
         enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
         desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
      }

      bool is_load =
         intrin->intrinsic == nir_intrinsic_image_deref_load ||
         intrin->intrinsic == nir_intrinsic_image_deref_sparse_load ||
         intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd ||
         intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd;

      nir_ssa_def *desc = load_deref_image_desc(b, deref, desc_type, is_load, s);

      if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
         nir_instr_remove(&intrin->instr);
      } else {
         nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
         nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
         nir_rewrite_image_intrinsic(intrin, desc, true);
      }
      break;
   }
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_sparse_load:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      enum ac_descriptor_type desc_type;
      if (intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
         desc_type = AC_DESC_FMASK;
      } else {
         enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intrin);
         desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
      }

      bool is_load =
         intrin->intrinsic == nir_intrinsic_bindless_image_load ||
         intrin->intrinsic == nir_intrinsic_bindless_image_sparse_load ||
         intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd ||
         intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd;

      nir_ssa_def *index = nir_u2u32(b, intrin->src[0].ssa);

      nir_ssa_def *desc = load_bindless_image_desc(b, index, desc_type, is_load, s);

      if (intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
         nir_instr_remove(&intrin->instr);
      } else {
         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], nir_src_for_ssa(desc));
      }
      break;
   }
   default:
      return false;
   }
@@ -168,6 +427,148 @@ static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin
   return true;
}

static nir_ssa_def *load_sampler_desc(nir_builder *b, nir_ssa_def *list, nir_ssa_def *index,
                                      enum ac_descriptor_type desc_type)
{
   /* index is in 16 dword unit, convert to offset in bytes */
   nir_ssa_def *offset = nir_ishl_imm(b, index, 6);

   unsigned num_channels = 0;
   switch (desc_type) {
   case AC_DESC_IMAGE:
      /* The image is at [0:7]. */
      num_channels = 8;
      break;
   case AC_DESC_BUFFER:
      /* The buffer is in [4:7]. */
      offset = nir_iadd_imm(b, offset, 16);
      num_channels = 4;
      break;
   case AC_DESC_FMASK:
      /* The FMASK is at [8:15]. */
      offset = nir_iadd_imm(b, offset, 32);
      num_channels = 8;
      break;
   case AC_DESC_SAMPLER:
      /* The sampler state is at [12:15]. */
      offset = nir_iadd_imm(b, offset, 48);
      num_channels = 4;
      break;
   default:
      unreachable("invalid desc type");
      break;
   }

   return nir_load_smem_amd(b, num_channels, list, offset);
}

static nir_ssa_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref,
                                            enum ac_descriptor_type desc_type,
                                            struct lower_resource_state *s,
                                            bool return_descriptor)
{
   unsigned max_slots = BITSET_LAST_BIT(b->shader->info.textures_used);
   nir_ssa_def *index = deref_to_index(b, deref, max_slots, NULL, NULL);
   index = nir_iadd_imm(b, index, SI_NUM_IMAGE_SLOTS / 2);

   /* return actual desc when required by caller */
   if (return_descriptor) {
      nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
      return load_sampler_desc(b, list, index, desc_type);
   }

   /* Just use index here and let nir-to-llvm backend to translate to actual
    * descriptor. This is because we need waterfall to handle non-dynamic-uniform
    * index there.
    */
   return index;
}

static nir_ssa_def *load_bindless_sampler_desc(nir_builder *b, nir_ssa_def *index,
                                               enum ac_descriptor_type desc_type,
                                               struct lower_resource_state *s)
{
   nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);

   /* 64 bit to 32 bit */
   index = nir_u2u32(b, index);

   return load_sampler_desc(b, list, index, desc_type);
}

static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
                               struct lower_resource_state *s)
{
   assert(!tex->texture_non_uniform && !tex->sampler_non_uniform);

   nir_deref_instr *texture_deref = NULL;
   nir_deref_instr *sampler_deref = NULL;
   nir_ssa_def *texture_handle = NULL;
   nir_ssa_def *sampler_handle = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_texture_handle:
         texture_handle = tex->src[i].src.ssa;
         break;
      case nir_tex_src_sampler_handle:
         sampler_handle = tex->src[i].src.ssa;
         break;
      default:
         break;
      }
   }

   enum ac_descriptor_type desc_type;
   if (tex->op == nir_texop_fragment_mask_fetch_amd)
      desc_type = AC_DESC_FMASK;
   else
      desc_type = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;

   bool is_descriptor_op = tex->op == nir_texop_descriptor_amd;
   nir_ssa_def *image = texture_deref ?
      load_deref_sampler_desc(b, texture_deref, desc_type, s, is_descriptor_op) :
      load_bindless_sampler_desc(b, texture_handle, desc_type, s);

   nir_ssa_def *sampler = NULL;
   if (sampler_deref)
      sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, false);
   else if (sampler_handle)
      sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);

   if (is_descriptor_op) {
      nir_ssa_def_rewrite_uses(&tex->dest.ssa, image);
      nir_instr_remove(&tex->instr);
   } else {
      for (unsigned i = 0; i < tex->num_srcs; i++) {
         switch (tex->src[i].src_type) {
         case nir_tex_src_texture_deref:
            tex->src[i].src_type = nir_tex_src_texture_handle;
            FALLTHROUGH;
         case nir_tex_src_texture_handle:
            nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image);
            break;
         case nir_tex_src_sampler_deref:
            tex->src[i].src_type = nir_tex_src_sampler_handle;
            FALLTHROUGH;
         case nir_tex_src_sampler_handle:
            nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler);
            break;
         default:
            break;
         }
      }
   }

   return true;
}

static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
{
   struct lower_resource_state *s = (struct lower_resource_state *)state;
@@ -179,6 +580,10 @@ static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      return lower_resource_intrinsic(b, intrin, s);
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      return lower_resource_tex(b, tex, s);
   }
   default:
      return false;
   }
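The byte offsets used by load_sampler_desc above follow from each combined texture/sampler slot being 16 dwords (64 bytes): index << 6 selects the slot, and a fixed offset selects the dword range inside it. The helper below is a hypothetical, standalone sketch written only to make that arithmetic explicit; it is not part of the patch.

#include <stdio.h>

/* Hypothetical illustration of the offsets in load_sampler_desc:
 * 64 bytes per combined slot, plus a per-descriptor-type offset. */
enum desc_part { PART_IMAGE, PART_BUFFER, PART_FMASK, PART_SAMPLER };

static unsigned slot_byte_offset(unsigned index, enum desc_part part)
{
   unsigned base = index << 6; /* 16 dwords = 64 bytes per slot */
   switch (part) {
   case PART_IMAGE:   return base + 0;  /* image descriptor,  dwords [0:7]   */
   case PART_BUFFER:  return base + 16; /* buffer descriptor, dwords [4:7]   */
   case PART_FMASK:   return base + 32; /* FMASK descriptor,  dwords [8:15]  */
   case PART_SAMPLER: return base + 48; /* sampler state,     dwords [12:15] */
   }
   return base;
}

int main(void)
{
   /* Slot 2's sampler state starts at byte 2*64 + 48 = 176. */
   printf("slot 2 sampler state at byte %u\n", slot_byte_offset(2, PART_SAMPLER));
   return 0;
}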
@@ -257,9 +257,6 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
void si_llvm_ps_build_end(struct si_shader_context *ctx);
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);

/* si_shader_llvm_resources.c */
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx);

/* si_shader_llvm_vs.c */
void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
                                    struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4]);
@@ -724,6 +724,64 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
   }
}

static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set,
                                              unsigned base_index, unsigned constant_index,
                                              LLVMValueRef dynamic_index,
                                              enum ac_descriptor_type desc_type, bool image,
                                              bool write, bool bindless)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMBuilderRef builder = ctx->ac.builder;

   /* always 0 for OpenGL */
   assert(!descriptor_set);

   /* all image and texture has been lowered to bindless one in nir */
   assert(bindless);

   if (dynamic_index && LLVMTypeOf(dynamic_index) == ctx->ac.i32) {
      /* image desc has been lowered in nir, we only expect texture here */
      assert(!image);

      bool is_vec4 = false;
      LLVMValueRef index = dynamic_index;

      switch (desc_type) {
      case AC_DESC_IMAGE:
         /* The image is at [0:7]. */
         index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
         break;
      case AC_DESC_BUFFER:
         /* The buffer is in [4:7]. */
         index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
         is_vec4 = true;
         break;
      case AC_DESC_FMASK:
         /* The FMASK is at [8:15]. */
         assert(ctx->screen->info.gfx_level < GFX11);
         index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
         break;
      case AC_DESC_SAMPLER:
         /* The sampler state is at [12:15]. */
         index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0),
                               LLVMConstInt(ctx->ac.i32, 3, 0));
         is_vec4 = true;
         break;
      default:
         unreachable("invalid desc");
      }

      struct ac_llvm_pointer list = {
         .value = ac_get_arg(&ctx->ac, ctx->args->samplers_and_images),
         .pointee_type = is_vec4 ? ctx->ac.v4i32 : ctx->ac.v8i32,
      };

      return ac_build_load_to_sgpr(&ctx->ac, list, index);
   }

   return dynamic_index;
}

bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
                           struct nir_shader *nir, bool free_nir)
{
@@ -741,8 +799,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad

   ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
   ctx->abi.export_vertex = gfx10_ngg_export_vertex;
   ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc;

   si_llvm_init_resource_callbacks(ctx);
   si_llvm_create_main_func(ctx);

   if (ctx->stage <= MESA_SHADER_GEOMETRY &&
@@ -967,6 +1025,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
   ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
                                info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
   ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
   ctx->abi.disable_aniso_single_level = true;

   unsigned num_outputs = info->num_outputs;
   /* need extra output to hold primitive id added by nir ngg lower */
@@ -1,267 +0,0 @@
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"

/**
 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 * or an undefined value in the same interval otherwise.
 */
static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValueRef index,
                                        unsigned num)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef c_max = LLVMConstInt(ctx->ac.i32, num - 1, 0);
   LLVMValueRef cc;

   if (util_is_power_of_two_or_zero(num)) {
      index = LLVMBuildAnd(builder, index, c_max, "");
   } else {
      /* In theory, this MAX pattern should result in code that is
       * as good as the bit-wise AND above.
       *
       * In practice, LLVM generates worse code (at the time of
       * writing), because its value tracking is not strong enough.
       */
      cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
      index = LLVMBuildSelect(builder, cc, index, c_max, "");
   }

   return index;
}

/**
 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
 *
 * At least on Tonga, executing image stores on images with DCC enabled and
 * non-trivial can eventually lead to lockups. This can occur when an
 * application binds an image as read-only but then uses a shader that writes
 * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
 * program termination) in this case, but it doesn't cost much to be a bit
 * nicer: disabling DCC in the shader still leads to undefined results but
 * avoids the lockup.
 */
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
{
   if (ctx->screen->info.gfx_level <= GFX7) {
      return rsrc;
   } else {
      LLVMValueRef i32_6 = LLVMConstInt(ctx->ac.i32, 6, 0);
      LLVMValueRef i32_C = LLVMConstInt(ctx->ac.i32, C_008F28_COMPRESSION_EN, 0);
      LLVMValueRef tmp;

      tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
      tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
      return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
   }
}

static LLVMValueRef force_write_compress_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
{
   LLVMValueRef i32_6 = LLVMConstInt(ctx->ac.i32, 6, 0);
   LLVMValueRef i32_C = LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, 0);
   LLVMValueRef tmp;

   tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
   tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
   return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
}

static LLVMValueRef fixup_image_desc(struct si_shader_context *ctx, LLVMValueRef rsrc,
                                     bool uses_store)
{
   if (uses_store && ctx->ac.gfx_level <= GFX9)
      rsrc = force_dcc_off(ctx, rsrc);

   if (!uses_store && ctx->screen->info.has_image_load_dcc_bug &&
       ctx->screen->always_allow_dcc_stores)
      rsrc = force_write_compress_off(ctx, rsrc);

   return rsrc;
}

/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
 * adjust "index" to point to FMASK. */
static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, struct ac_llvm_pointer list,
                                       LLVMValueRef index, enum ac_descriptor_type desc_type,
                                       bool uses_store, bool bindless)
{
   LLVMValueRef rsrc;

   if (desc_type == AC_DESC_BUFFER) {
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
      list.pointee_type = ctx->ac.v4i32;
   } else {
      assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
   }

   if (bindless)
      rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
   else
      rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);

   if (desc_type == AC_DESC_IMAGE)
      rsrc = fixup_image_desc(ctx, rsrc, uses_store);

   return rsrc;
}

/**
 * Load an image view, fmask view. or sampler state descriptor.
 */
static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, struct ac_llvm_pointer list,
                                         LLVMValueRef index, enum ac_descriptor_type type)
{
   LLVMBuilderRef builder = ctx->ac.builder;

   switch (type) {
   case AC_DESC_IMAGE:
      /* The image is at [0:7]. */
      index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
      break;
   case AC_DESC_BUFFER:
      /* The buffer is in [4:7]. */
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
      list.pointee_type = ctx->ac.v4i32;
      break;
   case AC_DESC_FMASK:
      /* The FMASK is at [8:15]. */
      assert(ctx->screen->info.gfx_level < GFX11);
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
      break;
   case AC_DESC_SAMPLER:
      /* The sampler state is at [12:15]. */
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0),
                            LLVMConstInt(ctx->ac.i32, 3, 0));
      list.pointee_type = ctx->ac.v4i32;
      break;
   case AC_DESC_PLANE_0:
   case AC_DESC_PLANE_1:
   case AC_DESC_PLANE_2:
      /* Only used for the multiplane image support for Vulkan. Should
       * never be reached in radeonsi.
       */
      unreachable("Plane descriptor requested in radeonsi.");
   }

   return ac_build_load_to_sgpr(&ctx->ac, list, index);
}

static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set,
                                             unsigned base_index, unsigned constant_index,
                                             LLVMValueRef dynamic_index,
                                             enum ac_descriptor_type desc_type, bool image,
                                             bool write, bool bindless)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMBuilderRef builder = ctx->ac.builder;
   unsigned const_index = base_index + constant_index;

   assert(!descriptor_set);
   assert(desc_type <= AC_DESC_BUFFER);

   if (bindless) {
      struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->bindless_samplers_and_images);

      /* dynamic_index is the bindless handle */
      if (image) {
         /* Bindless image descriptors use 16-dword slots. */
         dynamic_index =
            LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
         /* FMASK is right after the image. */
         if (desc_type == AC_DESC_FMASK) {
            dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, ctx->ac.i32_1, "");
         }

         return si_load_image_desc(ctx, list, dynamic_index, desc_type, write, true);
      }

      /* Since bindless handle arithmetic can contain an unsigned integer
       * wraparound and si_load_sampler_desc assumes there isn't any,
       * use GEP without "inbounds" (inside ac_build_pointer_add)
       * to prevent incorrect code generation and hangs.
       */
      dynamic_index =
         LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
      list.v = ac_build_pointer_add(&ctx->ac, ctx->ac.v8i32, list.v, dynamic_index);
      return si_load_sampler_desc(ctx, list, ctx->ac.i32_0, desc_type);
   }

   unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;

   /* Redirect invalid resource indices to the first array element. */
   if (const_index >= num_slots)
      const_index = base_index;

   struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->samplers_and_images);
   LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);

   if (dynamic_index) {
      index = LLVMBuildAdd(builder, index, dynamic_index, "");

      /* From the GL_ARB_shader_image_load_store extension spec:
       *
       *    If a shader performs an image load, store, or atomic
       *    operation using an image variable declared as an array,
       *    and if the index used to select an individual element is
       *    negative or greater than or equal to the size of the
       *    array, the results of the operation are undefined but may
       *    not lead to termination.
       */
      index = si_llvm_bound_index(ctx, index, num_slots);
   }

   if (image) {
      /* Fast path if the image is in user SGPRs. */
      if (!dynamic_index &&
          const_index < ctx->shader->selector->cs_num_images_in_user_sgprs &&
          (desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER)) {
         LLVMValueRef rsrc = ac_get_arg(&ctx->ac, ctx->args->cs_image[const_index]);

         if (desc_type == AC_DESC_IMAGE)
            rsrc = fixup_image_desc(ctx, rsrc, write);
         return rsrc;
      }

      /* FMASKs are separate from images. */
      if (desc_type == AC_DESC_FMASK) {
         index =
            LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGES, 0), "");
      }
      index = LLVMBuildSub(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS - 1, 0),
                           index, "");
      return si_load_image_desc(ctx, list, index, desc_type, write, false);
   }

   index = LLVMBuildAdd(ctx->ac.builder, index,
                        LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
   return si_load_sampler_desc(ctx, list, index, desc_type);
}

void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
{
   ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
}
@@ -264,16 +264,18 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
    * and copy-propagated
    */

   static const struct nir_lower_tex_options lower_tex_options = {
   const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
      .lower_txs_cube_array = true,
      .lower_invalid_implicit_lod = true,
      .lower_tg4_offsets = true,
      .lower_to_fragment_fetch_amd = sscreen->info.gfx_level < GFX11,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   static const struct nir_lower_image_options lower_image_options = {
   const struct nir_lower_image_options lower_image_options = {
      .lower_cube_size = true,
      .lower_to_fragment_mask_load_amd = sscreen->info.gfx_level < GFX11,
   };
   NIR_PASS_V(nir, nir_lower_image, &lower_image_options);