ac/llvm,radeonsi: lower fbfetch in abi

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21436>
This commit is contained in:
Qiang Yu 2023-02-11 19:11:08 +08:00
parent 28c2527e42
commit 822e756511
8 changed files with 78 additions and 140 deletions

View file

@ -3152,63 +3152,6 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, un
return value;
}
/* Adjust the sample index according to FMASK.
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0x7;
 *
 * \param ac            LLVM build context.
 * \param fmask         FMASK resource descriptor; it is also reinterpreted as 8 dwords
 *                      to test dword 1 for validity.
 * \param addr          Coordinates: addr[0], addr[1], then addr[2] (only read when
 *                      is_array_tex), followed by the sample index. The sample index
 *                      element is rewritten in place.
 * \param is_array_tex  Whether a third coordinate precedes the sample index.
 */
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
                              bool is_array_tex)
{
   struct ac_image_args fmask_load = {0};

   fmask_load.opcode = ac_image_load;
   fmask_load.resource = fmask;
   fmask_load.dmask = 0xf;
   fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
   fmask_load.attributes = AC_ATTR_INVARIANT_LOAD;

   fmask_load.coords[0] = addr[0];
   fmask_load.coords[1] = addr[1];
   if (is_array_tex)
      fmask_load.coords[2] = addr[2];
   /* Use 16-bit addressing when the incoming coordinates are 16 bits wide. */
   fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;

   LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
   fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");

   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
    * resource descriptor is 0 (invalid).
    */
   LLVMValueRef tmp;

   tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
   tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
   tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");

   /* Fall back to the identity mapping when the descriptor is invalid. */
   fmask_value =
      LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 0x76543210, false), "");

   /* Apply the formula. */
   unsigned sample_chan = is_array_tex ? 3 : 2;
   LLVMValueRef final_sample;

   final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
                               LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
   final_sample = LLVMBuildLShr(ac->builder, fmask_value,
                                LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), "");

   /* Mask the sample index by 0x7, because 0x8 means an unknown value
    * with EQAA, so those will map to 0. */
   final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");

   /* Narrow the *masked* index for 16-bit addresses. Truncating the unmasked
    * shift result would keep the neighbouring FMASK nibbles in the low 16 bits
    * and drop the 0x7 mask applied above.
    */
   if (fmask_load.a16)
      final_sample = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, "");

   addr[sample_chan] = final_sample;
}
static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
LLVMValueRef lane, bool with_opt_barrier)
{

View file

@ -502,9 +502,6 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, uns
LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
unsigned bitwidth);
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
bool is_array_tex);
LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,

View file

@ -3337,10 +3337,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
/* No indirect indexing is allowed after this point. */
assert(!indir_index);
if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
nir_intrinsic_io_semantics(instr).fb_fetch_output)
return ctx->abi->emit_fbfetch(ctx->abi);
if (ctx->stage == MESA_SHADER_VERTEX && !is_output)
return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, component_type);

View file

@ -96,8 +96,6 @@ struct ac_shader_abi {
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, LLVMValueRef index,
enum ac_descriptor_type desc_type);
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin);
/* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi currently

View file

@ -107,6 +107,72 @@ static nir_ssa_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shade
return nir_vec(b, comp, 4);
}
/* Build the NIR sequence that reads the current pixel from color buffer 0.
 *
 * The pixel position comes from the fixed-point position argument, and the
 * layer and sample index come from the ancillary argument. Which coordinates
 * are used is selected by the fbfetch_* bits of the monolithic PS key.
 * Returns the 4 x 32-bit result of the image load.
 */
static nir_ssa_def *
fetch_framebuffer(nir_builder *b, struct si_shader_args *args,
                  struct si_shader_selector *sel, union si_shader_key *key)
{
   const bool is_1d = key->ps.mono.fbfetch_is_1D;
   const bool layered = key->ps.mono.fbfetch_layered;
   const bool msaa = key->ps.mono.fbfetch_msaa;

   /* Both internal binding slots used below are required to be even. */
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);

   /* MSAA takes precedence over 1D; everything else is treated as 2D. */
   enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_2D;
   if (msaa)
      dim = GLSL_SAMPLER_DIM_MS;
   else if (is_1d)
      dim = GLSL_SAMPLER_DIM_1D;

   nir_ssa_def *zero = nir_imm_zero(b, 1, 32);
   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);

   /* Unused trailing coordinate components stay undefined. */
   nir_ssa_def *comps[4] = {undef, undef, undef, undef};
   unsigned num_comps = 0;

   /* X is the low half of the fixed-point position, Y the high half. */
   comps[num_comps++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 0, 16);
   if (!is_1d)
      comps[num_comps++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 16, 16);

   /* Get the current render target layer index. */
   if (layered)
      comps[num_comps++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 11);

   nir_ssa_def *coords = nir_vec(b, comps, 4);

   /* Non-MSAA fetches always read sample 0. */
   nir_ssa_def *sample_id = zero;
   if (msaa) {
      sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4);

      const bool use_fmask = sel->screen->info.gfx_level < GFX11 &&
                             !(sel->screen->debug_flags & DBG(NO_FMASK));
      if (use_fmask) {
         nir_ssa_def *fmask_desc =
            load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);

         nir_ssa_def *fmask =
            nir_bindless_image_fragment_mask_load_amd(
               b, fmask_desc, coords,
               .image_dim = dim,
               .image_array = key->ps.mono.fbfetch_layered,
               .access = ACCESS_CAN_REORDER);

         /* Each sample owns one nibble of the fragment mask. */
         nir_ssa_def *bit_offset = nir_ishl_imm(b, sample_id, 2);
         /* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */
         nir_ssa_def *bit_count = nir_imm_int(b, 3);

         sample_id = nir_ubfe(b, fmask, bit_offset, bit_count);
      }
   }

   /* Load the image descriptor and fetch the pixel. */
   nir_ssa_def *image_desc = load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8);

   return nir_bindless_image_load(b, 4, 32, image_desc, coords, sample_id, zero,
                                  .image_dim = dim,
                                  .image_array = key->ps.mono.fbfetch_layered,
                                  .access = ACCESS_CAN_REORDER);
}
static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
if (instr->type != nir_instr_type_intrinsic)
@ -355,6 +421,18 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
break;
}
case nir_intrinsic_load_output: {
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
/* not fbfetch */
if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output))
return false;
/* Ignore src0, because KHR_blend_func_extended disallows multiple render targets. */
replacement = fetch_framebuffer(b, args, sel, key);
break;
}
default:
return false;
}

View file

@ -228,7 +228,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
bool separate_epilog);
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader);
void si_llvm_ps_build_end(struct si_shader_context *ctx);
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_vs.c */
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,

View file

@ -830,8 +830,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
break;
case MESA_SHADER_FRAGMENT: {
si_llvm_init_ps_callbacks(ctx);
unsigned colors_read = ctx->shader->selector->info.colors_read;
LLVMValueRef main_fn = ctx->main_fn.value;

View file

@ -26,72 +26,6 @@
#include "si_shader_internal.h"
#include "sid.h"
/* Extract the sample ID from the ancillary shader argument by calling
 * si_unpack_param() with the values 8 and 4 (presumably shift and bit width,
 * matching ac_unpack_param's rshift/bitwidth -- confirm against its definition).
 */
static LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
{
   LLVMValueRef sample_id = si_unpack_param(ctx, ctx->args->ac.ancillary, 8, 4);

   return sample_id;
}
static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct ac_image_args args = {};
LLVMValueRef ptr, image, fmask;
/* Ignore src0, because KHR_blend_func_extended disallows multiple render
* targets.
*/
/* Load the image descriptor. */
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
ptr = ac_get_arg(&ctx->ac, ctx->args->internal_bindings);
ptr =
LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->ac.v8i32), "");
struct ac_llvm_pointer desc = { .v = ptr, .t = ctx->ac.v8i32 };
image = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
unsigned chan = 0;
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 0, 16);
if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 16, 16);
/* Get the current render target layer index. */
if (ctx->shader->key.ps.mono.fbfetch_layered)
args.coords[chan++] = si_unpack_param(ctx, ctx->args->ac.ancillary, 16, 11);
if (ctx->shader->key.ps.mono.fbfetch_msaa)
args.coords[chan++] = si_get_sample_id(ctx);
if (ctx->screen->info.gfx_level < GFX11 &&
ctx->shader->key.ps.mono.fbfetch_msaa &&
!(ctx->screen->debug_flags & DBG(NO_FMASK))) {
fmask = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
ctx->shader->key.ps.mono.fbfetch_layered);
}
args.opcode = ac_image_load;
args.resource = image;
args.dmask = 0xf;
args.attributes = AC_ATTR_INVARIANT_LOAD;
if (ctx->shader->key.ps.mono.fbfetch_msaa)
args.dim =
ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
else if (ctx->shader->key.ps.mono.fbfetch_is_1D)
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d;
else
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : ac_image_2d;
return ac_build_image_opcode(&ctx->ac, &args);
}
static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned attr_index,
unsigned chan, LLVMValueRef prim_mask, LLVMValueRef i,
LLVMValueRef j)
@ -943,8 +877,3 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false);
}
/* Install the pixel-shader ABI callbacks on the shader context: sets the
 * emit_fbfetch hook to si_nir_emit_fbfetch.
 */
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
{
ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
}