mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
ac/llvm,radeonsi: lower fbfetch in abi
Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21436>
This commit is contained in:
parent
28c2527e42
commit
822e756511
8 changed files with 78 additions and 140 deletions
|
|
@ -3152,63 +3152,6 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, un
|
|||
return value;
|
||||
}
|
||||
|
||||
/* Adjust the sample index according to FMASK.
|
||||
*
|
||||
* For uncompressed MSAA surfaces, FMASK should return 0x76543210,
|
||||
* which is the identity mapping. Each nibble says which physical sample
|
||||
* should be fetched to get that sample.
|
||||
*
|
||||
* For example, 0x11111100 means there are only 2 samples stored and
|
||||
* the second sample covers 3/4 of the pixel. When reading samples 0
|
||||
* and 1, return physical sample 0 (determined by the first two 0s
|
||||
* in FMASK), otherwise return physical sample 1.
|
||||
*
|
||||
* The sample index should be adjusted as follows:
|
||||
* addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
|
||||
*/
|
||||
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
|
||||
bool is_array_tex)
|
||||
{
|
||||
struct ac_image_args fmask_load = {0};
|
||||
fmask_load.opcode = ac_image_load;
|
||||
fmask_load.resource = fmask;
|
||||
fmask_load.dmask = 0xf;
|
||||
fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
|
||||
fmask_load.attributes = AC_ATTR_INVARIANT_LOAD;
|
||||
|
||||
fmask_load.coords[0] = addr[0];
|
||||
fmask_load.coords[1] = addr[1];
|
||||
if (is_array_tex)
|
||||
fmask_load.coords[2] = addr[2];
|
||||
fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;
|
||||
|
||||
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
|
||||
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
|
||||
|
||||
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
|
||||
* resource descriptor is 0 (invalid).
|
||||
*/
|
||||
LLVMValueRef tmp;
|
||||
tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
|
||||
tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
|
||||
tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
|
||||
fmask_value =
|
||||
LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 0x76543210, false), "");
|
||||
|
||||
/* Apply the formula. */
|
||||
unsigned sample_chan = is_array_tex ? 3 : 2;
|
||||
LLVMValueRef final_sample;
|
||||
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
|
||||
LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
|
||||
final_sample = LLVMBuildLShr(ac->builder, fmask_value,
|
||||
LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), "");
|
||||
/* Mask the sample index by 0x7, because 0x8 means an unknown value
|
||||
* with EQAA, so those will map to 0. */
|
||||
addr[sample_chan] = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
|
||||
if (fmask_load.a16)
|
||||
addr[sample_chan] = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
LLVMValueRef lane, bool with_opt_barrier)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -502,9 +502,6 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, uns
|
|||
LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
|
||||
unsigned bitwidth);
|
||||
|
||||
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
|
||||
bool is_array_tex);
|
||||
|
||||
LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
|
||||
|
||||
LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
|
|
|
|||
|
|
@ -3337,10 +3337,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
|||
/* No indirect indexing is allowed after this point. */
|
||||
assert(!indir_index);
|
||||
|
||||
if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
|
||||
nir_intrinsic_io_semantics(instr).fb_fetch_output)
|
||||
return ctx->abi->emit_fbfetch(ctx->abi);
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && !is_output)
|
||||
return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, component_type);
|
||||
|
||||
|
|
|
|||
|
|
@ -96,8 +96,6 @@ struct ac_shader_abi {
|
|||
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, LLVMValueRef index,
|
||||
enum ac_descriptor_type desc_type);
|
||||
|
||||
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin);
|
||||
|
||||
/* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi currently
|
||||
|
|
|
|||
|
|
@ -107,6 +107,72 @@ static nir_ssa_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shade
|
|||
return nir_vec(b, comp, 4);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
fetch_framebuffer(nir_builder *b, struct si_shader_args *args,
|
||||
struct si_shader_selector *sel, union si_shader_key *key)
|
||||
{
|
||||
/* Load the image descriptor. */
|
||||
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
|
||||
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
|
||||
|
||||
nir_ssa_def *zero = nir_imm_zero(b, 1, 32);
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
|
||||
unsigned chan = 0;
|
||||
nir_ssa_def *vec[4] = {undef, undef, undef, undef};
|
||||
|
||||
vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 0, 16);
|
||||
|
||||
if (!key->ps.mono.fbfetch_is_1D)
|
||||
vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 16, 16);
|
||||
|
||||
/* Get the current render target layer index. */
|
||||
if (key->ps.mono.fbfetch_layered)
|
||||
vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 11);
|
||||
|
||||
nir_ssa_def *coords = nir_vec(b, vec, 4);
|
||||
|
||||
enum glsl_sampler_dim dim;
|
||||
if (key->ps.mono.fbfetch_msaa)
|
||||
dim = GLSL_SAMPLER_DIM_MS;
|
||||
else if (key->ps.mono.fbfetch_is_1D)
|
||||
dim = GLSL_SAMPLER_DIM_1D;
|
||||
else
|
||||
dim = GLSL_SAMPLER_DIM_2D;
|
||||
|
||||
nir_ssa_def *sample_id;
|
||||
if (key->ps.mono.fbfetch_msaa) {
|
||||
sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4);
|
||||
|
||||
if (sel->screen->info.gfx_level < GFX11 &&
|
||||
!(sel->screen->debug_flags & DBG(NO_FMASK))) {
|
||||
nir_ssa_def *desc =
|
||||
load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);
|
||||
|
||||
nir_ssa_def *fmask =
|
||||
nir_bindless_image_fragment_mask_load_amd(
|
||||
b, desc, coords,
|
||||
.image_dim = dim,
|
||||
.image_array = key->ps.mono.fbfetch_layered,
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
|
||||
nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 2);
|
||||
/* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */
|
||||
nir_ssa_def *width = nir_imm_int(b, 3);
|
||||
sample_id = nir_ubfe(b, fmask, offset, width);
|
||||
}
|
||||
} else {
|
||||
sample_id = zero;
|
||||
}
|
||||
|
||||
nir_ssa_def *desc = load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8);
|
||||
|
||||
return nir_bindless_image_load(b, 4, 32, desc, coords, sample_id, zero,
|
||||
.image_dim = dim,
|
||||
.image_array = key->ps.mono.fbfetch_layered,
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
}
|
||||
|
||||
static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
|
|
@ -355,6 +421,18 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_output: {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
/* not fbfetch */
|
||||
if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output))
|
||||
return false;
|
||||
|
||||
/* Ignore src0, because KHR_blend_func_extended disallows multiple render targets. */
|
||||
|
||||
replacement = fetch_framebuffer(b, args, sel, key);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -228,7 +228,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
|||
bool separate_epilog);
|
||||
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader);
|
||||
void si_llvm_ps_build_end(struct si_shader_context *ctx);
|
||||
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
|
||||
|
||||
/* si_shader_llvm_vs.c */
|
||||
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,
|
||||
|
|
|
|||
|
|
@ -830,8 +830,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT: {
|
||||
si_llvm_init_ps_callbacks(ctx);
|
||||
|
||||
unsigned colors_read = ctx->shader->selector->info.colors_read;
|
||||
LLVMValueRef main_fn = ctx->main_fn.value;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,72 +26,6 @@
|
|||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
|
||||
static LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->args->ac.ancillary, 8, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct ac_image_args args = {};
|
||||
LLVMValueRef ptr, image, fmask;
|
||||
|
||||
/* Ignore src0, because KHR_blend_func_extended disallows multiple render
|
||||
* targets.
|
||||
*/
|
||||
|
||||
/* Load the image descriptor. */
|
||||
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
|
||||
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
|
||||
|
||||
ptr = ac_get_arg(&ctx->ac, ctx->args->internal_bindings);
|
||||
ptr =
|
||||
LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->ac.v8i32), "");
|
||||
struct ac_llvm_pointer desc = { .v = ptr, .t = ctx->ac.v8i32 };
|
||||
|
||||
image = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
|
||||
|
||||
unsigned chan = 0;
|
||||
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 0, 16);
|
||||
|
||||
if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 16, 16);
|
||||
|
||||
/* Get the current render target layer index. */
|
||||
if (ctx->shader->key.ps.mono.fbfetch_layered)
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->args->ac.ancillary, 16, 11);
|
||||
|
||||
if (ctx->shader->key.ps.mono.fbfetch_msaa)
|
||||
args.coords[chan++] = si_get_sample_id(ctx);
|
||||
|
||||
if (ctx->screen->info.gfx_level < GFX11 &&
|
||||
ctx->shader->key.ps.mono.fbfetch_msaa &&
|
||||
!(ctx->screen->debug_flags & DBG(NO_FMASK))) {
|
||||
|
||||
fmask = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
|
||||
|
||||
ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
|
||||
ctx->shader->key.ps.mono.fbfetch_layered);
|
||||
}
|
||||
|
||||
args.opcode = ac_image_load;
|
||||
args.resource = image;
|
||||
args.dmask = 0xf;
|
||||
args.attributes = AC_ATTR_INVARIANT_LOAD;
|
||||
|
||||
if (ctx->shader->key.ps.mono.fbfetch_msaa)
|
||||
args.dim =
|
||||
ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
|
||||
else if (ctx->shader->key.ps.mono.fbfetch_is_1D)
|
||||
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d;
|
||||
else
|
||||
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : ac_image_2d;
|
||||
|
||||
return ac_build_image_opcode(&ctx->ac, &args);
|
||||
}
|
||||
|
||||
static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned attr_index,
|
||||
unsigned chan, LLVMValueRef prim_mask, LLVMValueRef i,
|
||||
LLVMValueRef j)
|
||||
|
|
@ -943,8 +877,3 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
|
|||
|
||||
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false);
|
||||
}
|
||||
|
||||
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
|
||||
{
|
||||
ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue