ac/llvm: remove handling of input and output loads/stores that are lowered

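Much of the input/output handling is still in use, so only the code paths for loads/stores that have already been lowered are removed.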

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28607>
This commit is contained in:
Marek Olšák 2024-03-30 23:21:16 -04:00
parent ce7ca0d80b
commit 105e22f6fd
4 changed files with 19 additions and 76 deletions

View file

@ -2125,23 +2125,16 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
unsigned writemask = nir_intrinsic_write_mask(instr);
unsigned component = nir_intrinsic_component(instr);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
ASSERTED unsigned bit_size = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src));
ASSERTED nir_src offset = *nir_get_io_offset_src(instr);
/* Non-monolithic PS and also LS before TCS in radeonsi use this to forward outputs to
* registers.
*/
assert(bit_size == 16 || bit_size == 32);
/* No indirect indexing is allowed here. */
assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
case 16:
case 32:
break;
case 64:
unreachable("64-bit IO should have been lowered to 32 bits");
return;
default:
unreachable("unhandled store_output bit size");
return;
}
writemask <<= component;
for (unsigned chan = 0; chan < 8; chan++) {
@ -2885,50 +2878,26 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValu
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
}
static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
bool is_output)
static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
LLVMValueRef values[8];
LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
LLVMTypeRef component_type;
unsigned base = nir_intrinsic_base(instr);
unsigned component = nir_intrinsic_component(instr);
unsigned count = instr->def.num_components;
nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
nir_src offset = *nir_get_io_offset_src(instr);
LLVMValueRef indir_index = NULL;
switch (instr->def.bit_size) {
case 16:
case 32:
break;
case 64:
if (ctx->stage != MESA_SHADER_VERTEX || is_output) {
unreachable("64-bit IO should have been lowered");
return NULL;
}
break;
default:
unreachable("unhandled load type");
return NULL;
}
if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
component_type = LLVMGetElementType(dest_type);
else
component_type = dest_type;
if (nir_src_is_const(offset))
assert(nir_src_as_uint(offset) == 0);
else
indir_index = get_src(ctx, offset);
assert(instr->def.bit_size == 16 || instr->def.bit_size == 32);
/* No indirect indexing allowed. */
assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
/* This is used to load TCS inputs from VGPRs in radeonsi. */
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
LLVMTypeRef component_type = LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind ?
LLVMGetElementType(dest_type) : dest_type;
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
vertex_index, indir_index,
base, component,
count, !is_output);
base, component, count);
if (instr->def.bit_size == 16) {
result = ac_to_integer(&ctx->ac, result);
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
@ -2936,22 +2905,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
/* No indirect indexing is allowed after this point. */
assert(!indir_index);
/* Other non-fragment cases have outputs in temporaries. */
if (is_output && (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
assert(is_output);
for (unsigned chan = component; chan < count + component; chan++)
values[chan] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32,
ctx->abi->outputs[base * 4 + chan], "");
LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
/* Fragment shader inputs. */
assert(ctx->stage == MESA_SHADER_FRAGMENT);
unsigned vertex_id = 0; /* P0 */
@ -3203,14 +3156,9 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_load_input:
case nir_intrinsic_load_input_vertex:
case nir_intrinsic_load_per_vertex_input:
result = visit_load(ctx, instr, false);
break;
case nir_intrinsic_load_output:
case nir_intrinsic_load_per_vertex_output:
result = visit_load(ctx, instr, true);
result = visit_load(ctx, instr);
break;
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output:
visit_store_output(ctx, instr);
break;
case nir_intrinsic_load_shared:

View file

@ -42,9 +42,8 @@ struct ac_shader_abi {
unsigned fs_input_attr_indices[MAX_VARYING];
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
unsigned num_components, bool load_inputs);
unsigned num_components);
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);

View file

@ -731,12 +731,9 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
shader->key.ge.opt.same_patch_vertices;
bool tcs_need_output =
ctx->stage == MESA_SHADER_TESS_CTRL && info->tessfactors_are_def_in_all_invocs;
bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;
if (ls_need_output || tcs_need_output || ps_need_output) {
if (ls_need_output || ps_need_output) {
for (unsigned i = 0; i < info->num_outputs; i++) {
LLVMTypeRef type = ctx->ac.f32;

View file

@ -10,14 +10,13 @@
#include "sid.h"
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
unsigned num_components, bool load_input)
unsigned num_components)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
assert(ctx->shader->key.ge.opt.same_patch_vertices);
uint8_t semantic = info->input[driver_location].semantic;
/* Load the TCS input from a VGPR. */