mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 21:40:08 +01:00
radeon/llvm: Use alloca instructions for larger arrays [revert a revert]
This reverts commit f84e9d749f.
Bioshock Infinite no longer hangs.
This commit is contained in:
parent
8636a718b5
commit
c98c732158
2 changed files with 148 additions and 24 deletions
|
|
@ -50,6 +50,11 @@ struct radeon_llvm_loop {
|
|||
LLVMBasicBlockRef endloop_block;
|
||||
};
|
||||
|
||||
struct radeon_llvm_array {
|
||||
struct tgsi_declaration_range range;
|
||||
LLVMValueRef alloca;
|
||||
};
|
||||
|
||||
struct radeon_llvm_context {
|
||||
struct lp_build_tgsi_soa_context soa;
|
||||
|
||||
|
|
@ -96,7 +101,7 @@ struct radeon_llvm_context {
|
|||
unsigned loop_depth;
|
||||
unsigned loop_depth_max;
|
||||
|
||||
struct tgsi_declaration_range *arrays;
|
||||
struct radeon_llvm_array *arrays;
|
||||
|
||||
LLVMValueRef main_fn;
|
||||
LLVMTypeRef return_type;
|
||||
|
|
|
|||
|
|
@ -112,11 +112,25 @@ static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
|
|||
|
||||
static struct tgsi_declaration_range
|
||||
get_array_range(struct lp_build_tgsi_context *bld_base,
|
||||
unsigned File, const struct tgsi_ind_register *reg)
|
||||
unsigned File, unsigned reg_index,
|
||||
const struct tgsi_ind_register *reg)
|
||||
{
|
||||
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
|
||||
|
||||
if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
|
||||
if (!reg) {
|
||||
unsigned i;
|
||||
unsigned num_arrays = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
|
||||
for (i = 0; i < num_arrays; i++) {
|
||||
const struct tgsi_declaration_range *range =
|
||||
&ctx->arrays[i].range;
|
||||
|
||||
if (reg_index >= range->First && reg_index <= range->Last) {
|
||||
return ctx->arrays[i].range;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (File != TGSI_FILE_TEMPORARY || !reg || reg->ArrayID == 0 ||
|
||||
reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
|
||||
struct tgsi_declaration_range range;
|
||||
range.First = 0;
|
||||
|
|
@ -124,7 +138,30 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
|
|||
return range;
|
||||
}
|
||||
|
||||
return ctx->arrays[reg->ArrayID - 1];
|
||||
return ctx->arrays[reg->ArrayID - 1].range;
|
||||
}
|
||||
|
||||
static LLVMValueRef get_alloca_for_array(struct lp_build_tgsi_context *bld_base,
|
||||
unsigned file,
|
||||
unsigned index)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned num_arrays;
|
||||
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
|
||||
|
||||
if (file != TGSI_FILE_TEMPORARY)
|
||||
return NULL;
|
||||
|
||||
num_arrays = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
|
||||
for (i = 0; i < num_arrays; i++) {
|
||||
const struct tgsi_declaration_range *range =
|
||||
&ctx->arrays[i].range;
|
||||
|
||||
if (index >= range->First && index <= range->Last) {
|
||||
return ctx->arrays[i].alloca;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
|
|
@ -134,6 +171,9 @@ emit_array_index(struct lp_build_tgsi_soa_context *bld,
|
|||
{
|
||||
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
|
||||
|
||||
if (!reg) {
|
||||
return lp_build_const_int32(gallivm, offset);
|
||||
}
|
||||
LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
|
||||
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
|
||||
}
|
||||
|
|
@ -181,7 +221,7 @@ emit_array_fetch(struct lp_build_tgsi_context *bld_base,
|
|||
tmp_reg.Register.Index = i + range.First;
|
||||
LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
|
||||
result = LLVMBuildInsertElement(builder, result, temp,
|
||||
lp_build_const_int32(gallivm, i), "");
|
||||
lp_build_const_int32(gallivm, i), "array_vector");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -195,13 +235,35 @@ load_value_from_array(struct lp_build_tgsi_context *bld_base,
|
|||
const struct tgsi_ind_register *reg_indirect)
|
||||
{
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_indirect);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
|
||||
LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
|
||||
LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
|
||||
LLVMValueRef ptr, val, indices[2];
|
||||
|
||||
return LLVMBuildExtractElement(builder,
|
||||
emit_array_fetch(bld_base, file, type, range, swizzle),
|
||||
emit_array_index(bld, reg_indirect, reg_index - range.First), "");
|
||||
if (!array) {
|
||||
/* Handle the case where the array is stored as a vector. */
|
||||
return LLVMBuildExtractElement(builder,
|
||||
emit_array_fetch(bld_base, file, type, range, swizzle),
|
||||
index, "");
|
||||
}
|
||||
|
||||
index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
|
||||
index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), "");
|
||||
indices[0] = bld_base->uint_bld.zero;
|
||||
indices[1] = index;
|
||||
ptr = LLVMBuildGEP(builder, array, indices, 2, "");
|
||||
val = LLVMBuildLoad(builder, ptr, "");
|
||||
if (tgsi_type_is_64bit(type)) {
|
||||
LLVMValueRef ptr_hi, val_hi;
|
||||
indices[0] = lp_build_const_int32(gallivm, 1);
|
||||
ptr_hi = LLVMBuildGEP(builder, ptr, indices, 1, "");
|
||||
val_hi = LLVMBuildLoad(builder, ptr_hi, "");
|
||||
val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
|
||||
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
|
|
@ -213,13 +275,26 @@ store_value_to_array(struct lp_build_tgsi_context *bld_base,
|
|||
const struct tgsi_ind_register *reg_indirect)
|
||||
{
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_indirect);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
|
||||
LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
|
||||
LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
|
||||
|
||||
return LLVMBuildInsertElement(builder,
|
||||
if (array) {
|
||||
LLVMValueRef indices[2];
|
||||
index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
|
||||
index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, chan_index), "");
|
||||
indices[0] = bld_base->uint_bld.zero;
|
||||
indices[1] = index;
|
||||
LLVMValueRef pointer = LLVMBuildGEP(builder, array, indices, 2, "");
|
||||
LLVMBuildStore(builder, value, pointer);
|
||||
return NULL;
|
||||
} else {
|
||||
return LLVMBuildInsertElement(builder,
|
||||
emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index),
|
||||
value, emit_array_index(bld, reg_indirect, reg_index - range.First), "");
|
||||
return NULL;
|
||||
value, index, "");
|
||||
}
|
||||
}
|
||||
|
||||
LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
|
||||
|
|
@ -243,8 +318,9 @@ LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
|
|||
}
|
||||
|
||||
if (reg->Register.Indirect) {
|
||||
return load_value_from_array(bld_base, reg->Register.File, type,
|
||||
LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
|
||||
swizzle, reg->Register.Index, &reg->Indirect);
|
||||
return bitcast(bld_base, type, load);
|
||||
}
|
||||
|
||||
switch(reg->Register.File) {
|
||||
|
|
@ -283,6 +359,11 @@ LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
|
|||
LLVMBuildLoad(builder, ptr, ""),
|
||||
LLVMBuildLoad(builder, ptr2, ""));
|
||||
}
|
||||
LLVMValueRef array = get_alloca_for_array(bld_base, reg->Register.File, reg->Register.Index);
|
||||
if (array) {
|
||||
return bitcast(bld_base, type, load_value_from_array(bld_base, reg->Register.File, type,
|
||||
swizzle, reg->Register.Index, NULL));
|
||||
}
|
||||
result = LLVMBuildLoad(builder, ptr, "");
|
||||
break;
|
||||
|
||||
|
|
@ -333,6 +414,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
|
|||
const struct tgsi_full_declaration *decl)
|
||||
{
|
||||
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
unsigned first, last, i, idx;
|
||||
switch(decl->Declaration.File) {
|
||||
case TGSI_FILE_ADDRESS:
|
||||
|
|
@ -350,13 +432,36 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
|
|||
}
|
||||
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
{
|
||||
unsigned decl_size;
|
||||
first = decl->Range.First;
|
||||
last = decl->Range.Last;
|
||||
decl_size = 4 * ((last - first) + 1);
|
||||
if (decl->Declaration.Array) {
|
||||
unsigned id = decl->Array.ArrayID - 1;
|
||||
if (!ctx->arrays) {
|
||||
int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
|
||||
ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
|
||||
ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0]));
|
||||
for (i = 0; i < size; ++i) {
|
||||
assert(!ctx->arrays[i].alloca);}
|
||||
}
|
||||
|
||||
ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
|
||||
ctx->arrays[id].range = decl->Range;
|
||||
|
||||
/* If the array is more than 16 elements (each element
|
||||
* is 32-bits), then store it in a vector. Storing the
|
||||
array in a vector will cause the compiler to store
|
||||
* the array in registers and access it using indirect
|
||||
* addressing. 16 is number of vector elements that
|
||||
* LLVM will store in a register.
|
||||
* FIXME: We shouldn't need to do this. LLVM should be
|
||||
smart enough to promote allocas into registers when
|
||||
* profitable.
|
||||
*/
|
||||
if (decl_size > 16) {
|
||||
ctx->arrays[id].alloca = LLVMBuildAlloca(builder,
|
||||
LLVMArrayType(bld_base->base.vec_type, decl_size),"array");
|
||||
}
|
||||
}
|
||||
first = decl->Range.First;
|
||||
last = decl->Range.Last;
|
||||
|
|
@ -373,7 +478,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
|
|||
}
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
case TGSI_FILE_INPUT:
|
||||
{
|
||||
unsigned idx;
|
||||
|
|
@ -482,11 +587,16 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
|
|||
|
||||
if (reg->Register.Indirect) {
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base,
|
||||
reg->Register.File, &reg->Indirect);
|
||||
reg->Register.File, reg->Register.Index, &reg->Indirect);
|
||||
|
||||
unsigned i, size = range.Last - range.First + 1;
|
||||
LLVMValueRef array = store_value_to_array(bld_base, value, reg->Register.File, chan_index,
|
||||
reg->Register.Index, &reg->Indirect);
|
||||
unsigned file = reg->Register.File;
|
||||
unsigned reg_index = reg->Register.Index;
|
||||
LLVMValueRef array = store_value_to_array(bld_base, value, file, chan_index,
|
||||
reg_index, &reg->Indirect);
|
||||
if (get_alloca_for_array(bld_base, file, reg_index)) {
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < size; ++i) {
|
||||
switch(reg->Register.File) {
|
||||
case TGSI_FILE_OUTPUT:
|
||||
|
|
@ -500,7 +610,7 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
|
|||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
value = LLVMBuildExtractElement(builder, array,
|
||||
lp_build_const_int32(gallivm, i), "");
|
||||
|
|
@ -516,14 +626,23 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
|
|||
break;
|
||||
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
{
|
||||
LLVMValueRef array;
|
||||
if (reg->Register.Index >= ctx->temps_count)
|
||||
continue;
|
||||
array = get_alloca_for_array(bld_base, reg->Register.File, reg->Register.Index);
|
||||
|
||||
if (array) {
|
||||
store_value_to_array(bld_base, value, reg->Register.File, chan_index, reg->Register.Index,
|
||||
NULL);
|
||||
continue;
|
||||
}
|
||||
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
|
||||
if (tgsi_type_is_64bit(dtype))
|
||||
temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
|
||||
|
||||
break;
|
||||
|
||||
}
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue