mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 13:20:14 +01:00
radeonsi: load the right number of components for VS inputs and TBOs
The supported component counts are 1, 2, and 4; a count of 3 is loaded as 4.
The following snippet loads float, vec2, vec3, and vec4:
Before:
buffer_load_format_x v9, v4, s[0:3], 0 idxen ; E0002000 80000904
buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005
s_waitcnt vmcnt(0) ; BF8C0F70
buffer_load_format_xyzw v[2:5], v6, s[12:15], 0 idxen ; E00C2000 80030206
s_waitcnt vmcnt(0) ; BF8C0F70
buffer_load_format_xyzw v[5:8], v7, s[4:7], 0 idxen ; E00C2000 80010507
After:
buffer_load_format_x v10, v4, s[0:3], 0 idxen ; E0002000 80000A04
buffer_load_format_xy v[8:9], v5, s[8:11], 0 idxen ; E0042000 80020805
buffer_load_format_xyzw v[0:3], v6, s[12:15], 0 idxen ; E00C2000 80030006
s_waitcnt vmcnt(0) ; BF8C0F70
buffer_load_format_xyzw v[3:6], v7, s[4:7], 0 idxen ; E00C2000 80010307
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
472361dd7e
commit
be973ed21f
4 changed files with 54 additions and 5 deletions
|
|
@ -461,6 +461,41 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
|
||||||
return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
|
return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
|
||||||
|
* with undef. Extract at most num_channels components from the input.
|
||||||
|
*/
|
||||||
|
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef value,
|
||||||
|
unsigned num_channels)
|
||||||
|
{
|
||||||
|
LLVMTypeRef elemtype;
|
||||||
|
LLVMValueRef chan[4];
|
||||||
|
|
||||||
|
if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
|
||||||
|
unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
|
||||||
|
num_channels = MIN2(num_channels, vec_size);
|
||||||
|
|
||||||
|
if (num_channels >= 4)
|
||||||
|
return value;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_channels; i++)
|
||||||
|
chan[i] = ac_llvm_extract_elem(ctx, value, i);
|
||||||
|
|
||||||
|
elemtype = LLVMGetElementType(LLVMTypeOf(value));
|
||||||
|
} else {
|
||||||
|
if (num_channels) {
|
||||||
|
assert(num_channels == 1);
|
||||||
|
chan[0] = value;
|
||||||
|
}
|
||||||
|
elemtype = LLVMTypeOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (num_channels < 4)
|
||||||
|
chan[num_channels++] = LLVMGetUndef(elemtype);
|
||||||
|
|
||||||
|
return ac_build_gather_values(ctx, chan, 4);
|
||||||
|
}
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_build_fdiv(struct ac_llvm_context *ctx,
|
ac_build_fdiv(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef num,
|
LLVMValueRef num,
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,9 @@ LLVMValueRef
|
||||||
ac_build_gather_values(struct ac_llvm_context *ctx,
|
ac_build_gather_values(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef *values,
|
LLVMValueRef *values,
|
||||||
unsigned value_count);
|
unsigned value_count);
|
||||||
|
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef value,
|
||||||
|
unsigned num_channels);
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_build_fdiv(struct ac_llvm_context *ctx,
|
ac_build_fdiv(struct ac_llvm_context *ctx,
|
||||||
|
|
|
||||||
|
|
@ -480,8 +480,8 @@ void si_llvm_load_input_vs(
|
||||||
unsigned input_index,
|
unsigned input_index,
|
||||||
LLVMValueRef out[4])
|
LLVMValueRef out[4])
|
||||||
{
|
{
|
||||||
unsigned vs_blit_property =
|
const struct tgsi_shader_info *info = &ctx->shader->selector->info;
|
||||||
ctx->shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
|
unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
|
||||||
|
|
||||||
if (vs_blit_property) {
|
if (vs_blit_property) {
|
||||||
LLVMValueRef vertex_id = ctx->abi.vertex_id;
|
LLVMValueRef vertex_id = ctx->abi.vertex_id;
|
||||||
|
|
@ -555,6 +555,7 @@ void si_llvm_load_input_vs(
|
||||||
unsigned fix_fetch;
|
unsigned fix_fetch;
|
||||||
unsigned num_fetches;
|
unsigned num_fetches;
|
||||||
unsigned fetch_stride;
|
unsigned fetch_stride;
|
||||||
|
unsigned num_channels;
|
||||||
|
|
||||||
LLVMValueRef t_list_ptr;
|
LLVMValueRef t_list_ptr;
|
||||||
LLVMValueRef t_offset;
|
LLVMValueRef t_offset;
|
||||||
|
|
@ -580,24 +581,29 @@ void si_llvm_load_input_vs(
|
||||||
case SI_FIX_FETCH_RGB_64_FLOAT:
|
case SI_FIX_FETCH_RGB_64_FLOAT:
|
||||||
num_fetches = 3; /* 3 2-dword loads */
|
num_fetches = 3; /* 3 2-dword loads */
|
||||||
fetch_stride = 8;
|
fetch_stride = 8;
|
||||||
|
num_channels = 2;
|
||||||
break;
|
break;
|
||||||
case SI_FIX_FETCH_RGBA_64_FLOAT:
|
case SI_FIX_FETCH_RGBA_64_FLOAT:
|
||||||
num_fetches = 2; /* 2 4-dword loads */
|
num_fetches = 2; /* 2 4-dword loads */
|
||||||
fetch_stride = 16;
|
fetch_stride = 16;
|
||||||
|
num_channels = 4;
|
||||||
break;
|
break;
|
||||||
case SI_FIX_FETCH_RGB_8:
|
case SI_FIX_FETCH_RGB_8:
|
||||||
case SI_FIX_FETCH_RGB_8_INT:
|
case SI_FIX_FETCH_RGB_8_INT:
|
||||||
num_fetches = 3;
|
num_fetches = 3;
|
||||||
fetch_stride = 1;
|
fetch_stride = 1;
|
||||||
|
num_channels = 1;
|
||||||
break;
|
break;
|
||||||
case SI_FIX_FETCH_RGB_16:
|
case SI_FIX_FETCH_RGB_16:
|
||||||
case SI_FIX_FETCH_RGB_16_INT:
|
case SI_FIX_FETCH_RGB_16_INT:
|
||||||
num_fetches = 3;
|
num_fetches = 3;
|
||||||
fetch_stride = 2;
|
fetch_stride = 2;
|
||||||
|
num_channels = 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
num_fetches = 1;
|
num_fetches = 1;
|
||||||
fetch_stride = 0;
|
fetch_stride = 0;
|
||||||
|
num_channels = util_last_bit(info->input_usage_mask[input_index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_fetches; i++) {
|
for (unsigned i = 0; i < num_fetches; i++) {
|
||||||
|
|
@ -605,7 +611,8 @@ void si_llvm_load_input_vs(
|
||||||
|
|
||||||
input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
|
input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
|
||||||
vertex_index, voffset,
|
vertex_index, voffset,
|
||||||
4, true);
|
num_channels, true);
|
||||||
|
input[i] = ac_build_expand_to_vec4(&ctx->ac, input[i], num_channels);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Break up the vec4 into individual components */
|
/* Break up the vec4 into individual components */
|
||||||
|
|
|
||||||
|
|
@ -1821,12 +1821,16 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
|
||||||
unsigned target = inst->Texture.Texture;
|
unsigned target = inst->Texture.Texture;
|
||||||
|
|
||||||
if (target == TGSI_TEXTURE_BUFFER) {
|
if (target == TGSI_TEXTURE_BUFFER) {
|
||||||
emit_data->output[emit_data->chan] =
|
unsigned num_channels =
|
||||||
|
util_last_bit(inst->Dst[0].Register.WriteMask);
|
||||||
|
LLVMValueRef result =
|
||||||
ac_build_buffer_load_format(&ctx->ac,
|
ac_build_buffer_load_format(&ctx->ac,
|
||||||
emit_data->args[0],
|
emit_data->args[0],
|
||||||
emit_data->args[2],
|
emit_data->args[2],
|
||||||
emit_data->args[1],
|
emit_data->args[1],
|
||||||
4, true);
|
num_channels, true);
|
||||||
|
emit_data->output[emit_data->chan] =
|
||||||
|
ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue