mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 00:30:13 +01:00
radv,aco,llvm: lower adjusting vertex alpha in NIR
Instead of duplicating the same lowering in both compiler backends.
This pass will be used to do more VS input lowering.

fossils-db (Polaris10):
Totals from 48 (0.04% of 135960) affected shaders:
VGPRs: 1692 -> 1684 (-0.47%)
CodeSize: 54016 -> 53964 (-0.10%); split: -0.11%, +0.01%
MaxWaves: 339 -> 341 (+0.59%)
Instrs: 11260 -> 11247 (-0.12%); split: -0.13%, +0.02%
Latency: 88165 -> 88113 (-0.06%); split: -0.07%, +0.01%
InvThroughput: 36153 -> 36093 (-0.17%)
Copies: 583 -> 568 (-2.57%)

fossils-db (Pitcairn):
Totals from 43 (0.03% of 135960) affected shaders:
VGPRs: 1548 -> 1552 (+0.26%)
CodeSize: 47900 -> 47820 (-0.17%)
Instrs: 10751 -> 10731 (-0.19%)
Latency: 83029 -> 82873 (-0.19%)
VClause: 168 -> 164 (-2.38%)
SClause: 393 -> 391 (-0.51%)
Copies: 705 -> 685 (-2.84%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15076>
This commit is contained in:
parent
100c80392a
commit
369b8cffea
3 changed files with 80 additions and 81 deletions
|
|
@ -5029,37 +5029,6 @@ get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, un
|
||||||
return V_008F0C_BUF_DATA_FORMAT_INVALID;
|
return V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
|
||||||
* so we may need to fix it up. */
|
|
||||||
Temp
|
|
||||||
adjust_vertex_fetch_alpha(isel_context* ctx, enum radv_vs_input_alpha_adjust adjustment, Temp alpha)
|
|
||||||
{
|
|
||||||
Builder bld(ctx->program, ctx->block);
|
|
||||||
|
|
||||||
if (adjustment == ALPHA_ADJUST_SSCALED)
|
|
||||||
alpha = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), alpha);
|
|
||||||
|
|
||||||
/* For the integer-like cases, do a natural sign extension.
|
|
||||||
*
|
|
||||||
* For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
|
|
||||||
* and happen to contain 0, 1, 2, 3 as the two LSBs of the
|
|
||||||
* exponent.
|
|
||||||
*/
|
|
||||||
unsigned offset = adjustment == ALPHA_ADJUST_SNORM ? 23u : 0u;
|
|
||||||
alpha =
|
|
||||||
bld.vop3(aco_opcode::v_bfe_i32, bld.def(v1), alpha, Operand::c32(offset), Operand::c32(2u));
|
|
||||||
|
|
||||||
/* Convert back to the right type. */
|
|
||||||
if (adjustment == ALPHA_ADJUST_SNORM) {
|
|
||||||
alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha);
|
|
||||||
alpha = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::c32(0xbf800000u), alpha);
|
|
||||||
} else if (adjustment == ALPHA_ADJUST_SSCALED) {
|
|
||||||
alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha);
|
|
||||||
}
|
|
||||||
|
|
||||||
return alpha;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
{
|
{
|
||||||
|
|
@ -5113,8 +5082,6 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
|
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
|
||||||
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location];
|
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location];
|
||||||
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
|
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
|
||||||
enum radv_vs_input_alpha_adjust alpha_adjust =
|
|
||||||
ctx->options->key.vs.vertex_alpha_adjust[location];
|
|
||||||
|
|
||||||
unsigned dfmt = attrib_format & 0xf;
|
unsigned dfmt = attrib_format & 0xf;
|
||||||
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
||||||
|
|
@ -5250,7 +5217,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
|
|
||||||
Temp fetch_dst;
|
Temp fetch_dst;
|
||||||
if (channel_start == 0 && fetch_bytes == dst.bytes() && !post_shuffle && !expanded &&
|
if (channel_start == 0 && fetch_bytes == dst.bytes() && !post_shuffle && !expanded &&
|
||||||
(alpha_adjust == ALPHA_ADJUST_NONE || num_channels <= 3)) {
|
num_channels <= 3) {
|
||||||
direct_fetch = true;
|
direct_fetch = true;
|
||||||
fetch_dst = dst;
|
fetch_dst = dst;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -5299,8 +5266,6 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
unsigned idx = i + component;
|
unsigned idx = i + component;
|
||||||
if (swizzle[idx] < num_channels && channels[swizzle[idx]].id()) {
|
if (swizzle[idx] < num_channels && channels[swizzle[idx]].id()) {
|
||||||
Temp channel = channels[swizzle[idx]];
|
Temp channel = channels[swizzle[idx]];
|
||||||
if (idx == 3 && alpha_adjust != ALPHA_ADJUST_NONE)
|
|
||||||
channel = adjust_vertex_fetch_alpha(ctx, alpha_adjust, channel);
|
|
||||||
vec->operands[i] = Operand(channel);
|
vec->operands[i] = Operand(channel);
|
||||||
|
|
||||||
num_temp++;
|
num_temp++;
|
||||||
|
|
|
||||||
|
|
@ -603,48 +603,6 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
|
||||||
return descriptor;
|
return descriptor;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
|
||||||
* so we may need to fix it up. */
|
|
||||||
static LLVMValueRef
|
|
||||||
adjust_vertex_fetch_alpha(struct radv_shader_context *ctx, unsigned adjustment, LLVMValueRef alpha)
|
|
||||||
{
|
|
||||||
if (adjustment == ALPHA_ADJUST_NONE)
|
|
||||||
return alpha;
|
|
||||||
|
|
||||||
LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
|
|
||||||
|
|
||||||
alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
|
|
||||||
|
|
||||||
if (adjustment == ALPHA_ADJUST_SSCALED)
|
|
||||||
alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
|
||||||
else
|
|
||||||
alpha = ac_to_integer(&ctx->ac, alpha);
|
|
||||||
|
|
||||||
/* For the integer-like cases, do a natural sign extension.
|
|
||||||
*
|
|
||||||
* For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
|
|
||||||
* and happen to contain 0, 1, 2, 3 as the two LSBs of the
|
|
||||||
* exponent.
|
|
||||||
*/
|
|
||||||
alpha =
|
|
||||||
LLVMBuildShl(ctx->ac.builder, alpha,
|
|
||||||
adjustment == ALPHA_ADJUST_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
|
|
||||||
alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
|
|
||||||
|
|
||||||
/* Convert back to the right type. */
|
|
||||||
if (adjustment == ALPHA_ADJUST_SNORM) {
|
|
||||||
LLVMValueRef clamp;
|
|
||||||
LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
|
|
||||||
alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
|
|
||||||
clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
|
|
||||||
alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
|
|
||||||
} else if (adjustment == ALPHA_ADJUST_SSCALED) {
|
|
||||||
alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef value,
|
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef value,
|
||||||
unsigned num_channels, bool is_float)
|
unsigned num_channels, bool is_float)
|
||||||
|
|
@ -723,7 +681,6 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||||
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
|
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
|
||||||
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
|
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
|
||||||
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
|
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
|
||||||
unsigned alpha_adjust = ctx->options->key.vs.vertex_alpha_adjust[attrib_index];
|
|
||||||
|
|
||||||
if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
|
if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
|
||||||
/* Always load, at least, 3 channels for formats that need to be shuffled because X<->Z. */
|
/* Always load, at least, 3 channels for formats that need to be shuffled because X<->Z. */
|
||||||
|
|
@ -803,8 +760,6 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, out[3]);
|
|
||||||
|
|
||||||
for (unsigned chan = 0; chan < 4; chan++) {
|
for (unsigned chan = 0; chan < 4; chan++) {
|
||||||
out[chan] = ac_to_integer(&ctx->ac, out[chan]);
|
out[chan] = ac_to_integer(&ctx->ac, out[chan]);
|
||||||
if (dest_type == ctx->ac.i16 && !is_float)
|
if (dest_type == ctx->ac.i16 && !is_float)
|
||||||
|
|
|
||||||
|
|
@ -3686,6 +3686,81 @@ radv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs, nir_
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static nir_ssa_def *
|
||||||
|
radv_adjust_vertex_fetch_alpha(nir_builder *b,
|
||||||
|
enum radv_vs_input_alpha_adjust alpha_adjust,
|
||||||
|
nir_ssa_def *alpha)
|
||||||
|
{
|
||||||
|
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
|
||||||
|
alpha = nir_f2u32(b, alpha);
|
||||||
|
|
||||||
|
/* For the integer-like cases, do a natural sign extension.
|
||||||
|
*
|
||||||
|
* For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as
|
||||||
|
* the two LSBs of the exponent.
|
||||||
|
*/
|
||||||
|
unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
|
||||||
|
|
||||||
|
alpha = nir_ibfe(b, alpha, nir_imm_int(b, offset), nir_imm_int(b, 2u));
|
||||||
|
|
||||||
|
/* Convert back to the right type. */
|
||||||
|
if (alpha_adjust == ALPHA_ADJUST_SNORM) {
|
||||||
|
alpha = nir_i2f32(b, alpha);
|
||||||
|
alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f));
|
||||||
|
} else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
|
||||||
|
alpha = nir_i2f32(b, alpha);
|
||||||
|
}
|
||||||
|
|
||||||
|
return alpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
|
||||||
|
{
|
||||||
|
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
if (pipeline_key->vs.dynamic_input_state)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_builder b;
|
||||||
|
nir_builder_init(&b, impl);
|
||||||
|
|
||||||
|
nir_foreach_block(block, impl) {
|
||||||
|
nir_foreach_instr(instr, block) {
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
if (intrin->intrinsic != nir_intrinsic_load_input)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
|
||||||
|
enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
|
||||||
|
|
||||||
|
if (alpha_adjust == ALPHA_ADJUST_NONE)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned component = nir_intrinsic_component(intrin);
|
||||||
|
unsigned num_components = intrin->dest.ssa.num_components;
|
||||||
|
|
||||||
|
b.cursor = nir_after_instr(instr);
|
||||||
|
|
||||||
|
if (component + num_components == 4) {
|
||||||
|
unsigned idx = num_components - 1;
|
||||||
|
nir_ssa_def *alpha = radv_adjust_vertex_fetch_alpha(
|
||||||
|
&b, alpha_adjust, nir_channel(&b, &intrin->dest.ssa, idx));
|
||||||
|
nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, alpha, idx);
|
||||||
|
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest,
|
||||||
|
new_dest->parent_instr);
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
|
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
|
||||||
struct radv_device *device, struct radv_pipeline_cache *cache,
|
struct radv_device *device, struct radv_pipeline_cache *cache,
|
||||||
|
|
@ -3820,6 +3895,10 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
||||||
gather_tess_info(device, nir, infos, pipeline_key);
|
gather_tess_info(device, nir, infos, pipeline_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nir[MESA_SHADER_VERTEX]) {
|
||||||
|
NIR_PASS_V(nir[MESA_SHADER_VERTEX], radv_lower_vs_input, pipeline_key);
|
||||||
|
}
|
||||||
|
|
||||||
radv_fill_shader_info(pipeline, pipeline_layout, pStages, pipeline_key, infos, nir);
|
radv_fill_shader_info(pipeline, pipeline_layout, pStages, pipeline_key, infos, nir);
|
||||||
|
|
||||||
bool pipeline_has_ngg = (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) ||
|
bool pipeline_has_ngg = (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) ||
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue