ac/llvm: remove the num_channels parameter from ac_build_buffer_store_dword

The parameter dates from when LLVM didn't support vec3, so callers had to
pass a vec4 together with num_channels=3. That is no longer necessary: the
number of channels is now derived from the type of vdata.

This also removes the caller-side workarounds that split vec3 stores or padded them to vec4.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
This commit is contained in:
Marek Olšák 2022-01-03 14:04:57 -05:00 committed by Marge Bot
parent e6aac44051
commit d382ceea2b
7 changed files with 31 additions and 51 deletions

View file

@ -1156,15 +1156,14 @@ void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc
ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true);
}
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
* The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
* or v4i32 (num_channels=3,4).
*/
/* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */
void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
unsigned num_channels, LLVMValueRef vindex, LLVMValueRef voffset,
LLVMValueRef soffset, unsigned inst_offset, unsigned cache_policy)
LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
unsigned inst_offset, unsigned cache_policy)
{
/* Split 3 channel stores. */
unsigned num_channels = ac_get_llvm_num_components(vdata);
/* Split 3 channel stores if unsupported. */
if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
LLVMValueRef v[3], v01;
@ -1173,8 +1172,8 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
}
v01 = ac_build_gather_values(ctx, v, 2);
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, vindex, voffset, soffset, inst_offset, cache_policy);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, vindex, voffset, soffset, inst_offset + 8,
ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, inst_offset, cache_policy);
ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset, soffset, inst_offset + 8,
cache_policy);
return;
}

View file

@ -254,8 +254,8 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
unsigned num_channels, LLVMValueRef vindex, LLVMValueRef voffset,
LLVMValueRef soffset, unsigned inst_offset, unsigned cache_policy);
LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
unsigned inst_offset, unsigned cache_policy);
void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy);

View file

@ -1806,7 +1806,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
u_bit_scan_consecutive_range(&writemask, &start, &count);
if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
if (count == 3 && elem_size_bytes != 4) {
writemask |= 1 << (start + 2);
count = 2;
}
@ -1846,8 +1846,6 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
} else if (num_bytes == 2) {
ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
} else {
int num_channels = num_bytes / 4;
switch (num_bytes) {
case 16: /* v4f32 */
data_type = ctx->ac.v4f32;
@ -1866,7 +1864,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
}
data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, NULL, offset,
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
ctx->ac.i32_0, 0, cache_policy);
}
}
@ -4198,7 +4196,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
LLVMValueRef descriptor = get_src(ctx, instr->src[1]);
LLVMValueRef addr_voffset = get_src(ctx, instr->src[2]);
LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]);
unsigned num_components = instr->src[0].ssa->num_components;
unsigned const_offset = nir_intrinsic_base(instr);
bool swizzled = nir_intrinsic_is_swizzled(instr);
bool slc = nir_intrinsic_slc_amd(instr);
@ -4209,7 +4206,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
if (slc)
cache_policy |= ac_slc;
ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, num_components,
ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data,
NULL, addr_voffset, addr_soffset, const_offset,
cache_policy);
break;

View file

@ -338,7 +338,7 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV
out_val = ac_to_integer(&ctx->ac, out_val);
out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, NULL, voffset,
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset,
ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), 0,
ac_glc | ac_slc | ac_swizzled);
}
@ -1092,18 +1092,13 @@ radv_emit_stream_output(struct radv_shader_context *ctx, LLVMValueRef const *so_
vdata = out[0];
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
out[3] = LLVMGetUndef(ctx->ac.i32);
FALLTHROUGH;
case 3: /* as v3i32 */
case 4: /* as v4i32 */
vdata = ac_build_gather_values(&ctx->ac, out,
!ac_has_vec3_support(ctx->ac.chip_class, false)
? util_next_power_of_two(num_comps)
: num_comps);
vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
break;
}
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, num_comps, NULL,
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, NULL,
so_write_offsets[buf], ctx->ac.i32_0, offset, ac_glc | ac_slc);
}

View file

@ -176,7 +176,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi)
continue;
}
ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL, NULL,
ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, NULL, NULL,
ac_get_arg(&ctx->ac, ctx->args.es2gs_offset),
(4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled);
}
@ -277,7 +277,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM
out_val = ac_to_integer(&ctx->ac, out_val);
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, NULL,
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL,
voffset, soffset, 0, ac_glc | ac_slc | ac_swizzled);
}
}

View file

@ -537,7 +537,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
values[chan] = value;
if (writemask != 0xF && !is_tess_factor) {
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, NULL, addr, base,
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base,
4 * chan, ac_glc);
}
@ -555,7 +555,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
if (writemask == 0xF && !is_tess_factor) {
LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, NULL, addr, base, 0, ac_glc);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, 0, ac_glc);
}
}
@ -662,7 +662,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx)
LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, NULL, buffer_addr, buffer_offset, 0,
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset, 0,
ac_glc);
}
}
@ -774,18 +774,18 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
if (ctx->screen->info.chip_class <= GFX8) {
ac_build_ifcc(&ctx->ac,
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0),
NULL, ctx->ac.i32_0, tf_base, offset, ac_glc);
ac_build_endif(&ctx->ac, 6504);
offset += 4;
}
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), NULL, byteoffset,
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL, byteoffset,
tf_base, offset, ac_glc);
offset += 16;
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, NULL, byteoffset,
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL, byteoffset,
tf_base, offset, ac_glc);
/* Store the tess factors into the offchip buffer if TES reads them. */
@ -801,21 +801,17 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->ac.i32, param_outer, 0));
unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)
? outer_comps
: util_next_power_of_two(outer_comps);
outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, NULL, tf_outer_offset,
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, tf_outer_offset,
base, 0, ac_glc);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->ac.i32, param_inner, 0));
inner_vec =
inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, NULL,
inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL,
tf_inner_offset, base, 0, ac_glc);
}
}

View file

@ -298,19 +298,12 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef
break;
case 2: /* as v2i32 */
case 3: /* as v3i32 */
if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) {
vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
break;
}
/* as v4i32 (aligned to 4) */
out[3] = LLVMGetUndef(ctx->ac.i32);
FALLTHROUGH;
case 4: /* as v4i32 */
vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
break;
}
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps, NULL,
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, NULL,
so_write_offsets[buf_idx], ctx->ac.i32_0, stream_out->dst_offset * 4,
ac_glc | ac_slc);
}