mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 15:58:05 +02:00
radv: Handle clip+cull distances more generally as compact arrays.
Needed for https://gitlab.freedesktop.org/mesa/mesa/merge_requests/248 .
That MR keeps the clip and cull arrays split.
So we have to handle:
- compact arrays with location_frac != 0
- VARYING_SLOT_CLIP_DIST1
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit 1ef2855692)
This commit is contained in:
parent
7114cfc1d2
commit
49dad5e8fe
4 changed files with 82 additions and 98 deletions
|
|
@ -1896,14 +1896,18 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
|
|||
if (var) {
|
||||
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
|
||||
var->data.mode == nir_var_shader_in;
|
||||
if (var->data.compact)
|
||||
stride = 1;
|
||||
idx = var->data.driver_location;
|
||||
comp = var->data.location_frac;
|
||||
mode = var->data.mode;
|
||||
|
||||
get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL,
|
||||
&const_index, &indir_index);
|
||||
|
||||
if (var->data.compact) {
|
||||
stride = 1;
|
||||
const_index += comp;
|
||||
comp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->dest.ssa.bit_size == 64)
|
||||
|
|
@ -2022,6 +2026,11 @@ visit_store_var(struct ac_nir_context *ctx,
|
|||
NULL, NULL, &const_index, &indir_index);
|
||||
idx = var->data.driver_location;
|
||||
comp = var->data.location_frac;
|
||||
|
||||
if (var->data.compact) {
|
||||
const_index += comp;
|
||||
comp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
|
||||
|
|
|
|||
|
|
@ -1442,7 +1442,7 @@ store_tcs_output(struct ac_shader_abi *abi,
|
|||
{
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
const unsigned location = var->data.location;
|
||||
const unsigned component = var->data.location_frac;
|
||||
unsigned component = var->data.location_frac;
|
||||
const bool is_patch = var->data.patch;
|
||||
const bool is_compact = var->data.compact;
|
||||
LLVMValueRef dw_addr;
|
||||
|
|
@ -1460,10 +1460,14 @@ store_tcs_output(struct ac_shader_abi *abi,
|
|||
}
|
||||
|
||||
param = shader_io_get_unique_index(location);
|
||||
if (location == VARYING_SLOT_CLIP_DIST0 &&
|
||||
is_compact && const_index > 3) {
|
||||
const_index -= 3;
|
||||
param++;
|
||||
if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
|
||||
const_index += component;
|
||||
component = 0;
|
||||
|
||||
if (const_index >= 4) {
|
||||
const_index -= 4;
|
||||
param++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_patch) {
|
||||
|
|
@ -1530,9 +1534,13 @@ load_tes_input(struct ac_shader_abi *abi,
|
|||
LLVMValueRef result;
|
||||
unsigned param = shader_io_get_unique_index(location);
|
||||
|
||||
if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
|
||||
const_index -= 3;
|
||||
param++;
|
||||
if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
|
||||
const_index += component;
|
||||
component = 0;
|
||||
if (const_index >= 4) {
|
||||
const_index -= 4;
|
||||
param++;
|
||||
}
|
||||
}
|
||||
|
||||
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
|
||||
|
|
@ -2140,6 +2148,14 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
|
|||
uint64_t mask;
|
||||
|
||||
variable->data.driver_location = idx * 4;
|
||||
|
||||
|
||||
if (variable->data.compact) {
|
||||
unsigned component_count = variable->data.location_frac +
|
||||
glsl_get_length(variable->type);
|
||||
attrib_count = (component_count + 3) / 4;
|
||||
}
|
||||
|
||||
mask = ((1ull << attrib_count) - 1) << variable->data.location;
|
||||
|
||||
unsigned interp_type;
|
||||
|
|
@ -2158,14 +2174,6 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
|
|||
for (unsigned i = 0; i < attrib_count; ++i)
|
||||
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
|
||||
|
||||
if (idx == VARYING_SLOT_CLIP_DIST0) {
|
||||
/* Do not account for the number of components inside the array
|
||||
* of clip/cull distances because this might wrongly set other
|
||||
* bits like primitive ID or layer.
|
||||
*/
|
||||
mask = 1ull << VARYING_SLOT_CLIP_DIST0;
|
||||
}
|
||||
|
||||
ctx->input_mask |= mask;
|
||||
}
|
||||
|
||||
|
|
@ -2280,6 +2288,12 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
|
|||
if (stage == MESA_SHADER_TESS_CTRL)
|
||||
return;
|
||||
|
||||
if (variable->data.compact) {
|
||||
unsigned component_count = variable->data.location_frac +
|
||||
glsl_get_length(variable->type);
|
||||
attrib_count = (component_count + 3) / 4;
|
||||
}
|
||||
|
||||
mask_attribs = ((1ull << attrib_count) - 1) << idx;
|
||||
if (stage == MESA_SHADER_VERTEX ||
|
||||
stage == MESA_SHADER_TESS_EVAL ||
|
||||
|
|
@ -2295,8 +2309,6 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
|
|||
ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
|
||||
ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size;
|
||||
}
|
||||
|
||||
mask_attribs = 1ull << idx;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2641,51 +2653,41 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
|
|||
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||
sizeof(outinfo->vs_output_param_offset));
|
||||
|
||||
if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
|
||||
unsigned output_usage_mask, length;
|
||||
LLVMValueRef slots[8];
|
||||
unsigned j;
|
||||
for(unsigned location = VARYING_SLOT_CLIP_DIST0; location <= VARYING_SLOT_CLIP_DIST1; ++location) {
|
||||
if (ctx->output_mask & (1ull << location)) {
|
||||
unsigned output_usage_mask, length;
|
||||
LLVMValueRef slots[4];
|
||||
unsigned j;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX &&
|
||||
!ctx->is_gs_copy_shader) {
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
|
||||
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
|
||||
} else {
|
||||
assert(ctx->is_gs_copy_shader);
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
|
||||
}
|
||||
if (ctx->stage == MESA_SHADER_VERTEX &&
|
||||
!ctx->is_gs_copy_shader) {
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.vs.output_usage_mask[location];
|
||||
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.tes.output_usage_mask[location];
|
||||
} else {
|
||||
assert(ctx->is_gs_copy_shader);
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.gs.output_usage_mask[location];
|
||||
}
|
||||
|
||||
length = util_last_bit(output_usage_mask);
|
||||
length = util_last_bit(output_usage_mask);
|
||||
|
||||
i = VARYING_SLOT_CLIP_DIST0;
|
||||
for (j = 0; j < length; j++)
|
||||
slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
|
||||
for (j = 0; j < length; j++)
|
||||
slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, location, j));
|
||||
|
||||
for (i = length; i < 8; i++)
|
||||
slots[i] = LLVMGetUndef(ctx->ac.f32);
|
||||
for (i = length; i < 4; i++)
|
||||
slots[i] = LLVMGetUndef(ctx->ac.f32);
|
||||
|
||||
if (length > 4) {
|
||||
target = V_008DFC_SQ_EXP_POS + 3;
|
||||
si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args);
|
||||
target = V_008DFC_SQ_EXP_POS + 2 + (location - VARYING_SLOT_CLIP_DIST0);
|
||||
si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args);
|
||||
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
|
||||
&args, sizeof(args));
|
||||
}
|
||||
&args, sizeof(args));
|
||||
|
||||
target = V_008DFC_SQ_EXP_POS + 2;
|
||||
si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args);
|
||||
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
|
||||
&args, sizeof(args));
|
||||
|
||||
/* Export the clip/cull distances values to the next stage. */
|
||||
radv_export_param(ctx, param_count, &slots[0], 0xf);
|
||||
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++;
|
||||
if (length > 4) {
|
||||
radv_export_param(ctx, param_count, &slots[4], 0xf);
|
||||
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
|
||||
/* Export the clip/cull distances values to the next stage. */
|
||||
radv_export_param(ctx, param_count, &slots[0], 0xf);
|
||||
outinfo->vs_output_param_offset[location] = param_count++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2846,28 +2848,14 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
|||
LLVMValueRef lds_base = NULL;
|
||||
|
||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
||||
unsigned output_usage_mask;
|
||||
int param_index;
|
||||
int length = 4;
|
||||
|
||||
if (!(ctx->output_mask & (1ull << i)))
|
||||
continue;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.vs.output_usage_mask[i];
|
||||
} else {
|
||||
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
|
||||
output_usage_mask =
|
||||
ctx->shader_info->info.tes.output_usage_mask[i];
|
||||
}
|
||||
|
||||
if (i == VARYING_SLOT_CLIP_DIST0)
|
||||
length = util_last_bit(output_usage_mask);
|
||||
|
||||
param_index = shader_io_get_unique_index(i);
|
||||
|
||||
max_output_written = MAX2(param_index + (length > 4), max_output_written);
|
||||
max_output_written = MAX2(param_index, max_output_written);
|
||||
}
|
||||
|
||||
outinfo->esgs_itemsize = (max_output_written + 1) * 16;
|
||||
|
|
@ -2888,7 +2876,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
|||
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
|
||||
unsigned output_usage_mask;
|
||||
int param_index;
|
||||
int length = 4;
|
||||
|
||||
if (!(ctx->output_mask & (1ull << i)))
|
||||
continue;
|
||||
|
|
@ -2902,9 +2889,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
|||
ctx->shader_info->info.tes.output_usage_mask[i];
|
||||
}
|
||||
|
||||
if (i == VARYING_SLOT_CLIP_DIST0)
|
||||
length = util_last_bit(output_usage_mask);
|
||||
|
||||
param_index = shader_io_get_unique_index(i);
|
||||
|
||||
if (lds_base) {
|
||||
|
|
@ -2913,7 +2897,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
|||
"");
|
||||
}
|
||||
|
||||
for (j = 0; j < length; j++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (!(output_usage_mask & (1 << j)))
|
||||
continue;
|
||||
|
||||
|
|
@ -2950,22 +2934,16 @@ handle_ls_outputs_post(struct radv_shader_context *ctx)
|
|||
vertex_dw_stride, "");
|
||||
|
||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
||||
unsigned output_usage_mask =
|
||||
ctx->shader_info->info.vs.output_usage_mask[i];
|
||||
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
|
||||
int length = 4;
|
||||
|
||||
if (!(ctx->output_mask & (1ull << i)))
|
||||
continue;
|
||||
|
||||
if (i == VARYING_SLOT_CLIP_DIST0)
|
||||
length = util_last_bit(output_usage_mask);
|
||||
|
||||
int param = shader_io_get_unique_index(i);
|
||||
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
|
||||
LLVMConstInt(ctx->ac.i32, param * 4, false),
|
||||
"");
|
||||
for (unsigned j = 0; j < length; j++) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
|
||||
value = ac_to_integer(&ctx->ac, value);
|
||||
value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
|
||||
|
|
|
|||
|
|
@ -402,6 +402,8 @@ static inline unsigned shader_io_get_unique_index(gl_varying_slot slot)
|
|||
return 1;
|
||||
if (slot == VARYING_SLOT_CLIP_DIST0)
|
||||
return 2;
|
||||
if (slot == VARYING_SLOT_CLIP_DIST1)
|
||||
return 3;
|
||||
/* 3 is reserved for clip dist as well */
|
||||
if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
|
||||
return 4 + (slot - VARYING_SLOT_VAR0);
|
||||
|
|
|
|||
|
|
@ -129,11 +129,9 @@ set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
|
|||
|
||||
get_deref_offset(deref_instr, &const_offset);
|
||||
|
||||
if (idx == VARYING_SLOT_CLIP_DIST0) {
|
||||
/* Special case for clip/cull distances because there are
|
||||
* combined into a single array that contains both.
|
||||
*/
|
||||
output_usage_mask[idx] |= 1 << const_offset;
|
||||
if (var->data.compact) {
|
||||
const_offset += comp;
|
||||
output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -174,13 +172,9 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
|
|||
type = glsl_get_array_element(var->type);
|
||||
|
||||
unsigned slots =
|
||||
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
|
||||
var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
|
||||
: glsl_count_attribute_slots(type, false);
|
||||
|
||||
if (idx == VARYING_SLOT_CLIP_DIST0)
|
||||
slots = (nir->info.clip_distance_array_size +
|
||||
nir->info.cull_distance_array_size > 4) ? 2 : 1;
|
||||
|
||||
mark_tess_output(info, var->data.patch, param, slots);
|
||||
break;
|
||||
}
|
||||
|
|
@ -374,7 +368,8 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
|
|||
info->ps.layer_input = true;
|
||||
break;
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
info->ps.num_input_clips_culls = attrib_count;
|
||||
case VARYING_SLOT_CLIP_DIST1:
|
||||
info->ps.num_input_clips_culls += attrib_count;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
@ -409,8 +404,8 @@ gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
|
|||
int idx = var->data.location;
|
||||
unsigned param = shader_io_get_unique_index(idx);
|
||||
int num_slots = glsl_count_attribute_slots(var->type, false);
|
||||
if (idx == VARYING_SLOT_CLIP_DIST0)
|
||||
num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1;
|
||||
if (var->data.compact)
|
||||
num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
|
||||
mark_ls_output(info, param, num_slots);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue