mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 22:30:12 +01:00
i965: enable component packing for vs and fs
Rather than trying to work out the total number of components used at a location, we simply treat all outputs as vec4s. This removes the need for complex code looping over varyings to match packed locations and the need for storing the total number of components used at each location.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
09e46f99ad
commit
7f53fead5c
4 changed files with 16 additions and 25 deletions
|
|
@ -317,7 +317,6 @@ public:
|
|||
fs_reg frag_stencil;
|
||||
fs_reg sample_mask;
|
||||
fs_reg outputs[VARYING_SLOT_MAX];
|
||||
unsigned output_components[VARYING_SLOT_MAX];
|
||||
fs_reg dual_src_output;
|
||||
bool do_dual_src;
|
||||
int first_non_payload_grf;
|
||||
|
|
|
|||
|
|
@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
|
|||
}
|
||||
} else {
|
||||
assert(type->is_scalar() || type->is_vector());
|
||||
unsigned num_elements = type->vector_elements;
|
||||
unsigned num_iter = 1;
|
||||
if (type->is_double())
|
||||
num_elements *= 2;
|
||||
for (unsigned count = 0; count < num_elements; count += 4) {
|
||||
num_iter = 2;
|
||||
for (unsigned count = 0; count < num_iter; count++) {
|
||||
this->outputs[*location] = *reg;
|
||||
this->output_components[*location] = MIN2(4, num_elements - count);
|
||||
*reg = offset(*reg, bld, this->output_components[*location]);
|
||||
*reg = offset(*reg, bld, 4);
|
||||
(*location)++;
|
||||
}
|
||||
}
|
||||
|
|
@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
|
|||
/* Writing gl_FragColor outputs to all color regions. */
|
||||
for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
|
||||
this->outputs[i] = reg;
|
||||
this->output_components[i] = 4;
|
||||
}
|
||||
} else if (var->data.location == FRAG_RESULT_DEPTH) {
|
||||
this->frag_depth = reg;
|
||||
|
|
@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
|
|||
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
|
||||
this->sample_mask = reg;
|
||||
} else {
|
||||
int vector_elements = var->type->without_array()->vector_elements;
|
||||
|
||||
/* gl_FragData or a user-defined FS output */
|
||||
assert(var->data.location >= FRAG_RESULT_DATA0 &&
|
||||
var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
|
||||
|
|
@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
|
|||
/* General color output. */
|
||||
for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
|
||||
int output = var->data.location - FRAG_RESULT_DATA0 + i;
|
||||
this->outputs[output] = offset(reg, bld, vector_elements * i);
|
||||
this->output_components[output] = vector_elements;
|
||||
this->outputs[output] = offset(reg, bld, 4 * i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -2360,6 +2355,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
|
|||
|
||||
case nir_intrinsic_load_input: {
|
||||
fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
|
||||
unsigned first_component = nir_intrinsic_component(instr);
|
||||
unsigned num_components = instr->num_components;
|
||||
enum brw_reg_type type = dest.type;
|
||||
|
||||
|
|
@ -2368,7 +2364,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
|
|||
src = offset(src, bld, const_offset->u32[0]);
|
||||
|
||||
for (unsigned j = 0; j < num_components; j++) {
|
||||
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
|
||||
bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
|
||||
}
|
||||
|
||||
if (type == BRW_REGISTER_TYPE_DF) {
|
||||
|
|
@ -4103,6 +4099,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
new_dest = offset(new_dest, bld, const_offset->u32[0]);
|
||||
|
||||
unsigned num_components = instr->num_components;
|
||||
unsigned first_component = nir_intrinsic_component(instr);
|
||||
unsigned bit_size = instr->src[0].is_ssa ?
|
||||
instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
|
||||
if (bit_size == 64) {
|
||||
|
|
@ -4116,7 +4113,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
}
|
||||
|
||||
for (unsigned j = 0; j < num_components; j++) {
|
||||
bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
|
||||
bld.MOV(offset(new_dest, bld, j + first_component),
|
||||
offset(src, bld, j));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -459,8 +459,7 @@ fs_visitor::emit_fb_writes()
|
|||
src0_alpha = offset(outputs[0], bld, 3);
|
||||
|
||||
inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
|
||||
src0_alpha,
|
||||
this->output_components[target]);
|
||||
src0_alpha, 4);
|
||||
inst->target = target;
|
||||
}
|
||||
}
|
||||
|
|
@ -545,9 +544,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
|
|||
const fs_builder abld = bld.annotate("user clip distances");
|
||||
|
||||
this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
|
||||
this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
|
||||
this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
|
||||
this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
|
||||
|
||||
for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
|
||||
fs_reg u = userplane[i];
|
||||
|
|
@ -724,10 +721,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
sources[length++] = reg;
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < output_components[varying]; i++)
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
sources[length++] = offset(this->outputs[varying], bld, i);
|
||||
for (unsigned i = output_components[varying]; i < 4; i++)
|
||||
sources[length++] = brw_imm_d(0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -901,7 +896,6 @@ fs_visitor::init()
|
|||
this->nir_ssa_values = NULL;
|
||||
|
||||
memset(&this->payload, 0, sizeof(this->payload));
|
||||
memset(this->output_components, 0, sizeof(this->output_components));
|
||||
this->source_depth_to_render_target = false;
|
||||
this->runtime_check_aads_emit = false;
|
||||
this->first_non_payload_grf = 0;
|
||||
|
|
|
|||
|
|
@ -302,8 +302,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
|
|||
if (is_scalar) {
|
||||
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
|
||||
VARYING_SLOT_VAR0,
|
||||
type_size_scalar);
|
||||
nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
|
||||
type_size_vec4_times_4);
|
||||
nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
|
||||
} else {
|
||||
nir_foreach_variable(var, &nir->outputs)
|
||||
var->data.driver_location = var->data.location;
|
||||
|
|
@ -340,8 +340,8 @@ void
|
|||
brw_nir_lower_fs_outputs(nir_shader *nir)
|
||||
{
|
||||
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
|
||||
FRAG_RESULT_DATA0, type_size_scalar);
|
||||
nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
|
||||
FRAG_RESULT_DATA0, type_size_vec4_times_4);
|
||||
nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue