r600/sfn: Sort FS inputs to make interpolated values come first

On R600- and R700-class hardware, the input declaration order maps
directly to the registers the hardware writes the inputs to, so
make all interpolated inputs come first, and only then emit the
system values such as POS or FACE.

Related: #7035

Fixes: 33765aa92a
     r600/sfn: Enable NIR for pre RG hardware

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18130>
This commit is contained in:
Gert Wollny 2022-08-14 14:44:29 +02:00 committed by Marge Bot
parent 34b9e3e44c
commit f6582027dc
2 changed files with 17 additions and 26 deletions

View file

@ -92,7 +92,6 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
set_input_gpr(nir_intrinsic_base(intr), m_pos_input[0]->sel());
return true;
}
@ -102,7 +101,6 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
m_face_input,
vf.inline_const(ALU_SRC_0, 0),
AluInstr::last_write);
set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
emit_instruction(ir);
return true;
}
@ -230,18 +228,19 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
int FragmentShader::do_allocate_reserved_registers()
{
int next_register = allocate_interpolators();
int next_register = allocate_interpolators_or_inputs();
if (m_sv_values.test(es_pos)) {
set_input_gpr(m_pos_driver_loc, next_register);
m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false);
for (int i = 0; i < 4; ++i)
m_pos_input[i]->pin_live_range(true);
}
next_register = allocate_register_inputs(next_register);
}
int face_reg_index = -1;
if (m_sv_values.test(es_face)) {
set_input_gpr(m_face_driver_loc, next_register);
face_reg_index = next_register++;
m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0);
m_face_input->pin_live_range(true);
@ -356,6 +355,7 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
auto index = nir_src_as_const_value(intr->src[index_src_id]);
assert(index);
const unsigned location_offset = chip_class() < ISA_CC_EVERGREEN ? 32 : 0;
bool uses_interpol_at_centroid = false;
unsigned location = nir_intrinsic_io_semantics(intr).location + index->u32;
@ -366,7 +366,8 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
if (location == VARYING_SLOT_POS) {
m_sv_values.set(es_pos);
ShaderInput pos_input(driver_location, name);
m_pos_driver_loc = driver_location + location_offset;
ShaderInput pos_input(m_pos_driver_loc, name);
pos_input.set_sid(sid);
pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER, false);
add_input(pos_input);
@ -375,7 +376,8 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
if (location == VARYING_SLOT_FACE) {
m_sv_values.set(es_face);
ShaderInput face_input(driver_location, name);
m_face_driver_loc = driver_location + location_offset;
ShaderInput face_input(m_face_driver_loc, name);
face_input.set_sid(sid);
add_input(face_input);
return true;
@ -616,9 +618,9 @@ void FragmentShader::do_print_properties(std::ostream& os) const
os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n";
}
int FragmentShaderR600::allocate_register_inputs(int first_register)
int FragmentShaderR600::allocate_interpolators_or_inputs()
{
int pos = first_register;
int pos = 0;
auto& vf = value_factory();
for (auto& [index, inp]: inputs()) {
if (inp.need_lds_pos()) {
@ -641,11 +643,6 @@ int FragmentShaderR600::allocate_register_inputs(int first_register)
return pos;
}
int FragmentShaderR600::allocate_interpolators()
{
return 0;
}
bool FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
@ -716,12 +713,7 @@ bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
return true;
}
int FragmentShaderEG::allocate_register_inputs(int first_register)
{
return first_register;
}
int FragmentShaderEG::allocate_interpolators()
int FragmentShaderEG::allocate_interpolators_or_inputs()
{
for (unsigned i = 0; i < s_max_interpolators; ++i) {
if (interpolators_used(i)) {

View file

@ -47,8 +47,7 @@ private:
bool load_interpolated_input(nir_intrinsic_instr *intr);
virtual int allocate_register_inputs(int first_register) = 0;
virtual int allocate_interpolators() = 0;
virtual int allocate_interpolators_or_inputs() = 0;
virtual bool load_input_hw(nir_intrinsic_instr *intr) = 0;
virtual bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) = 0;
virtual bool load_interpolated_input_hw(nir_intrinsic_instr *intr) = 0;
@ -92,14 +91,15 @@ private:
int m_nsys_inputs{0};
bool m_apply_sample_mask{false};
int m_rat_base{0};
int m_pos_driver_loc{0};
int m_face_driver_loc{0};
};
class FragmentShaderR600 : public FragmentShader {
public:
using FragmentShader::FragmentShader;
private:
int allocate_register_inputs(int first_register) override;
int allocate_interpolators() override;
int allocate_interpolators_or_inputs() override;
bool load_input_hw(nir_intrinsic_instr *intr) override;
bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override;
bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override;
@ -127,8 +127,7 @@ private:
int base;
};
int allocate_register_inputs(int first_register) override;
int allocate_interpolators() override;
int allocate_interpolators_or_inputs() override;
bool load_input_hw(nir_intrinsic_instr *intr) override;
bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override;
bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override;