r600/sfn: rework getting a vector and uniforms from the value pool

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5085>
This commit is contained in:
Gert Wollny 2020-05-06 23:36:14 +02:00 committed by Marge Bot
parent afd47ea83b
commit aed9618e20
11 changed files with 151 additions and 87 deletions

View file

@ -98,10 +98,15 @@ PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigne
return m_proc.from_nir_with_fetch_constant(src, component);
}
GPRVector *EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle)
GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match)
{
return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle);
return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
}
/* Convenience forwarder: passes the (index, value) pair unchanged to
 * add_uniform() of the ShaderFromNirProcessor referenced by m_proc. */
void EmitInstruction::add_uniform(unsigned index, const PValue &value)
{
m_proc.add_uniform(index, value);
}
void EmitInstruction::load_uniform(const nir_alu_src& src)
@ -153,6 +158,24 @@ enum chip_class EmitInstruction::get_chip_class(void) const
return m_proc.get_chip_class();
}
/* Convenience forwarder: returns whatever m_proc.literal() yields for the
 * given 32 bit value. */
PValue EmitInstruction::literal(uint32_t value)
{
return m_proc.literal(value);
}
/* Convenience forwarder: obtains the GPRVector for a NIR destination by
 * delegating to m_proc.vec_from_nir() with the same arguments. The vector is
 * returned by value. */
GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
{
return m_proc.vec_from_nir(dst, num_components);
}
/* Convenience forwarder: hands sel, swizzle, reg and map unchanged to
 * m_proc.inject_register() and returns its result. */
bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
const PValue& reg, bool map)
{
return m_proc.inject_register(sel, swizzle, reg, map);
}
/* Predefined ALU modifier sets shared by the emitters:
 *   empty      - no modifier
 *   write      - alu_write only
 *   last_write - alu_write together with alu_last_instr */
const std::set<AluModifiers> EmitInstruction::empty = {};
const std::set<AluModifiers> EmitInstruction::write = {alu_write};
const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};

View file

@ -77,14 +77,22 @@ protected:
const std::set<AluModifiers>& m_flags);
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component);
GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle);
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match = false);
void add_uniform(unsigned index, const PValue &value);
void load_uniform(const nir_alu_src& src);
const nir_variable *get_deref_location(const nir_src& v) const;
enum chip_class get_chip_class(void) const;
PValue literal(uint32_t value);
GPRVector vec_from_nir(const nir_dest& dst, int num_components);
bool inject_register(unsigned sel, unsigned swizzle,
const PValue& reg, bool map);
private:
ShaderFromNirProcessor& m_proc;

View file

@ -242,17 +242,18 @@ bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
emit_instruction(ir);
#else
PValue value(from_nir_with_fetch_constant(instr->src[0], 0));
GPRVector out_vec({value, value, value, value});
auto values = vec_from_nir_with_fetch_constant(instr->src[0],
(1 << instr->src[0].ssa->num_components) - 1, {0,1,2,3}, true);
emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
out_vec, addr_vec, 0, rat_id, 1,
values, addr_vec, 0, rat_id, 1,
1, 0, false));
for (int i = 1; i < instr->src[0].ssa->num_components; ++i) {
emit_instruction(new AluInstruction(op1_mov, out_vec.reg_i(0), from_nir(instr->src[0], i), write));
emit_instruction(new AluInstruction(op1_mov, values.reg_i(0), from_nir(instr->src[0], i), write));
emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
{addr_vec.reg_i(0), Value::one_i}, last_write));
emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
out_vec, addr_vec, 0, rat_id, 1,
values, addr_vec, 0, rat_id, 1,
1, 0, false));
}
#endif

View file

@ -658,15 +658,19 @@ bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
for (unsigned i = 0; i < instr->coord_components; ++i)
swizzle[i] = i;
std::unique_ptr<GPRVector> ofs(vec_from_nir_with_fetch_constant(*src.offset,
( 1 << instr->coord_components) -1,
swizzle));
int noffsets = instr->coord_components;
if (instr->is_array)
--noffsets;
auto ofs = vec_from_nir_with_fetch_constant(*src.offset,
( 1 << noffsets) - 1,
swizzle);
GPRVector dummy(0, {7,7,7,7});
tex_op = (tex_op == TexInstruction::gather4_c) ?
TexInstruction::gather4_c_o : TexInstruction::gather4_o;
auto set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy,
*ofs, sampler.id,
ofs, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
set_ofs->set_dest_swizzle({7,7,7,7});
emit_instruction(set_ofs);
@ -838,29 +842,25 @@ bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
break;
case nir_tex_src_coord: {
std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << instr.coord_components) - 1,
{0,1,2,3}));
src.coord = *coord;
src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << instr.coord_components) - 1,
{0,1,2,3});
} break;
case nir_tex_src_comparator:
src.comperator = from_nir(instr.src[i], 0);
break;
case nir_tex_src_ddx: {
sfn_log << SfnLog::tex << "Get DDX ";
std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_mask(grad_components)));
src.ddx = *coord;
src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_comps(grad_components));
sfn_log << SfnLog::tex << src.ddx << "\n";
} break;
case nir_tex_src_ddy:{
sfn_log << SfnLog::tex << "Get DDY ";
std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_mask(grad_components)));
src.ddy = *coord;
src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_comps(grad_components));
sfn_log << SfnLog::tex << src.ddy << "\n";
} break;
case nir_tex_src_lod:

View file

@ -592,9 +592,8 @@ bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
PValue address = from_nir(instr->src[1], 0, 0);
std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
swizzle_from_mask(instr->num_components)));
GPRVector value(*vec);
auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
swizzle_from_comps(instr->num_components));
int writemask = nir_intrinsic_write_mask(instr);
int align = nir_intrinsic_align_mul(instr);
@ -629,21 +628,44 @@ bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
return true;
}
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
UNUSED unsigned mask,
const GPRVector::Swizzle& swizzle)
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
unsigned mask,
const GPRVector::Swizzle& swizzle,
bool match)
{
GPRVector *result = nullptr;
int sel = lookup_register_index(src);
if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
from_nir(src, 0)->chan() == 0) {
/* If the x-channel is really an x-channel register then we are pretty
* safe that the values come like we need them */
result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
} else {
bool use_same = true;
GPRVector::Values v;
for (int i = 0; i < 4 && use_same; ++i) {
if ((1 << i) & mask) {
if (swizzle[i] < 4) {
v[i] = from_nir(src, swizzle[i]);
assert(v[i]);
if (v[i]->type() != Value::gpr)
use_same = false;
if (match && (v[i]->chan() != swizzle[i]))
use_same = false;
}
}
}
if (use_same) {
   /* Locate the first component that was actually loaded above. The bound
    * must be tested before v[first] is read: with the operands the other
    * way round a vector without any loaded component makes the scan read
    * v[4], i.e. one element past the four components handled here. */
   int first = 0;
   while (first < 4 && !v[first])
      ++first;
   assert(first < 4);

   /* Reusing the source registers directly only works if all loaded
    * components share one register index. Slots that were not loaded are
    * filled with channels of that same register; a loaded value with a
    * different sel clears use_same so that the copy path below is taken. */
   const unsigned sel = v[first]->sel();
   for (int i = 0; i < 4 && use_same; ++i) {
      if (!v[i])
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
      else
         use_same &= v[i]->sel() == sel;
   }
}
if (!use_same) {
AluInstruction *ir = nullptr;
int sel = allocate_temp_register();
GPRVector::Values v;
for (int i = 0; i < 4; ++i) {
v[i] = PValue(new GPRValue(sel, swizzle[i]));
if (swizzle[i] < 4 && (mask & (1 << i))) {
@ -654,10 +676,8 @@ GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_sr
}
if (ir)
ir->set_flag(alu_last_instr);
result = new GPRVector(v);
}
return result;
return GPRVector(v);;
}
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)

View file

@ -62,8 +62,8 @@ public:
void emit_instruction(Instruction *ir);
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component);
GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle);
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match = false);
bool emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
@ -79,7 +79,7 @@ public:
r600_shader& sh_info() {return m_sh_info;}
void add_param_output_reg(int loc, const GPRVector *gpr);
void set_output(unsigned pos, PValue var);
void set_output(unsigned pos, int sel);
const GPRVector *output_register(unsigned location) const;
void evaluate_spi_sid(r600_shader_io &io);
@ -176,7 +176,7 @@ private:
std::set<nir_variable*> m_arrays;
std::map<unsigned, PValue> m_inputs;
std::map<unsigned, PValue> m_outputs;
std::map<unsigned, int> m_outputs;
std::map<unsigned, nir_variable*> m_var_derefs;
std::map<const nir_variable *, nir_variable_mode> m_var_mode;

View file

@ -664,17 +664,26 @@ bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_i
std::array<uint32_t,4> swizzle;
unsigned writemask = nir_intrinsic_write_mask(instr);
if (out_var->data.location != FRAG_RESULT_STENCIL) {
switch (out_var->data.location) {
case FRAG_RESULT_STENCIL:
writemask = 2;
swizzle = {7,0,7,7};
break;
case FRAG_RESULT_SAMPLE_MASK:
writemask = 4;
swizzle = {7,7,0,7};
break;
default:
std::cerr << "Swizzle = ";
for (int i = 0; i < 4; ++i) {
swizzle[i] = (i < instr->num_components) ? i : 7;
std::cerr << swizzle[i] << ", ";
}
} else {
swizzle = {7,0,7,7};
}
GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
set_output(out_var->data.driver_location, PValue(value));
set_output(out_var->data.driver_location, value.sel());
if (out_var->data.location == FRAG_RESULT_COLOR ||
(out_var->data.location >= FRAG_RESULT_DATA0 &&
@ -688,7 +697,7 @@ bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_i
continue;
}
m_last_pixel_export = new ExportInstruction(location, *value, ExportInstruction::et_pixel);
m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
if (sh_info().ps_export_highest < location)
sh_info().ps_export_highest = location;
@ -701,16 +710,14 @@ bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_i
emit_export_instruction(m_last_pixel_export);
++m_max_counted_color_exports;
};
} else if (out_var->data.location == FRAG_RESULT_DEPTH) {
} else if (out_var->data.location == FRAG_RESULT_DEPTH ||
out_var->data.location == FRAG_RESULT_STENCIL ||
out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
m_depth_exports++;
emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
} else if (out_var->data.location == FRAG_RESULT_STENCIL) {
m_depth_exports++;
emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
} else {
return false;
}
return true;
}

View file

@ -57,18 +57,15 @@ bool GeometryShaderFromNir::do_emit_load_deref(UNUSED const nir_variable *in_var
bool GeometryShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
{
uint32_t write_mask = (1 << instr->num_components) - 1;
GPRVector::Swizzle swz = swizzle_from_mask(instr->num_components);
std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swz));
uint32_t write_mask = nir_intrinsic_write_mask(instr);
GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
auto out_value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swz, true);
GPRVector out_value = *vec;
sh_info().output[out_var->data.driver_location].write_mask =
(1 << instr->num_components) - 1;
sh_info().output[out_var->data.driver_location].write_mask = write_mask;
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
4 * out_var->data.driver_location,
4, m_export_base);
instr->num_components, m_export_base);
emit_instruction(ir);
return true;

View file

@ -113,9 +113,9 @@ bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr)
{
const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ?
GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7});
std::unique_ptr<GPRVector> val(vec_from_nir_with_fetch_constant(instr->src[0],
0xf, swizzle));
emit_instruction(new GDSStoreTessFactor(*val));
auto val = vec_from_nir_with_fetch_constant(instr->src[0],
(1 << instr->src[0].ssa->num_components) - 1, swizzle);
emit_instruction(new GDSStoreTessFactor(val));
return true;
}

View file

@ -177,7 +177,7 @@ inline size_t GPRArrayValue::array_size() const
return m_array->size();
}
inline GPRVector::Swizzle swizzle_from_mask(unsigned ncomp)
inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp)
{
GPRVector::Swizzle swz = {0,1,2,3};
for (int i = ncomp; i < 4; ++i)
@ -185,6 +185,14 @@ inline GPRVector::Swizzle swizzle_from_mask(unsigned ncomp)
return swz;
}
/* Build a swizzle from a component bit mask: channel c selects itself when
 * bit c of mask is set and gets the value 7 otherwise (7 is presumably the
 * "masked/unused" selector - confirm against GPRVector). */
inline GPRVector::Swizzle swizzle_from_mask(unsigned mask)
{
   GPRVector::Swizzle swz;
   for (int chan = 0; chan < 4; ++chan) {
      if (mask & (1 << chan))
         swz[chan] = chan;
      else
         swz[chan] = 7;
   }
   return swz;
}
}

View file

@ -159,8 +159,8 @@ bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_i
m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
m_proc.set_output(out_var->data.driver_location, PValue(value));
GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
m_proc.set_output(out_var->data.driver_location, value.sel());
int export_slot = 0;
@ -168,8 +168,8 @@ bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_i
case VARYING_SLOT_EDGE: {
m_proc.sh_info().vs_out_misc_write = 1;
m_proc.sh_info().vs_out_edgeflag = 1;
m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
}
/* fallthrough */
@ -189,7 +189,7 @@ bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_i
return false;
}
m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
m_proc.emit_export_instruction(m_last_pos_export);
m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
return true;
@ -207,16 +207,16 @@ bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir
m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
m_proc.sh_info().output[out_var->data.driver_location].gpr = value.sel();
/* This should use the registers!! */
m_proc.set_output(out_var->data.driver_location, PValue(value));
m_proc.set_output(out_var->data.driver_location, value.sel());
auto param_loc = m_param_map.find(out_var->data.location);
assert(param_loc != m_param_map.end());
m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
m_last_param_export = new ExportInstruction(param_loc->second, value, ExportInstruction::et_param);
m_proc.emit_export_instruction(m_last_param_export);
m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
return true;
@ -227,7 +227,7 @@ bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir
m_proc.sh_info().cc_dist_mask = 0xff;
m_proc.sh_info().clip_dist_write = 0xff;
std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
GPRVector clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3});
for (int i = 0; i < 4; ++i)
m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
@ -239,7 +239,7 @@ bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir
int ochan = i & 3;
AluInstruction *ir = nullptr;
for (int j = 0; j < 4; j++) {
ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex.reg_i(j),
PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
(j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
m_proc.emit_instruction(ir);
@ -419,10 +419,10 @@ bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrin
uint32_t write_mask = (1 << instr->num_components) - 1;
std::unique_ptr<GPRVector> value(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
swizzle_from_mask(instr->num_components)));
GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
swizzle_from_comps(instr->num_components));
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
ring_offset >> 2, 4, PValue());
m_proc.emit_export_instruction(ir);