mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 17:50:12 +01:00
r600/sfn: extract class to handle the VS export to different stages
This code can be shared with the TESS_EVAL shader Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4714>
This commit is contained in:
parent
38038b369f
commit
f7df2c57a2
8 changed files with 621 additions and 471 deletions
|
|
@ -152,7 +152,9 @@ CXX_SOURCES = \
|
||||||
sfn/sfn_value_gpr.cpp \
|
sfn/sfn_value_gpr.cpp \
|
||||||
sfn/sfn_value_gpr.h \
|
sfn/sfn_value_gpr.h \
|
||||||
sfn/sfn_valuepool.cpp \
|
sfn/sfn_valuepool.cpp \
|
||||||
sfn/sfn_valuepool.h
|
sfn/sfn_valuepool.h \
|
||||||
|
sfn/sfn_vertexstageexport.cpp \
|
||||||
|
sfn/sfn_vertexstageexport.h
|
||||||
|
|
||||||
R600_GENERATED_FILES = \
|
R600_GENERATED_FILES = \
|
||||||
egd_tables.h
|
egd_tables.h
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,8 @@ files_r600 = files(
|
||||||
'sfn/sfn_value_gpr.h',
|
'sfn/sfn_value_gpr.h',
|
||||||
'sfn/sfn_valuepool.cpp',
|
'sfn/sfn_valuepool.cpp',
|
||||||
'sfn/sfn_valuepool.h',
|
'sfn/sfn_valuepool.h',
|
||||||
|
'sfn/sfn_vertexstageexport.cpp',
|
||||||
|
'sfn/sfn_vertexstageexport.h',
|
||||||
)
|
)
|
||||||
|
|
||||||
egd_tables_h = custom_target(
|
egd_tables_h = custom_target(
|
||||||
|
|
|
||||||
|
|
@ -60,16 +60,8 @@ bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shade
|
||||||
|
|
||||||
switch (shader->info.stage) {
|
switch (shader->info.stage) {
|
||||||
case MESA_SHADER_VERTEX:
|
case MESA_SHADER_VERTEX:
|
||||||
if (key.vs.as_es) {
|
impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader));
|
||||||
sfn_log << SfnLog::trans << "Start VS for GS\n";
|
break;
|
||||||
impl.reset(new VertexShaderFromNirForGS(pipe_shader, *sel, key, gs_shader));
|
|
||||||
} else if (key.vs.as_ls) {
|
|
||||||
sfn_log << "VS: next type TCS and TES not yet supported\n";
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
sfn_log << SfnLog::trans << "Start VS for FS\n";
|
|
||||||
impl.reset(new VertexShaderFromNirForFS(pipe_shader, *sel, key));
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_GEOMETRY:
|
case MESA_SHADER_GEOMETRY:
|
||||||
sfn_log << SfnLog::trans << "Start GS\n";
|
sfn_log << SfnLog::trans << "Start GS\n";
|
||||||
|
|
|
||||||
|
|
@ -105,11 +105,10 @@ protected:
|
||||||
bool emit_store_local_shared(nir_intrinsic_instr* instr);
|
bool emit_store_local_shared(nir_intrinsic_instr* instr);
|
||||||
|
|
||||||
bool emit_barrier(nir_intrinsic_instr* instr);
|
bool emit_barrier(nir_intrinsic_instr* instr);
|
||||||
const GPRVector *output_register(unsigned location) const;
|
|
||||||
|
|
||||||
bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
|
bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
|
||||||
bool as_last = true);
|
bool as_last = true);
|
||||||
void add_param_output_reg(int loc, const GPRVector *gpr);
|
|
||||||
void inc_atomic_file_count();
|
void inc_atomic_file_count();
|
||||||
std::bitset<8> m_sv_values;
|
std::bitset<8> m_sv_values;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,17 +38,16 @@ using std::priority_queue;
|
||||||
|
|
||||||
VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
|
VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
|
||||||
r600_pipe_shader_selector& sel,
|
r600_pipe_shader_selector& sel,
|
||||||
const r600_shader_key& key):
|
const r600_shader_key& key,
|
||||||
ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader,
|
struct r600_shader* gs_shader):
|
||||||
sh->scratch_space_needed),
|
VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
|
||||||
|
sh->scratch_space_needed),
|
||||||
m_num_clip_dist(0),
|
m_num_clip_dist(0),
|
||||||
m_last_param_export(nullptr),
|
m_last_param_export(nullptr),
|
||||||
m_last_pos_export(nullptr),
|
m_last_pos_export(nullptr),
|
||||||
m_pipe_shader(sh),
|
m_pipe_shader(sh),
|
||||||
m_enabled_stream_buffers_mask(0),
|
m_enabled_stream_buffers_mask(0),
|
||||||
m_so_info(&sel.so),
|
m_so_info(&sel.so),
|
||||||
m_cur_param(0),
|
|
||||||
m_cur_clip_pos(1),
|
|
||||||
m_vertex_id(),
|
m_vertex_id(),
|
||||||
m_key(key)
|
m_key(key)
|
||||||
{
|
{
|
||||||
|
|
@ -56,6 +55,18 @@ VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
|
||||||
increment_reserved_registers();
|
increment_reserved_registers();
|
||||||
|
|
||||||
sh_info().atomic_base = key.vs.first_atomic_counter;
|
sh_info().atomic_base = key.vs.first_atomic_counter;
|
||||||
|
sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
|
||||||
|
|
||||||
|
if (key.vs.as_es) {
|
||||||
|
sh->shader.vs_as_es = true;
|
||||||
|
m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
|
||||||
|
} else if (key.vs.as_ls) {
|
||||||
|
sh->shader.vs_as_ls = true;
|
||||||
|
sfn_log << SfnLog::trans << "Start VS for GS\n";
|
||||||
|
m_export_processor.reset(new VertexStageExportForES(*this));
|
||||||
|
} else {
|
||||||
|
m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VertexShaderFromNir::do_process_inputs(nir_variable *input)
|
bool VertexShaderFromNir::do_process_inputs(nir_variable *input)
|
||||||
|
|
@ -80,6 +91,13 @@ bool VertexShaderFromNir::allocate_reserved_registers()
|
||||||
m_vertex_id.reset(R0x);
|
m_vertex_id.reset(R0x);
|
||||||
inject_register(0, 0, m_vertex_id, false);
|
inject_register(0, 0, m_vertex_id, false);
|
||||||
|
|
||||||
|
if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
|
||||||
|
auto R0z = new GPRValue(0,2);
|
||||||
|
R0x->set_as_input();
|
||||||
|
m_primitive_id.reset(R0z);
|
||||||
|
inject_register(0, 2, m_primitive_id, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (m_sv_values.test(es_instanceid)) {
|
if (m_sv_values.test(es_instanceid)) {
|
||||||
auto R0w = new GPRValue(0,3);
|
auto R0w = new GPRValue(0,3);
|
||||||
R0w->set_as_input();
|
R0w->set_as_input();
|
||||||
|
|
@ -87,20 +105,22 @@ bool VertexShaderFromNir::allocate_reserved_registers()
|
||||||
inject_register(0, 3, m_instance_id, false);
|
inject_register(0, 3, m_instance_id, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
priority_queue<int, std::vector<int>, std::greater<int>> q;
|
|
||||||
for (auto a: m_param_map) {
|
if (m_sv_values.test(es_rel_patch_id)) {
|
||||||
q.push(a.first);
|
auto R0y = new GPRValue(0,1);
|
||||||
|
R0y->set_as_input();
|
||||||
|
m_rel_vertex_id.reset(R0y);
|
||||||
|
inject_register(0, 1, m_rel_vertex_id, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
int next_param = 0;
|
|
||||||
while (!q.empty()) {
|
|
||||||
int loc = q.top();
|
|
||||||
q.pop();
|
|
||||||
m_param_map[loc] = next_param++;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VertexShaderFromNir::emit_shader_start()
|
||||||
|
{
|
||||||
|
m_export_processor->setup_paramn_map();
|
||||||
|
}
|
||||||
|
|
||||||
bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
|
bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
|
||||||
{
|
{
|
||||||
switch (instr->type) {
|
switch (instr->type) {
|
||||||
|
|
@ -135,48 +155,28 @@ bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_inst
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||||
|
|
||||||
|
auto address = from_nir(instr->src[1], 0);
|
||||||
|
int swizzle_base = (write_mask & 0x3) ? 0 : 2;
|
||||||
|
write_mask |= write_mask >> 2;
|
||||||
|
|
||||||
|
auto value = from_nir(instr->src[0], swizzle_base);
|
||||||
|
if (!(write_mask & 2)) {
|
||||||
|
emit_instruction(new LDSWriteInstruction(address, 1, value));
|
||||||
|
} else {
|
||||||
|
auto value1 = from_nir(instr->src[0], swizzle_base + 1);
|
||||||
|
emit_instruction(new LDSWriteInstruction(address, 1, value, value1));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool VertexShaderFromNir::do_process_outputs(nir_variable *output)
|
bool VertexShaderFromNir::do_process_outputs(nir_variable *output)
|
||||||
{
|
{
|
||||||
if (output->data.location == VARYING_SLOT_COL0 ||
|
return m_export_processor->do_process_outputs(output);
|
||||||
output->data.location == VARYING_SLOT_COL1 ||
|
|
||||||
(output->data.location >= VARYING_SLOT_VAR0 &&
|
|
||||||
output->data.location <= VARYING_SLOT_VAR31) ||
|
|
||||||
(output->data.location >= VARYING_SLOT_TEX0 &&
|
|
||||||
output->data.location <= VARYING_SLOT_TEX7) ||
|
|
||||||
output->data.location == VARYING_SLOT_BFC0 ||
|
|
||||||
output->data.location == VARYING_SLOT_BFC1 ||
|
|
||||||
output->data.location == VARYING_SLOT_CLIP_VERTEX ||
|
|
||||||
output->data.location == VARYING_SLOT_CLIP_DIST0 ||
|
|
||||||
output->data.location == VARYING_SLOT_CLIP_DIST1 ||
|
|
||||||
output->data.location == VARYING_SLOT_POS ||
|
|
||||||
output->data.location == VARYING_SLOT_PSIZ ||
|
|
||||||
output->data.location == VARYING_SLOT_FOGC ||
|
|
||||||
output->data.location == VARYING_SLOT_LAYER ||
|
|
||||||
output->data.location == VARYING_SLOT_EDGE ||
|
|
||||||
output->data.location == VARYING_SLOT_VIEWPORT
|
|
||||||
) {
|
|
||||||
|
|
||||||
r600_shader_io& io = sh_info().output[output->data.driver_location];
|
|
||||||
tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location),
|
|
||||||
true, &io.name, &io.sid);
|
|
||||||
if (! m_key.vs.as_es)
|
|
||||||
evaluate_spi_sid(io);
|
|
||||||
++sh_info().noutput;
|
|
||||||
|
|
||||||
if (output->data.location == VARYING_SLOT_PSIZ ||
|
|
||||||
output->data.location == VARYING_SLOT_EDGE ||
|
|
||||||
output->data.location == VARYING_SLOT_LAYER)
|
|
||||||
m_cur_clip_pos = 2;
|
|
||||||
|
|
||||||
if (output->data.location != VARYING_SLOT_POS &&
|
|
||||||
output->data.location != VARYING_SLOT_EDGE &&
|
|
||||||
output->data.location != VARYING_SLOT_PSIZ &&
|
|
||||||
output->data.location != VARYING_SLOT_CLIP_VERTEX)
|
|
||||||
m_param_map[output->data.location] = m_cur_param++;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
|
bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
|
||||||
|
|
@ -199,375 +199,14 @@ bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_int
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
sh_info().cc_dist_mask = 0xff;
|
|
||||||
sh_info().clip_dist_write = 0xff;
|
|
||||||
|
|
||||||
std::unique_ptr<GPRVector> clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i)
|
|
||||||
sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
|
|
||||||
|
|
||||||
GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()};
|
|
||||||
|
|
||||||
for (int i = 0; i < 8; i++) {
|
|
||||||
int oreg = i >> 2;
|
|
||||||
int ochan = i & 3;
|
|
||||||
AluInstruction *ir = nullptr;
|
|
||||||
for (int j = 0; j < 4; j++) {
|
|
||||||
ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
|
|
||||||
PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
|
|
||||||
(j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
ir->set_flag(alu_last_instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
|
|
||||||
emit_export_instruction(m_last_pos_export);
|
|
||||||
|
|
||||||
m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
|
|
||||||
emit_export_instruction(m_last_pos_export);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
|
|
||||||
std::array<uint32_t, 4> *swizzle_override)
|
|
||||||
{
|
|
||||||
std::array<uint32_t,4> swizzle;
|
|
||||||
uint32_t write_mask = 0;
|
|
||||||
|
|
||||||
if (swizzle_override) {
|
|
||||||
swizzle = *swizzle_override;
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
|
||||||
if (swizzle[i] < 6)
|
|
||||||
write_mask |= 1 << i;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
|
|
||||||
for (int i = 0; i < 4; ++i)
|
|
||||||
swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
|
|
||||||
}
|
|
||||||
|
|
||||||
sh_info().output[out_var->data.driver_location].write_mask = write_mask;
|
|
||||||
|
|
||||||
GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
|
|
||||||
set_output(out_var->data.driver_location, PValue(value));
|
|
||||||
|
|
||||||
int export_slot = 0;
|
|
||||||
|
|
||||||
switch (out_var->data.location) {
|
|
||||||
case VARYING_SLOT_EDGE: {
|
|
||||||
sh_info().vs_out_misc_write = 1;
|
|
||||||
sh_info().vs_out_edgeflag = 1;
|
|
||||||
emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
|
|
||||||
emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
|
|
||||||
sh_info().output[out_var->data.driver_location].write_mask = 0xf;
|
|
||||||
}
|
|
||||||
/* fallthrough */
|
|
||||||
case VARYING_SLOT_PSIZ:
|
|
||||||
case VARYING_SLOT_LAYER:
|
|
||||||
export_slot = 1;
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_POS:
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_CLIP_DIST0:
|
|
||||||
case VARYING_SLOT_CLIP_DIST1:
|
|
||||||
export_slot = m_cur_clip_pos++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
sfn_log << SfnLog::err << __func__ << "Unsupported location "
|
|
||||||
<< out_var->data.location << "\n";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
|
|
||||||
emit_export_instruction(m_last_pos_export);
|
|
||||||
add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
assert(out_var->data.driver_location < sh_info().noutput);
|
|
||||||
sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
|
|
||||||
|
|
||||||
int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
|
|
||||||
std::array<uint32_t,4> swizzle;
|
|
||||||
for (int i = 0; i < 4; ++i)
|
|
||||||
swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
|
|
||||||
|
|
||||||
sh_info().output[out_var->data.driver_location].write_mask = write_mask;
|
|
||||||
|
|
||||||
GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
|
|
||||||
sh_info().output[out_var->data.driver_location].gpr = value->sel();
|
|
||||||
|
|
||||||
/* This should use the registers!! */
|
|
||||||
set_output(out_var->data.driver_location, PValue(value));
|
|
||||||
|
|
||||||
auto param_loc = m_param_map.find(out_var->data.location);
|
|
||||||
assert(param_loc != m_param_map.end());
|
|
||||||
|
|
||||||
m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
|
|
||||||
emit_export_instruction(m_last_param_export);
|
|
||||||
add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexShaderFromNir::emit_stream(int stream)
|
|
||||||
{
|
|
||||||
assert(m_so_info);
|
|
||||||
if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
|
|
||||||
R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
|
||||||
if (m_so_info->output[i].output_buffer >= 4) {
|
|
||||||
R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
|
|
||||||
m_so_info->output[i].output_buffer);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
|
|
||||||
unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
|
|
||||||
std::vector<GPRVector> tmp(m_so_info->num_outputs);
|
|
||||||
|
|
||||||
/* Initialize locations where the outputs are stored. */
|
|
||||||
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
|
||||||
if (stream != -1 && stream != m_so_info->output[i].stream)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
sfn_log << SfnLog::instr << "Emit stream " << i
|
|
||||||
<< " with register index " << m_so_info->output[i].register_index << " so_gpr:";
|
|
||||||
|
|
||||||
|
|
||||||
so_gpr[i] = output_register(m_so_info->output[i].register_index);
|
|
||||||
|
|
||||||
if (!so_gpr[i]) {
|
|
||||||
sfn_log << SfnLog::err << "\nERR: register index "
|
|
||||||
<< m_so_info->output[i].register_index
|
|
||||||
<< " doesn't correspond to an output register\n";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
start_comp[i] = m_so_info->output[i].start_component;
|
|
||||||
/* Lower outputs with dst_offset < start_component.
|
|
||||||
*
|
|
||||||
* We can only output 4D vectors with a write mask, e.g. we can
|
|
||||||
* only output the W component at offset 3, etc. If we want
|
|
||||||
* to store Y, Z, or W at buffer offset 0, we need to use MOV
|
|
||||||
* to move it to X and output X. */
|
|
||||||
if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
|
|
||||||
int tmp_index = allocate_temp_register();
|
|
||||||
int sc = m_so_info->output[i].start_component;
|
|
||||||
AluInstruction *alu = nullptr;
|
|
||||||
for (int j = 0; j < m_so_info->output[i].num_components; j++) {
|
|
||||||
PValue dst(new GPRValue(tmp_index, j));
|
|
||||||
alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
|
|
||||||
tmp[i].set_reg_i(j, dst);
|
|
||||||
emit_instruction(alu);
|
|
||||||
}
|
|
||||||
if (alu)
|
|
||||||
alu->set_flag(alu_last_instr);
|
|
||||||
|
|
||||||
/* Fill the vector with masked values */
|
|
||||||
PValue dst_blank(new GPRValue(tmp_index, 7));
|
|
||||||
for (int j = m_so_info->output[i].num_components; j < 4; j++)
|
|
||||||
tmp[i].set_reg_i(j, dst_blank);
|
|
||||||
|
|
||||||
start_comp[i] = 0;
|
|
||||||
so_gpr[i] = &tmp[i];
|
|
||||||
}
|
|
||||||
sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Write outputs to buffers. */
|
|
||||||
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
|
||||||
sfn_log << SfnLog::instr << "Write output buffer " << i
|
|
||||||
<< " with register index " << m_so_info->output[i].register_index << "\n";
|
|
||||||
|
|
||||||
StreamOutIntruction *out_stream =
|
|
||||||
new StreamOutIntruction(*so_gpr[i],
|
|
||||||
m_so_info->output[i].num_components,
|
|
||||||
m_so_info->output[i].dst_offset - start_comp[i],
|
|
||||||
((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
|
|
||||||
m_so_info->output[i].output_buffer,
|
|
||||||
m_so_info->output[i].stream);
|
|
||||||
emit_export_instruction(out_stream);
|
|
||||||
m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexShaderFromNir::do_finalize()
|
void VertexShaderFromNir::do_finalize()
|
||||||
{
|
{
|
||||||
if (m_key.vs.as_gs_a) {
|
m_export_processor->finalize_exports();
|
||||||
PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
|
|
||||||
GPRVector primid({PValue(new GPRValue(0,2)), o,o,o});
|
|
||||||
m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
|
|
||||||
emit_export_instruction(m_last_param_export);
|
|
||||||
int i;
|
|
||||||
i = sh_info().noutput++;
|
|
||||||
auto& io = sh_info().output[i];
|
|
||||||
io.name = TGSI_SEMANTIC_PRIMID;
|
|
||||||
io.sid = 0;
|
|
||||||
io.gpr = 0;
|
|
||||||
io.interpolate = TGSI_INTERPOLATE_CONSTANT;
|
|
||||||
io.write_mask = 0x4;
|
|
||||||
io.spi_sid = m_key.vs.prim_id_out;
|
|
||||||
sh_info().vs_as_gs_a = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
finalize_exports();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VertexShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
|
|
||||||
switch (out_var->data.location) {
|
|
||||||
case VARYING_SLOT_PSIZ:
|
|
||||||
sh_info().vs_out_point_size = 1;
|
|
||||||
sh_info().vs_out_misc_write = 1;
|
|
||||||
/* fallthrough */
|
|
||||||
case VARYING_SLOT_POS:
|
|
||||||
return emit_varying_pos(out_var, instr);
|
|
||||||
case VARYING_SLOT_EDGE: {
|
|
||||||
std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
|
|
||||||
return emit_varying_pos(out_var, instr, &swizzle_override);
|
|
||||||
}
|
|
||||||
case VARYING_SLOT_CLIP_VERTEX:
|
|
||||||
return emit_clip_vertices(out_var, instr);
|
|
||||||
case VARYING_SLOT_CLIP_DIST0:
|
|
||||||
case VARYING_SLOT_CLIP_DIST1:
|
|
||||||
m_num_clip_dist += 4;
|
|
||||||
return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
|
|
||||||
case VARYING_SLOT_LAYER: {
|
|
||||||
sh_info().vs_out_misc_write = 1;
|
|
||||||
sh_info().vs_out_layer = 1;
|
|
||||||
std::array<uint32_t, 4> swz = {7,7,0,7};
|
|
||||||
return emit_varying_pos(out_var, instr, &swz) &&
|
|
||||||
emit_varying_param(out_var, instr);
|
|
||||||
}
|
|
||||||
case VARYING_SLOT_VIEW_INDEX:
|
|
||||||
return emit_varying_pos(out_var, instr) &&
|
|
||||||
emit_varying_param(out_var, instr);
|
|
||||||
|
|
||||||
default:
|
|
||||||
if (out_var->data.location <= VARYING_SLOT_VAR31 ||
|
|
||||||
(out_var->data.location >= VARYING_SLOT_TEX0 &&
|
|
||||||
out_var->data.location <= VARYING_SLOT_TEX7))
|
|
||||||
return emit_varying_param(out_var, instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
|
|
||||||
out_var->data.location);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexShaderFromNirForFS::finalize_exports()
|
|
||||||
{
|
|
||||||
if (m_so_info && m_so_info->num_outputs)
|
|
||||||
emit_stream(-1);
|
|
||||||
|
|
||||||
m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
|
|
||||||
|
|
||||||
if (!m_last_param_export) {
|
|
||||||
GPRVector value(0,{7,7,7,7});
|
|
||||||
m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
|
|
||||||
emit_export_instruction(m_last_param_export);
|
|
||||||
}
|
|
||||||
m_last_param_export->set_last();
|
|
||||||
|
|
||||||
if (!m_last_pos_export) {
|
|
||||||
GPRVector value(0,{7,7,7,7});
|
|
||||||
m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
|
|
||||||
emit_export_instruction(m_last_pos_export);
|
|
||||||
}
|
|
||||||
m_last_pos_export->set_last();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
VertexShaderFromNirForGS::VertexShaderFromNirForGS(r600_pipe_shader *sh,
|
|
||||||
r600_pipe_shader_selector& sel,
|
|
||||||
const r600_shader_key &key,
|
|
||||||
const r600_shader *gs_shader):
|
|
||||||
VertexShaderFromNir(sh, sel, key),
|
|
||||||
m_gs_shader(gs_shader)
|
|
||||||
{
|
|
||||||
sh->shader.vs_as_es = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexShaderFromNirForGS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
|
|
||||||
assert(m_gs_shader);
|
|
||||||
|
|
||||||
int ring_offset = -1;
|
|
||||||
const r600_shader_io& out_io = sh_info().output[out_var->data.driver_location];
|
|
||||||
|
|
||||||
sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
|
|
||||||
<< " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
|
|
||||||
for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
|
|
||||||
auto& in_io = m_gs_shader->input[k];
|
|
||||||
sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
|
|
||||||
|
|
||||||
if (in_io.name == out_io.name &&
|
|
||||||
in_io.sid == out_io.sid) {
|
|
||||||
ring_offset = in_io.ring_offset;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (out_var->data.location == VARYING_SLOT_VIEWPORT)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (ring_offset == -1) {
|
|
||||||
sfn_log << SfnLog::err << "VS defines output at "
|
|
||||||
<< out_var->data.driver_location << "name=" << out_io.name
|
|
||||||
<< " sid=" << out_io.sid << " that is not consumed as GS input\n";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t write_mask = (1 << instr->num_components) - 1;
|
|
||||||
|
|
||||||
std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
|
|
||||||
swizzle_from_mask(instr->num_components)));
|
|
||||||
|
|
||||||
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
|
|
||||||
ring_offset >> 2, 4, PValue());
|
|
||||||
emit_export_instruction(ir);
|
|
||||||
|
|
||||||
sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
|
|
||||||
if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
|
|
||||||
out_var->data.location == VARYING_SLOT_CLIP_DIST1)
|
|
||||||
m_num_clip_dist += 4;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexShaderFromNirForGS::finalize_exports()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
VertexShaderFromNirForES::VertexShaderFromNirForES(r600_pipe_shader *sh,
|
|
||||||
UNUSED const pipe_stream_output_info *so_info,
|
|
||||||
r600_pipe_shader_selector& sel,
|
|
||||||
const r600_shader_key &key):
|
|
||||||
VertexShaderFromNir(sh, sel, key)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexShaderFromNirForES::do_emit_store_deref(UNUSED const nir_variable *out_var,
|
|
||||||
UNUSED nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexShaderFromNirForES::finalize_exports()
|
|
||||||
{
|
{
|
||||||
|
return m_export_processor->store_deref(out_var, instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,23 +28,21 @@
|
||||||
#define sfn_vertex_shader_from_nir_h
|
#define sfn_vertex_shader_from_nir_h
|
||||||
|
|
||||||
#include "sfn_shader_base.h"
|
#include "sfn_shader_base.h"
|
||||||
|
#include "sfn_vertexstageexport.h"
|
||||||
|
|
||||||
namespace r600 {
|
namespace r600 {
|
||||||
|
|
||||||
class VertexShaderFromNir : public ShaderFromNirProcessor {
|
class VertexShaderFromNir : public VertexStage {
|
||||||
public:
|
public:
|
||||||
VertexShaderFromNir(r600_pipe_shader *sh,
|
VertexShaderFromNir(r600_pipe_shader *sh,
|
||||||
r600_pipe_shader_selector &sel,
|
r600_pipe_shader_selector &sel,
|
||||||
const r600_shader_key &key);
|
const r600_shader_key &key, r600_shader *gs_shader);
|
||||||
|
|
||||||
bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
|
bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
|
||||||
bool scan_sysvalue_access(nir_instr *instr) override;
|
bool scan_sysvalue_access(nir_instr *instr) override;
|
||||||
|
|
||||||
|
PValue primitive_id() override {return m_primitive_id;}
|
||||||
protected:
|
protected:
|
||||||
bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
|
|
||||||
std::array<uint32_t, 4> *swizzle_override = nullptr);
|
|
||||||
bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
|
|
||||||
bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
|
|
||||||
bool emit_stream(int stream);
|
|
||||||
|
|
||||||
// todo: encapsulate
|
// todo: encapsulate
|
||||||
unsigned m_num_clip_dist;
|
unsigned m_num_clip_dist;
|
||||||
|
|
@ -54,49 +52,26 @@ protected:
|
||||||
unsigned m_enabled_stream_buffers_mask;
|
unsigned m_enabled_stream_buffers_mask;
|
||||||
const pipe_stream_output_info *m_so_info;
|
const pipe_stream_output_info *m_so_info;
|
||||||
void do_finalize() override;
|
void do_finalize() override;
|
||||||
private:
|
|
||||||
|
|
||||||
|
std::map<unsigned, unsigned> m_param_map;
|
||||||
|
private:
|
||||||
|
bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
||||||
|
void finalize_exports();
|
||||||
|
|
||||||
|
void emit_shader_start() override;
|
||||||
bool do_process_inputs(nir_variable *input) override;
|
bool do_process_inputs(nir_variable *input) override;
|
||||||
bool allocate_reserved_registers() override;
|
bool allocate_reserved_registers() override;
|
||||||
bool do_process_outputs(nir_variable *output) override;
|
bool do_process_outputs(nir_variable *output) override;
|
||||||
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
|
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
|
||||||
|
bool emit_store_local_shared(nir_intrinsic_instr* instr);
|
||||||
virtual void finalize_exports() = 0;
|
|
||||||
|
|
||||||
unsigned m_cur_param;
|
|
||||||
std::map<unsigned, unsigned> m_param_map;
|
|
||||||
unsigned m_cur_clip_pos;
|
|
||||||
|
|
||||||
PValue m_vertex_id;
|
PValue m_vertex_id;
|
||||||
PValue m_instance_id;
|
PValue m_instance_id;
|
||||||
|
PValue m_rel_vertex_id;
|
||||||
|
PValue m_primitive_id;
|
||||||
r600_shader_key m_key;
|
r600_shader_key m_key;
|
||||||
};
|
|
||||||
|
|
||||||
class VertexShaderFromNirForFS : public VertexShaderFromNir {
|
std::unique_ptr<VertexStageExportBase> m_export_processor;
|
||||||
public:
|
|
||||||
using VertexShaderFromNir::VertexShaderFromNir;
|
|
||||||
|
|
||||||
bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
|
||||||
private:
|
|
||||||
void finalize_exports() override;
|
|
||||||
};
|
|
||||||
|
|
||||||
class VertexShaderFromNirForGS : public VertexShaderFromNir {
|
|
||||||
public:
|
|
||||||
VertexShaderFromNirForGS(r600_pipe_shader *sh, r600_pipe_shader_selector &sel,
|
|
||||||
const r600_shader_key &key, const r600_shader *gs_shader);
|
|
||||||
bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
|
||||||
void finalize_exports() override;
|
|
||||||
|
|
||||||
const r600_shader *m_gs_shader;
|
|
||||||
};
|
|
||||||
|
|
||||||
class VertexShaderFromNirForES : public VertexShaderFromNir {
|
|
||||||
public:
|
|
||||||
VertexShaderFromNirForES(r600_pipe_shader *sh, const pipe_stream_output_info *so_info, r600_pipe_shader_selector &sel,
|
|
||||||
const r600_shader_key &key);
|
|
||||||
bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
|
||||||
void finalize_exports() override;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
456
src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
Normal file
456
src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
Normal file
|
|
@ -0,0 +1,456 @@
|
||||||
|
#include "sfn_vertexstageexport.h"
|
||||||
|
|
||||||
|
#include "tgsi/tgsi_from_mesa.h"
|
||||||
|
|
||||||
|
namespace r600 {
|
||||||
|
|
||||||
|
using std::priority_queue;
|
||||||
|
|
||||||
|
VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
|
||||||
|
m_proc(proc),
|
||||||
|
m_cur_clip_pos(1),
|
||||||
|
m_cur_param(0)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
VertexStageExportBase::~VertexStageExportBase()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
|
||||||
|
const pipe_stream_output_info *so_info,
|
||||||
|
r600_pipe_shader *pipe_shader, const r600_shader_key &key):
|
||||||
|
VertexStageExportBase(proc),
|
||||||
|
m_last_param_export(nullptr),
|
||||||
|
m_last_pos_export(nullptr),
|
||||||
|
m_num_clip_dist(0),
|
||||||
|
m_enabled_stream_buffers_mask(0),
|
||||||
|
m_so_info(so_info),
|
||||||
|
m_pipe_shader(pipe_shader),
|
||||||
|
m_key(key)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexStageExportBase::setup_paramn_map()
|
||||||
|
{
|
||||||
|
priority_queue<int, std::vector<int>, std::greater<int>> q;
|
||||||
|
for (auto a: m_param_map) {
|
||||||
|
q.push(a.first);
|
||||||
|
}
|
||||||
|
|
||||||
|
int next_param = 0;
|
||||||
|
while (!q.empty()) {
|
||||||
|
int loc = q.top();
|
||||||
|
q.pop();
|
||||||
|
m_param_map[loc] = next_param++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportBase::do_process_outputs(nir_variable *output)
|
||||||
|
{
|
||||||
|
if (output->data.location == VARYING_SLOT_COL0 ||
|
||||||
|
output->data.location == VARYING_SLOT_COL1 ||
|
||||||
|
(output->data.location >= VARYING_SLOT_VAR0 &&
|
||||||
|
output->data.location <= VARYING_SLOT_VAR31) ||
|
||||||
|
(output->data.location >= VARYING_SLOT_TEX0 &&
|
||||||
|
output->data.location <= VARYING_SLOT_TEX7) ||
|
||||||
|
output->data.location == VARYING_SLOT_BFC0 ||
|
||||||
|
output->data.location == VARYING_SLOT_BFC1 ||
|
||||||
|
output->data.location == VARYING_SLOT_CLIP_VERTEX ||
|
||||||
|
output->data.location == VARYING_SLOT_CLIP_DIST0 ||
|
||||||
|
output->data.location == VARYING_SLOT_CLIP_DIST1 ||
|
||||||
|
output->data.location == VARYING_SLOT_POS ||
|
||||||
|
output->data.location == VARYING_SLOT_PSIZ ||
|
||||||
|
output->data.location == VARYING_SLOT_FOGC ||
|
||||||
|
output->data.location == VARYING_SLOT_LAYER ||
|
||||||
|
output->data.location == VARYING_SLOT_EDGE ||
|
||||||
|
output->data.location == VARYING_SLOT_VIEWPORT
|
||||||
|
) {
|
||||||
|
|
||||||
|
r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
|
||||||
|
tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location),
|
||||||
|
true, &io.name, &io.sid);
|
||||||
|
|
||||||
|
m_proc.evaluate_spi_sid(io);
|
||||||
|
io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
|
||||||
|
<< output->data.location_frac;
|
||||||
|
++m_proc.sh_info().noutput;
|
||||||
|
|
||||||
|
if (output->data.location == VARYING_SLOT_PSIZ ||
|
||||||
|
output->data.location == VARYING_SLOT_EDGE ||
|
||||||
|
output->data.location == VARYING_SLOT_LAYER)
|
||||||
|
m_cur_clip_pos = 2;
|
||||||
|
|
||||||
|
if (output->data.location != VARYING_SLOT_POS &&
|
||||||
|
output->data.location != VARYING_SLOT_EDGE &&
|
||||||
|
output->data.location != VARYING_SLOT_PSIZ &&
|
||||||
|
output->data.location != VARYING_SLOT_CLIP_VERTEX)
|
||||||
|
m_param_map[output->data.location] = m_cur_param++;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
|
||||||
|
switch (out_var->data.location) {
|
||||||
|
case VARYING_SLOT_PSIZ:
|
||||||
|
m_proc.sh_info().vs_out_point_size = 1;
|
||||||
|
m_proc.sh_info().vs_out_misc_write = 1;
|
||||||
|
/* fallthrough */
|
||||||
|
case VARYING_SLOT_POS:
|
||||||
|
return emit_varying_pos(out_var, instr);
|
||||||
|
case VARYING_SLOT_EDGE: {
|
||||||
|
std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
|
||||||
|
return emit_varying_pos(out_var, instr, &swizzle_override);
|
||||||
|
}
|
||||||
|
case VARYING_SLOT_CLIP_VERTEX:
|
||||||
|
return emit_clip_vertices(out_var, instr);
|
||||||
|
case VARYING_SLOT_CLIP_DIST0:
|
||||||
|
case VARYING_SLOT_CLIP_DIST1:
|
||||||
|
m_num_clip_dist += 4;
|
||||||
|
return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
|
||||||
|
case VARYING_SLOT_LAYER: {
|
||||||
|
m_proc.sh_info().vs_out_misc_write = 1;
|
||||||
|
m_proc.sh_info().vs_out_layer = 1;
|
||||||
|
std::array<uint32_t, 4> swz = {7,7,0,7};
|
||||||
|
return emit_varying_pos(out_var, instr, &swz) &&
|
||||||
|
emit_varying_param(out_var, instr);
|
||||||
|
}
|
||||||
|
case VARYING_SLOT_VIEW_INDEX:
|
||||||
|
return emit_varying_pos(out_var, instr) &&
|
||||||
|
emit_varying_param(out_var, instr);
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (out_var->data.location <= VARYING_SLOT_VAR31 ||
|
||||||
|
(out_var->data.location >= VARYING_SLOT_TEX0 &&
|
||||||
|
out_var->data.location <= VARYING_SLOT_TEX7))
|
||||||
|
return emit_varying_param(out_var, instr);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
|
||||||
|
out_var->data.location);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
|
||||||
|
std::array<uint32_t, 4> *swizzle_override)
|
||||||
|
{
|
||||||
|
std::array<uint32_t,4> swizzle;
|
||||||
|
uint32_t write_mask = 0;
|
||||||
|
|
||||||
|
if (swizzle_override) {
|
||||||
|
swizzle = *swizzle_override;
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
if (swizzle[i] < 6)
|
||||||
|
write_mask |= 1 << i;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
|
||||||
|
|
||||||
|
GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
|
||||||
|
m_proc.set_output(out_var->data.driver_location, PValue(value));
|
||||||
|
|
||||||
|
int export_slot = 0;
|
||||||
|
|
||||||
|
switch (out_var->data.location) {
|
||||||
|
case VARYING_SLOT_EDGE: {
|
||||||
|
m_proc.sh_info().vs_out_misc_write = 1;
|
||||||
|
m_proc.sh_info().vs_out_edgeflag = 1;
|
||||||
|
m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
|
||||||
|
m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
|
||||||
|
}
|
||||||
|
/* fallthrough */
|
||||||
|
case VARYING_SLOT_PSIZ:
|
||||||
|
case VARYING_SLOT_LAYER:
|
||||||
|
export_slot = 1;
|
||||||
|
break;
|
||||||
|
case VARYING_SLOT_POS:
|
||||||
|
break;
|
||||||
|
case VARYING_SLOT_CLIP_DIST0:
|
||||||
|
case VARYING_SLOT_CLIP_DIST1:
|
||||||
|
export_slot = m_cur_clip_pos++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
sfn_log << SfnLog::err << __func__ << "Unsupported location "
|
||||||
|
<< out_var->data.location << "\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
|
||||||
|
m_proc.emit_export_instruction(m_last_pos_export);
|
||||||
|
m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
assert(out_var->data.driver_location < m_proc.sh_info().noutput);
|
||||||
|
sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
|
||||||
|
|
||||||
|
int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
|
||||||
|
std::array<uint32_t,4> swizzle;
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
|
||||||
|
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
|
||||||
|
|
||||||
|
GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
|
||||||
|
|
||||||
|
/* This should use the registers!! */
|
||||||
|
m_proc.set_output(out_var->data.driver_location, PValue(value));
|
||||||
|
|
||||||
|
auto param_loc = m_param_map.find(out_var->data.location);
|
||||||
|
assert(param_loc != m_param_map.end());
|
||||||
|
|
||||||
|
m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
|
||||||
|
m_proc.emit_export_instruction(m_last_param_export);
|
||||||
|
m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
m_proc.sh_info().cc_dist_mask = 0xff;
|
||||||
|
m_proc.sh_info().clip_dist_write = 0xff;
|
||||||
|
|
||||||
|
std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
|
||||||
|
|
||||||
|
GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
int oreg = i >> 2;
|
||||||
|
int ochan = i & 3;
|
||||||
|
AluInstruction *ir = nullptr;
|
||||||
|
for (int j = 0; j < 4; j++) {
|
||||||
|
ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
|
||||||
|
PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
|
||||||
|
(j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
|
||||||
|
m_proc.emit_instruction(ir);
|
||||||
|
}
|
||||||
|
ir->set_flag(alu_last_instr);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
|
||||||
|
m_proc.emit_export_instruction(m_last_pos_export);
|
||||||
|
|
||||||
|
m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
|
||||||
|
m_proc.emit_export_instruction(m_last_pos_export);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexStageExportForFS::finalize_exports()
|
||||||
|
{
|
||||||
|
if (m_key.vs.as_gs_a) {
|
||||||
|
PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
|
||||||
|
GPRVector primid({m_proc.primitive_id(), o,o,o});
|
||||||
|
m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
|
||||||
|
m_proc.emit_export_instruction(m_last_param_export);
|
||||||
|
int i;
|
||||||
|
i = m_proc.sh_info().noutput++;
|
||||||
|
auto& io = m_proc.sh_info().output[i];
|
||||||
|
io.name = TGSI_SEMANTIC_PRIMID;
|
||||||
|
io.sid = 0;
|
||||||
|
io.gpr = 0;
|
||||||
|
io.interpolate = TGSI_INTERPOLATE_CONSTANT;
|
||||||
|
io.write_mask = 0x1;
|
||||||
|
io.spi_sid = m_key.vs.prim_id_out;
|
||||||
|
m_proc.sh_info().vs_as_gs_a = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_so_info && m_so_info->num_outputs)
|
||||||
|
emit_stream(-1);
|
||||||
|
|
||||||
|
m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
|
||||||
|
|
||||||
|
if (!m_last_param_export) {
|
||||||
|
GPRVector value(0,{7,7,7,7});
|
||||||
|
m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
|
||||||
|
m_proc.emit_export_instruction(m_last_param_export);
|
||||||
|
}
|
||||||
|
m_last_param_export->set_last();
|
||||||
|
|
||||||
|
if (!m_last_pos_export) {
|
||||||
|
GPRVector value(0,{7,7,7,7});
|
||||||
|
m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
|
||||||
|
m_proc.emit_export_instruction(m_last_pos_export);
|
||||||
|
}
|
||||||
|
m_last_pos_export->set_last();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForFS::emit_stream(int stream)
|
||||||
|
{
|
||||||
|
assert(m_so_info);
|
||||||
|
if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
|
||||||
|
R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
||||||
|
if (m_so_info->output[i].output_buffer >= 4) {
|
||||||
|
R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
|
||||||
|
m_so_info->output[i].output_buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
|
unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
|
std::vector<GPRVector> tmp(m_so_info->num_outputs);
|
||||||
|
|
||||||
|
/* Initialize locations where the outputs are stored. */
|
||||||
|
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
||||||
|
if (stream != -1 && stream != m_so_info->output[i].stream)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sfn_log << SfnLog::instr << "Emit stream " << i
|
||||||
|
<< " with register index " << m_so_info->output[i].register_index << " so_gpr:";
|
||||||
|
|
||||||
|
|
||||||
|
so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
|
||||||
|
|
||||||
|
if (!so_gpr[i]) {
|
||||||
|
sfn_log << SfnLog::err << "\nERR: register index "
|
||||||
|
<< m_so_info->output[i].register_index
|
||||||
|
<< " doesn't correspond to an output register\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
start_comp[i] = m_so_info->output[i].start_component;
|
||||||
|
/* Lower outputs with dst_offset < start_component.
|
||||||
|
*
|
||||||
|
* We can only output 4D vectors with a write mask, e.g. we can
|
||||||
|
* only output the W component at offset 3, etc. If we want
|
||||||
|
* to store Y, Z, or W at buffer offset 0, we need to use MOV
|
||||||
|
* to move it to X and output X. */
|
||||||
|
if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
|
||||||
|
int tmp_index = m_proc.allocate_temp_register();
|
||||||
|
int sc = m_so_info->output[i].start_component;
|
||||||
|
AluInstruction *alu = nullptr;
|
||||||
|
for (int j = 0; j < m_so_info->output[i].num_components; j++) {
|
||||||
|
PValue dst(new GPRValue(tmp_index, j));
|
||||||
|
alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
|
||||||
|
tmp[i].set_reg_i(j, dst);
|
||||||
|
m_proc.emit_instruction(alu);
|
||||||
|
}
|
||||||
|
if (alu)
|
||||||
|
alu->set_flag(alu_last_instr);
|
||||||
|
|
||||||
|
/* Fill the vector with masked values */
|
||||||
|
PValue dst_blank(new GPRValue(tmp_index, 7));
|
||||||
|
for (int j = m_so_info->output[i].num_components; j < 4; j++)
|
||||||
|
tmp[i].set_reg_i(j, dst_blank);
|
||||||
|
|
||||||
|
start_comp[i] = 0;
|
||||||
|
so_gpr[i] = &tmp[i];
|
||||||
|
}
|
||||||
|
sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write outputs to buffers. */
|
||||||
|
for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
|
||||||
|
sfn_log << SfnLog::instr << "Write output buffer " << i
|
||||||
|
<< " with register index " << m_so_info->output[i].register_index << "\n";
|
||||||
|
|
||||||
|
StreamOutIntruction *out_stream =
|
||||||
|
new StreamOutIntruction(*so_gpr[i],
|
||||||
|
m_so_info->output[i].num_components,
|
||||||
|
m_so_info->output[i].dst_offset - start_comp[i],
|
||||||
|
((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
|
||||||
|
m_so_info->output[i].output_buffer,
|
||||||
|
m_so_info->output[i].stream);
|
||||||
|
m_proc.emit_export_instruction(out_stream);
|
||||||
|
m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
|
||||||
|
const r600_shader *gs_shader):
|
||||||
|
VertexStageExportBase(proc),
|
||||||
|
m_gs_shader(gs_shader)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
|
||||||
|
int ring_offset = -1;
|
||||||
|
const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
|
||||||
|
|
||||||
|
sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
|
||||||
|
<< " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
|
||||||
|
for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
|
||||||
|
auto& in_io = m_gs_shader->input[k];
|
||||||
|
sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
|
||||||
|
|
||||||
|
if (in_io.name == out_io.name &&
|
||||||
|
in_io.sid == out_io.sid) {
|
||||||
|
ring_offset = in_io.ring_offset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (out_var->data.location == VARYING_SLOT_VIEWPORT)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (ring_offset == -1) {
|
||||||
|
sfn_log << SfnLog::err << "VS defines output at "
|
||||||
|
<< out_var->data.driver_location << "name=" << out_io.name
|
||||||
|
<< " sid=" << out_io.sid << " that is not consumed as GS input\n";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t write_mask = (1 << instr->num_components) - 1;
|
||||||
|
|
||||||
|
std::unique_ptr<GPRVector> value(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
|
||||||
|
swizzle_from_mask(instr->num_components)));
|
||||||
|
|
||||||
|
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
|
||||||
|
ring_offset >> 2, 4, PValue());
|
||||||
|
m_proc.emit_export_instruction(ir);
|
||||||
|
|
||||||
|
m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
|
||||||
|
if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
|
||||||
|
out_var->data.location == VARYING_SLOT_CLIP_DIST1)
|
||||||
|
m_num_clip_dist += 4;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexStageExportForGS::finalize_exports()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
|
||||||
|
VertexStageExportBase(proc)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexStageExportForES::finalize_exports()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
85
src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
Normal file
85
src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
#ifndef VERTEXSTAGEEXPORT_H
|
||||||
|
#define VERTEXSTAGEEXPORT_H
|
||||||
|
|
||||||
|
#include "sfn_shader_base.h"
|
||||||
|
|
||||||
|
namespace r600 {
|
||||||
|
|
||||||
|
class VertexStage : public ShaderFromNirProcessor {
|
||||||
|
public:
|
||||||
|
using ShaderFromNirProcessor::ShaderFromNirProcessor;
|
||||||
|
|
||||||
|
virtual PValue primitive_id() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class VertexStageExportBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
VertexStageExportBase(VertexStage& proc);
|
||||||
|
virtual ~VertexStageExportBase();
|
||||||
|
void setup_paramn_map();
|
||||||
|
virtual bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
|
||||||
|
virtual void finalize_exports() = 0;
|
||||||
|
virtual bool do_process_outputs(nir_variable *output);
|
||||||
|
int cur_param() const {return m_cur_param;}
|
||||||
|
protected:
|
||||||
|
VertexStage& m_proc;
|
||||||
|
std::map<unsigned, unsigned> m_param_map;
|
||||||
|
int m_cur_clip_pos;
|
||||||
|
int m_cur_param;
|
||||||
|
};
|
||||||
|
|
||||||
|
class VertexStageExportForFS : public VertexStageExportBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
VertexStageExportForFS(VertexStage& proc,
|
||||||
|
const pipe_stream_output_info *so_info,
|
||||||
|
r600_pipe_shader *pipe_shader,
|
||||||
|
const r600_shader_key& key);
|
||||||
|
|
||||||
|
bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
||||||
|
void finalize_exports() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
|
||||||
|
bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
|
||||||
|
std::array<uint32_t, 4> *swizzle_override = nullptr);
|
||||||
|
bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
|
||||||
|
bool emit_stream(int stream);
|
||||||
|
|
||||||
|
ExportInstruction *m_last_param_export;
|
||||||
|
ExportInstruction *m_last_pos_export;
|
||||||
|
|
||||||
|
int m_num_clip_dist;
|
||||||
|
int m_enabled_stream_buffers_mask;
|
||||||
|
const pipe_stream_output_info *m_so_info;
|
||||||
|
r600_pipe_shader *m_pipe_shader;
|
||||||
|
const r600_shader_key& m_key;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class VertexStageExportForGS : public VertexStageExportBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
VertexStageExportForGS(VertexStage& proc,
|
||||||
|
const r600_shader *gs_shader);
|
||||||
|
bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
||||||
|
void finalize_exports() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned m_num_clip_dist;
|
||||||
|
const r600_shader *m_gs_shader;
|
||||||
|
};
|
||||||
|
|
||||||
|
class VertexStageExportForES : public VertexStageExportBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
VertexStageExportForES(VertexStage& proc);
|
||||||
|
bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
|
||||||
|
void finalize_exports() override;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // VERTEXSTAGEEXPORT_H
|
||||||
Loading…
Add table
Reference in a new issue