r600/sfn: Add imageio support

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5206>
This commit is contained in:
Gert Wollny 2020-05-07 00:08:15 +02:00 committed by Marge Bot
parent b303540c48
commit 249dbcb769
3 changed files with 325 additions and 47 deletions

View file

@ -4,9 +4,40 @@
#include "sfn_instruction_gds.h"
#include "sfn_instruction_misc.h"
#include "../r600_pipe.h"
#include "../r600_asm.h"
namespace r600 {
EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
EmitInstruction(processor),
m_require_rat_return_address(false)
{
}
void EmitSSBOInstruction::set_require_rat_return_address()
{
m_require_rat_return_address = true;
}
bool
EmitSSBOInstruction::load_rat_return_address()
{
if (m_require_rat_return_address) {
m_rat_return_address = get_temp_vec4();
emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
{alu_write, alu_last_instr}));
m_require_rat_return_address = false;
}
return true;
}
bool EmitSSBOInstruction::do_emit(nir_instr* instr)
{
const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
@ -29,8 +60,24 @@ bool EmitSSBOInstruction::do_emit(nir_instr* instr)
return emit_atomic_pre_dec(intr);
case nir_intrinsic_load_ssbo:
return emit_load_ssbo(intr);
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_ssbo:
return emit_store_ssbo(intr);
case nir_intrinsic_ssbo_atomic_add:
return emit_ssbo_atomic_op(intr);
case nir_intrinsic_image_store:
return emit_image_store(intr);
case nir_intrinsic_image_load:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
return emit_image_load(intr);
default:
return false;
}
@ -111,6 +158,48 @@ ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
}
}
/* Map a NIR SSBO/image intrinsic onto the returning RAT opcode.
 *
 * opcode: the NIR intrinsic to translate.
 * format: only consulted for image_atomic_comp_swap, where a float format
 *         selects the float compare-exchange and anything else the integer
 *         one.
 *
 * Any intrinsic not listed here hits unreachable().
 */
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
{
   switch (opcode) {
   /* A plain image load is issued as a NOP that returns a value. */
   case nir_intrinsic_image_load:
      return RatInstruction::NOP_RTN;

   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return RatInstruction::ADD_RTN;

   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return RatInstruction::AND_RTN;

   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return RatInstruction::OR_RTN;

   case nir_intrinsic_image_atomic_xor:
      return RatInstruction::XOR_RTN;

   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return RatInstruction::XCHG_RTN;

   case nir_intrinsic_image_atomic_comp_swap:
      return util_format_is_float(format) ? RatInstruction::CMPXCHG_FLT_RTN
                                          : RatInstruction::CMPXCHG_INT_RTN;

   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin:
      return RatInstruction::MIN_INT_RTN;

   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax:
      return RatInstruction::MAX_INT_RTN;

   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin:
      return RatInstruction::MIN_UINT_RTN;

   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax:
      return RatInstruction::MAX_UINT_RTN;

   default:
      unreachable("Unsupported RAT instruction");
   }
}
bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
{
@ -127,22 +216,19 @@ bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
return true;
}
bool EmitSSBOInstruction::load_atomic_inc_limits()
{
m_atomic_update = get_temp_register();
emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
{alu_write, alu_last_instr}));
return true;
}
bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
{
GPRVector dest = make_dest(instr);
PValue uav_id = from_nir(instr->src[0], 0);
if (!m_atomic_limit) {
int one_tmp = allocate_temp_register();
m_atomic_limit = PValue(new GPRValue(one_tmp, 0));
emit_instruction(new AluInstruction(op1_mov, m_atomic_limit,
PValue(new LiteralValue(0xffffffff)),
{alu_write, alu_last_instr}));
}
auto ir = new GDSInstr(DS_OP_INC_RET, dest, m_atomic_limit, uav_id,
GPRVector dest = make_dest(instr);
auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
nir_intrinsic_base(instr));
emit_instruction(ir);
return true;
@ -154,18 +240,10 @@ bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
PValue uav_id = from_nir(instr->src[0], 0);
int one_tmp = allocate_temp_register();
PValue value(new GPRValue(one_tmp, 0));
emit_instruction(new AluInstruction(op1_mov, value, Value::one_i,
{alu_write, alu_last_instr}));
auto ir = new GDSInstr(DS_OP_SUB_RET, dest, value, uav_id,
auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
nir_intrinsic_base(instr));
emit_instruction(ir);
ir = new GDSInstr(DS_OP_READ_RET, dest, uav_id, nir_intrinsic_base(instr));
emit_instruction(ir);
return true;
}
@ -260,6 +338,169 @@ bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
return true;
}
bool
EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
{
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
auto undef = from_nir(intrin->src[2], 0);
auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
auto unknown = from_nir(intrin->src[4], 0);
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin)) {
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
}
auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, value, coord, imageid,
image_offset, 1, 0xf, 0, false);
emit_instruction(store);
return true;
}
/* Emit a returning RAT atomic for an SSBO atomic intrinsic and fetch the
 * result into the intrinsic's destination.
 *
 * src[0] selects the buffer: a constant becomes the immediate id, anything
 * else is passed on as a value (id stays 0).
 * src[1] is replicated into all four components of the address vector.
 * src[2] is moved into component 0 of m_rat_return_address and component 2
 * is zeroed before the RAT instruction is emitted.
 *
 * The RAT opcode is looked up with PIPE_FORMAT_R32_UINT. After the RAT
 * instruction a WaitAck and a vertex fetch that reads from component 1 of
 * m_rat_return_address are emitted.
 *
 * Always returns true.
 */
bool
EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
{
   int imageid = 0;
   PValue image_offset;

   if (!nir_src_is_const(intrin->src[0]))
      image_offset = from_nir(intrin->src[0], 0);
   else
      imageid = nir_src_as_int(intrin->src[0]);

   auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord = from_nir_with_fetch_constant(intrin->src[1], 0);

   /* Operand into component 0, zero into component 2. */
   auto load_operand = new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
                                          from_nir(intrin->src[2], 0), write);
   emit_instruction(load_operand);

   auto clear_z = new AluInstruction(op1_mov, m_rat_return_address.reg_i(2),
                                     Value::zero, last_write);
   emit_instruction(clear_z);

   GPRVector out_vec({coord, coord, coord, coord});

   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address,
                                    out_vec, imageid, image_offset,
                                    1, 0xf, 0, true);
   emit_instruction(atomic);

   /* Wait for the acknowledge before reading the returned value back. */
   emit_instruction(new WaitAck(0));

   GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);

   auto readback = new FetchInstruction(vc_fetch,
                                        no_index_offset,
                                        fmt_32,
                                        vtx_nf_int,
                                        vtx_es_none,
                                        m_rat_return_address.reg_i(1),
                                        dest,
                                        0,
                                        false,
                                        0xf,
                                        R600_IMAGE_IMMED_RESOURCE_OFFSET,
                                        0,
                                        bim_none,
                                        false,
                                        false,
                                        0,
                                        0,
                                        0,
                                        PValue(),
                                        {0,7,7,7});
   readback->set_flag(vtx_srf_mode);
   readback->set_flag(vtx_use_tc);
   emit_instruction(readback);

   return true;
}
bool
EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
{
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin));
GPRVector::Swizzle swz = {0,1,2,3};
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin)) {
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
}
if (intrin->intrinsic != nir_intrinsic_image_load) {
if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[4], 0), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
} else {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
}
}
auto store = new RatInstruction(cf_mem_rat, rat_op, m_rat_return_address, coord, imageid,
image_offset, 1, 0xf, 0, true);
emit_instruction(store);
return fetch_return_value(intrin);
}
/* Emit a WaitAck followed by a vertex fetch that reads the value returned
 * by a RAT instruction into the intrinsic's destination.
 *
 * The fetch format settings are obtained by handing the intrinsic's pipe
 * format to r600_vertex_data_type(); the fetch source is component 1 of
 * m_rat_return_address.
 *
 * Always returns true.
 */
bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
{
   emit_instruction(new WaitAck(0));

   pipe_format format = nir_intrinsic_format(intrin);

   /* Defaults handed to r600_vertex_data_type(), which receives pointers to
    * all four (presumably overwriting them for the given format). */
   unsigned fmt = fmt_32;
   unsigned num_format = 0;
   unsigned format_comp = 0;
   unsigned endian = 0;
   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));

   auto readback = new FetchInstruction(vc_fetch,
                                        no_index_offset,
                                        static_cast<EVTXDataFormat>(fmt),
                                        static_cast<EVFetchNumFormat>(num_format),
                                        static_cast<EVFetchEndianSwap>(endian),
                                        m_rat_return_address.reg_i(1),
                                        dest,
                                        0,
                                        false,
                                        0x3,
                                        R600_IMAGE_IMMED_RESOURCE_OFFSET,
                                        0,
                                        bim_none,
                                        false,
                                        false,
                                        0,
                                        0,
                                        0,
                                        PValue(),
                                        {0,1,2,3});
   readback->set_flag(vtx_srf_mode);
   readback->set_flag(vtx_use_tc);

   /* A non-zero format_comp marks the fetch as signed. */
   if (format_comp)
      readback->set_flag(vtx_format_comp_signed);

   emit_instruction(readback);
   return true;
}
GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
{
GPRVector::Values v;

View file

@ -2,12 +2,18 @@
#define SFN_EMITSSBOINSTRUCTION_H
#include "sfn_emitinstruction.h"
#include "sfn_instruction_gds.h"
namespace r600 {
class EmitSSBOInstruction: public EmitInstruction {
public:
using EmitInstruction::EmitInstruction;
EmitSSBOInstruction(ShaderFromNirProcessor& processor);
void set_require_rat_return_address();
bool load_rat_return_address();
bool load_atomic_inc_limits();
private:
bool do_emit(nir_instr *instr);
@ -19,11 +25,22 @@ private:
bool emit_load_ssbo(const nir_intrinsic_instr* instr);
bool emit_store_ssbo(const nir_intrinsic_instr* instr);
bool emit_image_load(const nir_intrinsic_instr *intrin);
bool emit_image_store(const nir_intrinsic_instr *intrin);
bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
bool fetch_return_value(const nir_intrinsic_instr *intrin);
ESDOp get_opcode(nir_intrinsic_op opcode);
RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
GPRVector make_dest(const nir_intrinsic_instr* instr);
PValue m_atomic_limit;
PValue m_atomic_update;
bool m_require_rat_return_address;
GPRVector m_rat_return_address;
};
}

View file

@ -93,6 +93,36 @@ bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
nir_tex_instr *t = nir_instr_as_tex(instr);
if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
sh_info().uses_tex_buffers = true;
break;
}
case nir_instr_type_intrinsic: {
auto *i = nir_instr_as_intrinsic(instr);
switch (i->intrinsic) {
case nir_intrinsic_image_load:
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
m_ssbo_instr.set_require_rat_return_address();
m_sel.info.writes_memory = 1;
break;
default:
;
}
}
default:
;
@ -490,6 +520,11 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
if (emit_intrinsic_instruction_override(instr))
return true;
if (m_ssbo_instr.emit(&instr->instr)) {
m_sel.info.writes_memory = true;
return true;
}
switch (instr->intrinsic) {
case nir_intrinsic_load_deref: {
auto var = get_deref_location(instr->src[0]);
@ -524,39 +559,24 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
return emit_discard_if(instr);
case nir_intrinsic_load_ubo_r600:
return emit_load_ubo(instr);
case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_and:
case nir_intrinsic_atomic_counter_exchange:
case nir_intrinsic_atomic_counter_max:
case nir_intrinsic_atomic_counter_min:
case nir_intrinsic_atomic_counter_or:
case nir_intrinsic_atomic_counter_xor:
case nir_intrinsic_atomic_counter_comp_swap:
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_atomic_counter_post_dec:
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_pre_dec:
case nir_intrinsic_store_ssbo:
m_sel.info.writes_memory = true;
/* fallthrough */
case nir_intrinsic_load_ssbo:
return m_ssbo_instr.emit(&instr->instr);
break;
case nir_intrinsic_copy_deref:
case nir_intrinsic_load_constant:
case nir_intrinsic_load_input:
case nir_intrinsic_store_output:
case nir_intrinsic_load_tcs_in_param_base_r600:
return emit_load_tcs_param_base(instr, 0);
case nir_intrinsic_load_tcs_out_param_base_r600:
return emit_load_tcs_param_base(instr, 16);
case nir_intrinsic_load_local_shared_r600:
case nir_intrinsic_load_shared:
return emit_load_local_shared(instr);
case nir_intrinsic_store_local_shared_r600:
case nir_intrinsic_store_shared:
return emit_store_local_shared(instr);
case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier_tcs_patch:
case nir_intrinsic_memory_barrier_shared:
return emit_barrier(instr);
case nir_intrinsic_copy_deref:
case nir_intrinsic_load_constant:
case nir_intrinsic_load_input:
case nir_intrinsic_store_output:
default:
fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);