r600/sfn: lower uniforms to UBOs

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19416>
This commit is contained in:
Gert Wollny 2022-10-31 15:08:14 +01:00 committed by Marge Bot
parent 981bc603b4
commit 350c56b1c3
8 changed files with 85 additions and 13 deletions

View file

@ -1364,7 +1364,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.linker_ignore_precision = true,
.lower_fpow = true,
.lower_int64_options = ~0,
.lower_cs_local_index_to_id = true
.lower_cs_local_index_to_id = true,
.lower_uniforms_to_ubo = true
};
rscreen->nir_options = nir_options;

View file

@ -307,7 +307,6 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
if (buffer_offset && kcache_index_mode == bim_none) {
kcache_index_mode = bim_zero;
alu.src[i].kc_bank = 1;
alu.src[i].kc_rel = 1;
}

View file

@ -28,7 +28,9 @@
#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
#include "sfn_assembler.h"
#include "sfn_debug.h"
#include "sfn_instr_tex.h"
@ -226,8 +228,7 @@ private:
auto intr = nir_instr_as_intrinsic(instr);
nir_ssa_def *output[8] = {nullptr};
// for UBO loads we correct the buffer ID by adding 1
auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1);
auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER);
assert(intr->src[0].is_ssa);
auto clip_vtx = intr->src[0].ssa;
@ -270,6 +271,67 @@ private:
pipe_stream_output_info& m_so_info;
};
/* lower_uniforms_to_ubo adds a 1 to the UBO buffer ID.
* If the buffer ID is a non-constant value we end up
* with "iadd bufid, 1", but on r600 we can put that constant
* "1" as constant cache ID into the CF instruction and don't need
* to execute that extra ADD op, so eliminate the addition here
* again and move the buffer base ID into the base value of
* the intrinsic that is not used otherwise */
class OptIndirectUBOLoads : public NirLowerInstruction {
private:
   /* Accept only load_ubo_vec4 intrinsics whose buffer ID source is not a
    * compile-time constant and whose base index is still zero. */
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto load = nir_instr_as_intrinsic(instr);
      if (load->intrinsic != nir_intrinsic_load_ubo_vec4)
         return false;

      const bool bufid_is_const = nir_src_as_const_value(load->src[0]) != nullptr;
      return !bufid_is_const && nir_intrinsic_base(load) == 0;
   }

   /* If the buffer ID is produced by an iadd with one constant operand,
    * store that constant in the intrinsic's base index and make the load
    * read the other operand as its buffer ID. Returns nullptr when the
    * buffer ID does not have this form, otherwise the SSA destination of
    * the (modified) load. */
   nir_ssa_def *lower(nir_instr *instr) override
   {
      auto load = nir_instr_as_intrinsic(instr);
      assert(load->intrinsic == nir_intrinsic_load_ubo_vec4);
      assert(load->src[0].is_ssa);

      auto producer = load->src[0].ssa->parent_instr;
      if (producer->type != nir_instr_type_alu)
         return nullptr;

      auto add = nir_instr_as_alu(producer);
      if (add->op != nir_op_iadd)
         return nullptr;

      nir_src *dyn_index = nullptr;
      int const_part = 0;

      /* The first operand is checked first; the second is only inspected
       * when the first one is not a constant. */
      if (auto lhs = nir_src_as_const_value(add->src[0].src)) {
         dyn_index = &add->src[1].src;
         const_part = lhs->i32;
      } else if (auto rhs = nir_src_as_const_value(add->src[1].src)) {
         dyn_index = &add->src[0].src;
         const_part = rhs->i32;
      }

      /* Neither operand is constant: nothing to fold. */
      if (!dyn_index)
         return nullptr;

      assert(dyn_index->is_ssa);
      nir_intrinsic_set_base(load, const_part);
      nir_instr_rewrite_src(instr, &load->src[0], nir_src_for_ssa(dyn_index->ssa));
      return &load->dest.ssa;
   }
};
} // namespace r600
static nir_intrinsic_op
@ -558,6 +620,12 @@ r600_lower_fs_pos_input(nir_shader *shader)
nullptr);
};
/* Run the OptIndirectUBOLoads pass on the given shader and hand back
 * whatever the pass's run() reports. */
bool
r600_opt_indirect_fbo_loads(nir_shader *shader)
{
   r600::OptIndirectUBOLoads pass;
   return pass.run(shader);
}
static bool
optimize_once(nir_shader *shader)
{
@ -788,6 +856,7 @@ r600_shader_from_nir(struct r600_context *rctx,
}
NIR_PASS_V(sh, nir_lower_ubo_vec4);
NIR_PASS_V(sh, r600_opt_indirect_fbo_loads);
if (lower_64bit)
NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

View file

@ -665,7 +665,7 @@ LowerSplit64BitVar::split_double_load_ubo(nir_intrinsic_instr *intr)
nir_intrinsic_set_range(load2, nir_intrinsic_range(intr));
nir_intrinsic_set_access(load2, nir_intrinsic_access(intr));
nir_intrinsic_set_align_mul(load2, nir_intrinsic_align_mul(intr));
nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr) + 16);
nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr));
load2->num_components = second_components;

View file

@ -28,6 +28,7 @@
#include "gallium/drivers/r600/r600_shader.h"
#include "nir.h"
#include "nir_intrinsics.h"
#include "sfn_debug.h"
#include "sfn_instr.h"
#include "sfn_instr_alugroup.h"
@ -1282,6 +1283,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
{
auto bufid = nir_src_as_const_value(instr->src[0]);
auto buf_offset = nir_src_as_const_value(instr->src[1]);
auto base_id = nir_intrinsic_base(instr);
if (!buf_offset) {
/* TODO: if bufid is constant then this can also be solved by using the
@ -1299,11 +1301,11 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
LoadFromBuffer *ir;
if (bufid) {
ir = new LoadFromBuffer(
dest, dest_swz, addr, 0, 1 + bufid->u32, nullptr, fmt_32_32_32_32_float);
dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float);
} else {
auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
ir = new LoadFromBuffer(
dest, dest_swz, addr, 0, 1, buffer_id, fmt_32_32_32_32_float);
dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float);
}
emit_instruction(ir);
return true;
@ -1323,7 +1325,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
<< " const[" << i << "]: " << instr->const_index[i] << "\n";
auto uniform =
value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32);
ir = new AluInstr(op1_mov,
value_factory().dest(instr->dest, i, pin),
uniform,
@ -1340,7 +1342,8 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
int cmp = buf_cmp + i;
auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id);
auto u =
new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr));
auto dest = value_factory().dest(instr->dest, i, pin_none);
ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
emit_instruction(ir);

View file

@ -731,9 +731,9 @@ UniformValue::UniformValue(int sel, int chan, int kcache_bank):
{
}
UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr):
UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank):
VirtualValue(sel, chan, pin_none),
m_kcache_bank(0),
m_kcache_bank(kcache_bank),
m_buf_addr(buf_addr)
{
}

View file

@ -379,7 +379,7 @@ public:
using Pointer = R600_POINTER_TYPE(UniformValue);
UniformValue(int sel, int chan, int kcache_bank = 0);
UniformValue(int sel, int chan, PVirtualValue buf_addr);
UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank);
void accept(RegisterVisitor& vistor) override;
void accept(ConstRegisterVisitor& vistor) const override;

View file

@ -100,7 +100,7 @@ TEST_F(ValueTest, uniform_value)
auto addr = new Register(1024, 0, pin_none);
ASSERT_TRUE(addr);
UniformValue reg_with_buffer_addr(513, 0, addr);
UniformValue reg_with_buffer_addr(513, 0, addr, 0);
EXPECT_EQ(reg_with_buffer_addr.sel(), 513);
EXPECT_EQ(reg_with_buffer_addr.chan(), 0);