mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 02:58:05 +02:00
r600g/sb: Support gs5 sampler indexing (v2)
[airlied: v2 cayman fixups] Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
bd198b9f0a
commit
80c5062abf
9 changed files with 195 additions and 25 deletions
|
|
@ -166,8 +166,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
|
|||
if (rctx->b.chip_class <= R700) {
|
||||
use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
|
||||
}
|
||||
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
|
||||
use_sb &= !shader->shader.uses_index_registers;
|
||||
/* disable SB for shaders using ubo array indexing as it doesn't handle those currently */
|
||||
use_sb &= !shader->shader.uses_ubo_indexing;
|
||||
/* disable SB for shaders using doubles */
|
||||
use_sb &= !shader->shader.uses_doubles;
|
||||
|
||||
|
|
@ -1251,7 +1251,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
|
|||
}
|
||||
|
||||
if (ctx->src[i].kc_rel)
|
||||
ctx->shader->uses_index_registers = true;
|
||||
ctx->shader->uses_ubo_indexing = true;
|
||||
|
||||
if (ctx->src[i].rel) {
|
||||
int chan = inst->Src[i].Indirect.Swizzle;
|
||||
|
|
@ -1912,7 +1912,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
|
||||
shader->uses_doubles = ctx.info.uses_doubles;
|
||||
|
||||
indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
|
||||
indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
|
||||
tgsi_parse_init(&ctx.parse, tokens);
|
||||
ctx.type = ctx.info.processor;
|
||||
shader->processor_type = ctx.type;
|
||||
|
|
@ -1936,7 +1936,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
ctx.gs_next_vertex = 0;
|
||||
ctx.gs_stream_output_info = &so;
|
||||
|
||||
shader->uses_index_registers = false;
|
||||
shader->uses_ubo_indexing = false;
|
||||
ctx.face_gpr = -1;
|
||||
ctx.fixed_pt_position_gpr = -1;
|
||||
ctx.fragcoord_input = -1;
|
||||
|
|
@ -5703,8 +5703,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
|
|||
sampler_src_reg = 3;
|
||||
|
||||
sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
if (sampler_index_mode)
|
||||
ctx->shader->uses_index_registers = true;
|
||||
|
||||
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -75,8 +75,8 @@ struct r600_shader {
|
|||
boolean has_txq_cube_array_z_comp;
|
||||
boolean uses_tex_buffers;
|
||||
boolean gs_prim_id_input;
|
||||
/* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
|
||||
boolean uses_index_registers;
|
||||
/* Temporarily workaround SB not handling ubo indexing */
|
||||
boolean uses_ubo_indexing;
|
||||
|
||||
/* Size in bytes of a data item in the ring(s) (single vertex data).
|
||||
Stages with only one ring items 123 will be set to 0. */
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ class fetch_node;
|
|||
class alu_group_node;
|
||||
class region_node;
|
||||
class shader;
|
||||
class value;
|
||||
|
||||
class sb_ostream {
|
||||
public:
|
||||
|
|
@ -818,13 +819,16 @@ class bc_parser {
|
|||
|
||||
bool gpr_reladdr;
|
||||
|
||||
// Note: currently relies on input emitting SET_CF in same basic block as uses
|
||||
value *cf_index_value[2];
|
||||
alu_node *mova;
|
||||
public:
|
||||
|
||||
bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
|
||||
ctx(sctx), dec(), bc(bc), pshader(pshader),
|
||||
dw(), bc_ndw(), max_cf(),
|
||||
sh(), error(), slots(), cgroup(),
|
||||
cf_map(), loop_stack(), gpr_reladdr() { }
|
||||
cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
|
||||
|
||||
int decode();
|
||||
int prepare();
|
||||
|
|
@ -852,6 +856,10 @@ private:
|
|||
int prepare_loop(cf_node *c);
|
||||
int prepare_if(cf_node *c);
|
||||
|
||||
void save_set_cf_index(value *val, unsigned idx);
|
||||
value *get_cf_index_value(unsigned idx);
|
||||
void save_mova(alu_node *mova);
|
||||
alu_node *get_mova();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include "sb_bc.h"
|
||||
#include "sb_shader.h"
|
||||
#include "sb_pass.h"
|
||||
#include "eg_sq.h" // V_SQ_CF_INDEX_0/1
|
||||
|
||||
namespace r600_sb {
|
||||
|
||||
|
|
@ -354,6 +355,14 @@ void bc_dump::dump(alu_node& n) {
|
|||
s << " " << vec_bs[n.bc.bank_swizzle];
|
||||
}
|
||||
|
||||
if (ctx.is_cayman()) {
|
||||
if (n.bc.op == ALU_OP1_MOVA_INT) {
|
||||
static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
|
||||
" Unknown MOVA_INT dest" };
|
||||
s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_*
|
||||
}
|
||||
}
|
||||
|
||||
sblog << s.str() << "\n";
|
||||
}
|
||||
|
||||
|
|
@ -450,9 +459,9 @@ void bc_dump::dump(fetch_node& n) {
|
|||
if (n.bc.fetch_whole_quad)
|
||||
s << " FWQ";
|
||||
if (ctx.is_egcm() && n.bc.resource_index_mode)
|
||||
s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
|
||||
s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
|
||||
if (ctx.is_egcm() && n.bc.sampler_index_mode)
|
||||
s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
|
||||
s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
|
||||
|
||||
s << " UCF:" << n.bc.use_const_fields
|
||||
<< " FMT(DTA:" << n.bc.data_format
|
||||
|
|
@ -470,9 +479,9 @@ void bc_dump::dump(fetch_node& n) {
|
|||
if (n.bc.offset[k])
|
||||
s << " O" << chans[k] << ":" << n.bc.offset[k];
|
||||
if (ctx.is_egcm() && n.bc.resource_index_mode)
|
||||
s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
|
||||
s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
|
||||
if (ctx.is_egcm() && n.bc.sampler_index_mode)
|
||||
s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
|
||||
s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
|
||||
}
|
||||
|
||||
sblog << s.str() << "\n";
|
||||
|
|
|
|||
|
|
@ -303,7 +303,8 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
|
|||
assert(fdst.chan() == slot || slot == SLOT_TRANS);
|
||||
}
|
||||
|
||||
n->bc.dst_gpr = fdst.sel();
|
||||
if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
|
||||
n->bc.dst_gpr = fdst.sel();
|
||||
n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "r600_pipe.h"
|
||||
#include "r600_shader.h"
|
||||
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1
|
||||
|
||||
#include <stack>
|
||||
|
||||
|
|
@ -121,7 +122,7 @@ int bc_parser::parse_decls() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
|
||||
if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {
|
||||
|
||||
assert(pshader->num_arrays);
|
||||
|
||||
|
|
@ -328,6 +329,28 @@ int bc_parser::prepare_alu_clause(cf_node* cf) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Record 'val' as the value currently associated with CF index register
// 'idx'. Only two slots exist, so 'idx' must be 0 or 1; callers in this file
// pass 1 for the *_CF_IDX1 variants and 0 otherwise. 'val' must be non-null.
// The stored pointer is handed back by get_cf_index_value().
void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
	assert(idx <= 1);
	assert(val);

	value *&slot = cf_index_value[idx];
	slot = val;
}
|
||||
// Return the value last stored for CF index register 'idx' (0 or 1) by
// save_set_cf_index(). The slot is returned as-is, so the result is whatever
// the slot currently holds if nothing has been saved for it.
value *bc_parser::get_cf_index_value(unsigned idx)
{
	assert(idx <= 1);

	value *const saved = cf_index_value[idx];
	return saved;
}
|
||||
// Remember the most recently seen MOVA instruction so that a following
// SET_CF_IDX0/1 can pick up its source operand through get_mova().
// 'mova' must be non-null.
void bc_parser::save_mova(alu_node *mova)
{
	assert(mova);

	alu_node *&slot = this->mova;
	slot = mova;
}
|
||||
// Return the MOVA instruction recorded by save_mova(). Asserts that one has
// actually been recorded before it is requested.
alu_node *bc_parser::get_mova()
{
	assert(mova);

	alu_node *const saved = mova;
	return saved;
}
|
||||
|
||||
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
||||
|
||||
alu_node *n;
|
||||
|
|
@ -375,9 +398,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
|||
n->dst.resize(1);
|
||||
}
|
||||
|
||||
if (flags & AF_MOVA) {
|
||||
if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
|
||||
// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
|
||||
// DCE will kill this op
|
||||
save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
|
||||
} else if (flags & AF_MOVA) {
|
||||
|
||||
n->dst[0] = sh->get_special_value(SV_AR_INDEX);
|
||||
save_mova(n);
|
||||
|
||||
n->flags |= NF_DONT_HOIST;
|
||||
|
||||
|
|
@ -469,6 +497,10 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
|
||||
ctx.is_cayman())
|
||||
// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
|
||||
save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
|
||||
}
|
||||
|
||||
// pack multislot instructions into alu_packed_node
|
||||
|
|
@ -608,6 +640,10 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
|
|||
n->bc.src_sel[s], false);
|
||||
}
|
||||
|
||||
// Scheduler will emit the appropriate instructions to set CF_IDX0/1
|
||||
if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
|
||||
n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
#include "sb_bc.h"
|
||||
#include "sb_shader.h"
|
||||
#include "sb_pass.h"
|
||||
#include "eg_sq.h" // V_SQ_CF_INDEX_NONE
|
||||
|
||||
namespace r600_sb {
|
||||
|
||||
|
|
@ -406,6 +407,14 @@ void gcm::bu_sched_bb(bb_node* bb) {
|
|||
ncnt = 3;
|
||||
}
|
||||
|
||||
bool sampler_indexing = false;
|
||||
if (n->is_fetch_inst() &&
|
||||
static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
|
||||
{
|
||||
sampler_indexing = true; // Give sampler indexed ops get their own clause
|
||||
ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
|
||||
}
|
||||
|
||||
if ((sq == SQ_TEX || sq == SQ_VTX) &&
|
||||
((last_count >= ctx.max_fetch/2 &&
|
||||
check_alu_ready_count(24)) ||
|
||||
|
|
@ -418,7 +427,7 @@ void gcm::bu_sched_bb(bb_node* bb) {
|
|||
bu_ready[sq].pop_front();
|
||||
|
||||
if (sq != SQ_CF) {
|
||||
if (!clause) {
|
||||
if (!clause || sampler_indexing) {
|
||||
clause = sh.create_clause(sq == SQ_ALU ?
|
||||
NST_ALU_CLAUSE :
|
||||
sq == SQ_TEX ? NST_TEX_CLAUSE :
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@
|
|||
#include "sb_shader.h"
|
||||
#include "sb_pass.h"
|
||||
#include "sb_sched.h"
|
||||
#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
|
||||
|
||||
namespace r600_sb {
|
||||
|
||||
|
|
@ -781,7 +782,14 @@ void post_scheduler::schedule_bb(bb_node* bb) {
|
|||
sblog << "\n";
|
||||
);
|
||||
|
||||
if (n->subtype == NST_ALU_CLAUSE) {
|
||||
// May require emitting ALU ops to load index registers
|
||||
if (n->is_fetch_clause()) {
|
||||
n->remove();
|
||||
process_fetch(static_cast<container_node *>(n));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (n->is_alu_clause()) {
|
||||
n->remove();
|
||||
process_alu(static_cast<container_node*>(n));
|
||||
continue;
|
||||
|
|
@ -823,6 +831,102 @@ void post_scheduler::init_regmap() {
|
|||
}
|
||||
}
|
||||
|
||||
// Create an ALU node for SET_CF_IDX0 or SET_CF_IDX1.
//
// sh     - shader that allocates the new node.
// ar_idx - which index register to set; must be V_SQ_CF_INDEX_0 or
//          V_SQ_CF_INDEX_1 (asserted).
//
// Returns the new node, placed in slot X, with a single placeholder
// destination entry.
static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
	alu_node *a = sh.create_alu();

	assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
	if (ar_idx == V_SQ_CF_INDEX_0)
		a->bc.set_op(ALU_OP0_SET_CF_IDX0);
	else
		a->bc.set_op(ALU_OP0_SET_CF_IDX1);
	a->bc.slot = SLOT_X;
	a->dst.resize(1); // Dummy needed for recolor

	// Each statement needs its own terminator: without the ';' after the
	// first stream insertion the block fails to compile as soon as PSC_DUMP
	// expands its argument.
	PSC_DUMP(
		sblog << "created IDX load: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	return a;
}
|
||||
|
||||
// Emit a separate ALU clause into cur_bb that loads 'v' into the CF index
// register selected by 'ar_idx' (V_SQ_CF_INDEX_0 or V_SQ_CF_INDEX_1).
//
// On non-Cayman chips two groups are emitted: one holding SET_CF_IDX0/1 and
// one holding the AR load (MOVA). On Cayman only the AR load group is
// emitted.
//
// NOTE(review): the SET_CF_IDX group is emitted before the MOVA group even
// though the hardware needs MOVA first; presumably this scheduler builds code
// bottom-up so the final order is reversed - confirm.
void post_scheduler::load_index_register(value *v, unsigned ar_idx)
{
	alu.reset();

	if (!sh.get_ctx().is_cayman()) {
		// Evergreen has to first load address register, then use CF_SET_IDX0/1
		alu_group_tracker &idx_group = alu.grp();
		alu_node *idx_op = create_set_idx(sh, ar_idx);

		const bool idx_reserved = idx_group.try_reserve(idx_op);
		if (!idx_reserved) {
			sblog << "can't emit SET_CF_IDX";
			dump::dump_op(idx_op);
			sblog << "\n";
		}
		process_group();

		// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
		if (!alu.check_clause_limits()) {
		}
		alu.emit_group();
	}

	// Index register 1 is loaded through AR channel Z, index register 0
	// through channel Y.
	const chan_select ar_channel =
		ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y;

	// Fetch the group tracker again rather than reusing the one from above.
	alu_group_tracker &load_group = alu.grp();
	alu_node *ar_load = alu.create_ar_load(v, ar_channel);

	const bool load_reserved = load_group.try_reserve(ar_load);
	if (!load_reserved) {
		sblog << "can't emit AR load : ";
		dump::dump_op(ar_load);
		sblog << "\n";
	}

	process_group();

	// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
	if (!alu.check_clause_limits()) {
	}

	alu.emit_group();
	alu.emit_clause(cur_bb);
}
|
||||
|
||||
// Put fetch clause 'c' at the front of cur_bb. If the clause contains a fetch
// that uses sampler indexing, also emit the ALU clause that loads the
// required CF index register and drop the extra index operand from the fetch.
//
// An empty clause is ignored and is not inserted into cur_bb.
void post_scheduler::process_fetch(container_node *c) {
	if (c->empty())
		return;

	// Nothing in the loop body unlinks the current node before returning, so
	// a plain increment is sufficient here.
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;
		fetch_node *f = static_cast<fetch_node*>(n);

		PSC_DUMP(
			sblog << "process_tex ";
			dump::dump_op(n);
			sblog << " ";
		);

		if (f->bc.sampler_index_mode == V_SQ_CF_INDEX_NONE)
			continue;

		// Currently require prior opt passes to use one TEX per indexed op
		assert(f->parent->count() == 1);

		// Last src is index offset
		value *index_value = f->src.back();

		// The fetch clause is inserted before the index load is emitted.
		cur_bb->push_front(c);
		load_index_register(index_value, f->bc.sampler_index_mode);

		// Don't need index value any more
		f->src.pop_back();
		return;
	}

	// No indexed fetch found: the clause is inserted unchanged.
	cur_bb->push_front(c);
}
|
||||
|
||||
void post_scheduler::process_alu(container_node *c) {
|
||||
|
||||
if (c->empty())
|
||||
|
|
@ -1180,7 +1284,7 @@ void post_scheduler::emit_load_ar() {
|
|||
alu.discard_current_group();
|
||||
|
||||
alu_group_tracker &rt = alu.grp();
|
||||
alu_node *a = alu.create_ar_load();
|
||||
alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
|
||||
|
||||
if (!rt.try_reserve(a)) {
|
||||
sblog << "can't emit AR load : ";
|
||||
|
|
@ -1936,11 +2040,9 @@ bool alu_kcache_tracker::update_kc() {
|
|||
return true;
|
||||
}
|
||||
|
||||
alu_node* alu_clause_tracker::create_ar_load() {
|
||||
alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
|
||||
alu_node *a = sh.create_alu();
|
||||
|
||||
// FIXME use MOVA_GPR on R6xx
|
||||
|
||||
if (sh.get_ctx().uses_mova_gpr) {
|
||||
a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
|
||||
a->bc.slot = SLOT_TRANS;
|
||||
|
|
@ -1948,9 +2050,13 @@ alu_node* alu_clause_tracker::create_ar_load() {
|
|||
a->bc.set_op(ALU_OP1_MOVA_INT);
|
||||
a->bc.slot = SLOT_X;
|
||||
}
|
||||
a->bc.dst_chan = ar_channel;
|
||||
if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
|
||||
a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
|
||||
}
|
||||
|
||||
a->dst.resize(1);
|
||||
a->src.push_back(current_ar);
|
||||
a->src.push_back(v);
|
||||
|
||||
PSC_DUMP(
|
||||
sblog << "created AR load: ";
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ public:
|
|||
void new_group();
|
||||
bool is_empty();
|
||||
|
||||
alu_node* create_ar_load();
|
||||
alu_node* create_ar_load(value *v, chan_select ar_channel);
|
||||
|
||||
void discard_current_group();
|
||||
|
||||
|
|
@ -266,6 +266,9 @@ public:
|
|||
void run_on(container_node *n);
|
||||
void schedule_bb(bb_node *bb);
|
||||
|
||||
void load_index_register(value *v, unsigned idx);
|
||||
void process_fetch(container_node *c);
|
||||
|
||||
void process_alu(container_node *c);
|
||||
void schedule_alu(container_node *c);
|
||||
bool prepare_alu_group();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue