mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
i965/vs: Add support for pull constant loads for uniform arrays.
v2: reworked the instruction emit and made use of gen6_resolve_implied_move,
from Ken's review
This commit is contained in:
parent
88e08de801
commit
483f5b348b
6 changed files with 195 additions and 5 deletions
|
|
@ -643,6 +643,7 @@ enum opcode {
|
|||
VS_OPCODE_URB_WRITE,
|
||||
VS_OPCODE_SCRATCH_READ,
|
||||
VS_OPCODE_SCRATCH_WRITE,
|
||||
VS_OPCODE_PULL_CONSTANT_LOAD,
|
||||
};
|
||||
|
||||
#define BRW_PREDICATE_NONE 0
|
||||
|
|
|
|||
|
|
@ -807,6 +807,9 @@ void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
|
|||
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
|
||||
struct brw_reg reg);
|
||||
|
||||
void gen6_resolve_implied_move(struct brw_compile *p,
|
||||
struct brw_reg *src,
|
||||
GLuint msg_reg_nr);
|
||||
|
||||
/* Helpers for regular instructions:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ static void guess_execution_size(struct brw_compile *p,
|
|||
* On Sandybridge, this is no longer the case. This function performs the
|
||||
* explicit move; it should be called before emitting a SEND instruction.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
gen6_resolve_implied_move(struct brw_compile *p,
|
||||
struct brw_reg *src,
|
||||
GLuint msg_reg_nr)
|
||||
|
|
|
|||
|
|
@ -370,6 +370,7 @@ public:
|
|||
*/
|
||||
dst_reg output_reg[VERT_RESULT_MAX];
|
||||
int uniform_size[MAX_UNIFORMS];
|
||||
int uniform_vector_size[MAX_UNIFORMS];
|
||||
int uniforms;
|
||||
|
||||
struct hash_table *variable_ht;
|
||||
|
|
@ -386,6 +387,7 @@ public:
|
|||
void reg_allocate_trivial();
|
||||
void reg_allocate();
|
||||
void move_grf_array_access_to_scratch();
|
||||
void move_uniform_array_access_to_pull_constants();
|
||||
void calculate_live_intervals();
|
||||
bool dead_code_eliminate();
|
||||
bool virtual_grf_interferes(int a, int b);
|
||||
|
|
@ -447,6 +449,8 @@ public:
|
|||
|
||||
src_reg get_scratch_offset(vec4_instruction *inst,
|
||||
src_reg *reladdr, int reg_offset);
|
||||
src_reg get_pull_constant_offset(vec4_instruction *inst,
|
||||
src_reg *reladdr, int reg_offset);
|
||||
void emit_scratch_read(vec4_instruction *inst,
|
||||
dst_reg dst,
|
||||
src_reg orig_src,
|
||||
|
|
@ -455,6 +459,10 @@ public:
|
|||
src_reg temp,
|
||||
dst_reg orig_dst,
|
||||
int base_offset);
|
||||
void emit_pull_constant_load(vec4_instruction *inst,
|
||||
dst_reg dst,
|
||||
src_reg orig_src,
|
||||
int base_offset);
|
||||
|
||||
GLboolean try_emit_sat(ir_expression *ir);
|
||||
|
||||
|
|
@ -490,6 +498,9 @@ public:
|
|||
void generate_scratch_read(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
void generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
};
|
||||
|
||||
} /* namespace brw */
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ vec4_visitor::setup_uniforms(int reg)
|
|||
* matter what, or the GPU would hang.
|
||||
*/
|
||||
if (intel->gen < 6 && this->uniforms == 0) {
|
||||
this->uniform_size[this->uniforms] = 1;
|
||||
this->uniform_vector_size[this->uniforms] = 1;
|
||||
|
||||
for (unsigned int i = 0; i < 4; i++) {
|
||||
unsigned int slot = this->uniforms * 4 + i;
|
||||
|
|
@ -229,6 +229,9 @@ vec4_instruction::get_src(int i)
|
|||
brw_reg = brw_abs(brw_reg);
|
||||
if (src[i].negate)
|
||||
brw_reg = negate(brw_reg);
|
||||
|
||||
/* This should have been moved to pull constants. */
|
||||
assert(!src[i].reladdr);
|
||||
break;
|
||||
|
||||
case HW_REG:
|
||||
|
|
@ -487,6 +490,42 @@ vec4_visitor::generate_scratch_write(vec4_instruction *inst,
|
|||
false /* commit */);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(0, 0);
|
||||
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
|
||||
index);
|
||||
|
||||
uint32_t msg_type;
|
||||
|
||||
if (intel->gen >= 6)
|
||||
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
|
||||
else if (intel->gen == 5 || intel->is_g4x)
|
||||
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
|
||||
else
|
||||
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_set_dest(p, send, dst);
|
||||
brw_set_src0(p, send, header);
|
||||
brw_set_dp_read_message(p, send,
|
||||
SURF_INDEX_VERT_CONST_BUFFER,
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
msg_type,
|
||||
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
|
||||
2, /* mlen */
|
||||
1 /* rlen */);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
|
||||
struct brw_reg dst,
|
||||
|
|
@ -529,6 +568,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
|
|||
generate_scratch_write(inst, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
generate_pull_constant_load(inst, dst, src[0]);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
|
||||
fail("unsupported opcode in `%s' in VS\n",
|
||||
|
|
@ -556,6 +599,7 @@ vec4_visitor::run()
|
|||
* often do repeated subexpressions for those.
|
||||
*/
|
||||
move_grf_array_access_to_scratch();
|
||||
move_uniform_array_access_to_pull_constants();
|
||||
|
||||
bool progress;
|
||||
do {
|
||||
|
|
|
|||
|
|
@ -381,7 +381,10 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
|
|||
c->prog_data.param[this->uniforms * 4 + i] = &zero;
|
||||
}
|
||||
|
||||
this->uniform_size[this->uniforms] = type->vector_elements;
|
||||
/* Track the size of this uniform vector, for future packing of
|
||||
* uniforms.
|
||||
*/
|
||||
this->uniform_vector_size[this->uniforms] = type->vector_elements;
|
||||
this->uniforms++;
|
||||
|
||||
return 1;
|
||||
|
|
@ -429,7 +432,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
|
|||
(gl_state_index *)slots[i].tokens);
|
||||
float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
|
||||
|
||||
this->uniform_size[this->uniforms] = 0;
|
||||
this->uniform_vector_size[this->uniforms] = 0;
|
||||
/* Add each of the unique swizzled channels of the element.
|
||||
* This will end up matching the size of the glsl_type of this field.
|
||||
*/
|
||||
|
|
@ -440,7 +443,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
|
|||
|
||||
c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
|
||||
if (swiz <= last_swiz)
|
||||
this->uniform_size[this->uniforms]++;
|
||||
this->uniform_vector_size[this->uniforms]++;
|
||||
}
|
||||
this->uniforms++;
|
||||
}
|
||||
|
|
@ -677,6 +680,11 @@ vec4_visitor::visit(ir_variable *ir)
|
|||
case ir_var_uniform:
|
||||
reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
|
||||
|
||||
/* Track how big the whole uniform variable is, in case we need to put a
|
||||
* copy of its data into pull constants for array access.
|
||||
*/
|
||||
this->uniform_size[this->uniforms] = type_size(ir->type);
|
||||
|
||||
if (!strncmp(ir->name, "gl_", 3)) {
|
||||
setup_builtin_uniform_values(ir);
|
||||
} else {
|
||||
|
|
@ -1947,6 +1955,42 @@ vec4_visitor::get_scratch_offset(vec4_instruction *inst,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the offset operand for a pull constant load of the vec4 slot
 * @reg_offset, optionally indexed at run time by @reladdr.
 *
 * When @reladdr is NULL, a src_reg constructed from the (scaled) constant
 * offset is returned and no instructions are emitted.  Otherwise the
 * instructions computing the offset are inserted before @inst, tagged with
 * its ir and annotation, and the integer temporary holding the result is
 * returned.
 */
src_reg
vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
                                       src_reg *reladdr, int reg_offset)
{
   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   const bool wants_byte_offset = intel->gen < 6;

   if (!reladdr) {
      return src_reg(reg_offset * (wants_byte_offset ? 16 : 1));
   }

   src_reg offset = src_reg(this, glsl_type::int_type);

   /* offset = *reladdr + reg_offset */
   vec4_instruction *add_inst =
      new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD,
                                    dst_reg(offset),
                                    *reladdr,
                                    src_reg(reg_offset));
   add_inst->ir = inst->ir;
   add_inst->annotation = inst->annotation;
   inst->insert_before(add_inst);

   if (wants_byte_offset) {
      /* offset *= 16, converting vec4 units to bytes. */
      vec4_instruction *scale_inst =
         new(mem_ctx) vec4_instruction(this, BRW_OPCODE_MUL,
                                       dst_reg(offset),
                                       offset,
                                       src_reg(16));
      scale_inst->ir = inst->ir;
      scale_inst->annotation = inst->annotation;
      inst->insert_before(scale_inst);
   }

   return offset;
}
|
||||
|
||||
/**
|
||||
* Emits an instruction before @inst to load the value named by @orig_src
|
||||
* from scratch space at @base_offset to @temp.
|
||||
|
|
@ -2072,6 +2116,93 @@ vec4_visitor::move_grf_array_access_to_scratch()
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emits an instruction before @inst to load the value named by @orig_src
|
||||
* from the pull constant buffer (surface) at @base_offset to @temp.
|
||||
*/
|
||||
void
|
||||
vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
|
||||
dst_reg temp, src_reg orig_src,
|
||||
int base_offset)
|
||||
{
|
||||
int reg_offset = base_offset + orig_src.reg_offset;
|
||||
src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
|
||||
vec4_instruction *load;
|
||||
|
||||
load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
|
||||
temp, index);
|
||||
load->annotation = inst->annotation;
|
||||
load->ir = inst->ir;
|
||||
load->base_mrf = 14;
|
||||
load->mlen = 1;
|
||||
inst->insert_before(load);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements array access of uniforms by inserting a
|
||||
* PULL_CONSTANT_LOAD instruction.
|
||||
*
|
||||
* Unlike temporary GRF array access (where we don't support it due to
|
||||
* the difficulty of doing relative addressing on instruction
|
||||
* destinations), we could potentially do array access of uniforms
|
||||
* that were loaded in GRF space as push constants. In real-world
|
||||
* usage we've seen, though, the arrays being used are always larger
|
||||
* than we could load as push constants, so just always move all
|
||||
* uniform array access out to a pull constant buffer.
|
||||
*/
|
||||
void
|
||||
vec4_visitor::move_uniform_array_access_to_pull_constants()
|
||||
{
|
||||
int pull_constant_loc[this->uniforms];
|
||||
|
||||
for (int i = 0; i < this->uniforms; i++) {
|
||||
pull_constant_loc[i] = -1;
|
||||
}
|
||||
|
||||
/* Walk through and find array access of uniforms. Put a copy of that
|
||||
* uniform in the pull constant buffer.
|
||||
*
|
||||
* Note that we don't move constant-indexed accesses to arrays. No
|
||||
* testing has been done of the performance impact of this choice.
|
||||
*/
|
||||
foreach_list_safe(node, &this->instructions) {
|
||||
vec4_instruction *inst = (vec4_instruction *)node;
|
||||
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
|
||||
continue;
|
||||
|
||||
int uniform = inst->src[i].reg;
|
||||
|
||||
/* If this array isn't already present in the pull constant buffer,
|
||||
* add it.
|
||||
*/
|
||||
if (pull_constant_loc[uniform] == -1) {
|
||||
const float **values = &prog_data->param[uniform * 4];
|
||||
|
||||
pull_constant_loc[uniform] = prog_data->nr_pull_params;
|
||||
|
||||
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
|
||||
prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up the annotation tracking for new generated instructions. */
|
||||
base_ir = inst->ir;
|
||||
current_annotation = inst->annotation;
|
||||
|
||||
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
|
||||
|
||||
emit_pull_constant_load(inst, temp, inst->src[i],
|
||||
pull_constant_loc[uniform]);
|
||||
|
||||
inst->src[i].file = temp.file;
|
||||
inst->src[i].reg = temp.reg;
|
||||
inst->src[i].reg_offset = temp.reg_offset;
|
||||
inst->src[i].reladdr = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
|
||||
struct gl_shader_program *prog,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue