mirror of https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 06:50:11 +01:00

commit 83c86e09a8
Merge remote-tracking branch 'jekstrand/wip/i965-uniforms' into vulkan

15 changed files with 318 additions and 234 deletions
@@ -311,6 +311,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0)
  * of the start of the variable being loaded and and the offset source is a
  * offset into that variable.
  *
+ * Uniform load operations have a second index that specifies the size of the
+ * variable being loaded.  If const_index[1] == 0, then the size is unknown.
+ *
  * Some load operations such as UBO/SSBO load and per_vertex loads take an
  * additional source to specify which UBO/SSBO/vertex to load from.
  *
@@ -323,8 +326,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0)
 #define LOAD(name, srcs, indices, flags) \
    INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
 
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, size } */
+LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { offset }. const_index[] = { base } */
@@ -277,6 +277,11 @@ nir_lower_io_block(nir_block *block, void *void_state)
          load->const_index[0] =
             intrin->variables[0]->var->data.driver_location;
 
+         if (load->intrinsic == nir_intrinsic_load_uniform) {
+            load->const_index[1] =
+               state->type_size(intrin->variables[0]->var->type);
+         }
+
          if (per_vertex)
            load->src[0] = nir_src_for_ssa(vertex_index);
 
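The nir_lower_io change above is where the new size index gets populated: const_index[1] now records the total size of the uniform variable, giving later passes a bound on how far an indirect access may reach (0 still means unknown). Judging from the load_uniform handling later in this diff, where const_index[1] is compared against instr->num_components * 4, the fs backend's type_size callback counts bytes. A minimal standalone sketch of the values that would land there under that assumption (demo_type_size is hypothetical, not Mesa's real callback):

#include <stdio.h>

/* Hypothetical stand-in for state->type_size: sizes in bytes,
 * 4 bytes per 32-bit scalar component. */
static unsigned demo_type_size(unsigned vector_elems, unsigned array_len)
{
   return vector_elems * array_len * 4;
}

int main(void)
{
   /* Values that would land in load->const_index[1]. */
   printf("float   -> %u bytes\n", demo_type_size(1, 1)); /* 4 */
   printf("vec4    -> %u bytes\n", demo_type_size(4, 1)); /* 16 */
   printf("vec4[8] -> %u bytes\n", demo_type_size(4, 8)); /* 128 */
   return 0;
}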
@@ -174,7 +174,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
     * CSE can later notice that those loads are all the same and eliminate
     * the redundant ones.
     */
-   fs_reg vec4_offset = vgrf(glsl_type::int_type);
+   fs_reg vec4_offset = vgrf(glsl_type::uint_type);
    bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
 
    int scale = 1;
@ -433,7 +433,6 @@ fs_reg::fs_reg(struct ::brw_reg reg) :
|
||||||
{
|
{
|
||||||
this->reg_offset = 0;
|
this->reg_offset = 0;
|
||||||
this->subreg_offset = 0;
|
this->subreg_offset = 0;
|
||||||
this->reladdr = NULL;
|
|
||||||
this->stride = 1;
|
this->stride = 1;
|
||||||
if (this->file == IMM &&
|
if (this->file == IMM &&
|
||||||
(this->type != BRW_REGISTER_TYPE_V &&
|
(this->type != BRW_REGISTER_TYPE_V &&
|
||||||
|
|
@@ -448,7 +447,6 @@ fs_reg::equals(const fs_reg &r) const
 {
    return (this->backend_reg::equals(r) &&
            subreg_offset == r.subreg_offset &&
-           !reladdr && !r.reladdr &&
            stride == r.stride);
 }
 
@@ -851,7 +849,10 @@ fs_inst::regs_read(int arg) const
       assert(src[2].file == IMM);
       unsigned region_length = src[2].ud;
 
-      if (src[0].file == FIXED_GRF) {
+      if (src[0].file == UNIFORM) {
+         assert(region_length % 4 == 0);
+         return region_length / 4;
+      } else if (src[0].file == FIXED_GRF) {
          /* If the start of the region is not register aligned, then
           * there's some portion of the register that's technically
           * unread at the beginning.
@@ -865,7 +866,7 @@ fs_inst::regs_read(int arg) const
           * unread portion at the beginning.
           */
          if (src[0].subnr)
-            region_length += src[0].subnr * type_sz(src[0].type);
+            region_length += src[0].subnr;
 
          return DIV_ROUND_UP(region_length, REG_SIZE);
       } else {
@@ -1021,7 +1022,6 @@ fs_visitor::import_uniforms(fs_visitor *v)
    this->push_constant_loc = v->push_constant_loc;
    this->pull_constant_loc = v->pull_constant_loc;
    this->uniforms = v->uniforms;
-   this->param_size = v->param_size;
 }
 
 fs_reg *
@@ -1930,9 +1930,7 @@ fs_visitor::compact_virtual_grfs()
  * maximum number of fragment shader uniform components (64).  If
  * there are too many of these, they'd fill up all of register space.
  * So, this will push some of them out to the pull constant buffer and
- * update the program to load them.  We also use pull constants for all
- * indirect constant loads because we don't support indirect accesses in
- * registers yet.
+ * update the program to load them.
  */
 void
 fs_visitor::assign_constant_locations()
@@ -1941,20 +1939,21 @@ fs_visitor::assign_constant_locations()
    if (dispatch_width != 8)
       return;
 
-   unsigned int num_pull_constants = 0;
-
-   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-   memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
-
    bool is_live[uniforms];
    memset(is_live, 0, sizeof(is_live));
 
+   /* For each uniform slot, a value of true indicates that the given slot and
+    * the next slot must remain contiguous.  This is used to keep us from
+    * splitting arrays apart.
+    */
+   bool contiguous[uniforms];
+   memset(contiguous, 0, sizeof(contiguous));
+
    /* First, we walk through the instructions and do two things:
     *
     * 1) Figure out which uniforms are live.
     *
-    * 2) Find all indirect access of uniform arrays and flag them as needing
-    *    to go into the pull constant buffer.
+    * 2) Mark any indirectly used ranges of registers as contiguous.
     *
     * Note that we don't move constant-indexed accesses to arrays.  No
     * testing has been done of the performance impact of this choice.
|
|
@ -1964,20 +1963,19 @@ fs_visitor::assign_constant_locations()
|
||||||
if (inst->src[i].file != UNIFORM)
|
if (inst->src[i].file != UNIFORM)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (inst->src[i].reladdr) {
|
int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
|
||||||
int uniform = inst->src[i].nr;
|
|
||||||
|
|
||||||
/* If this array isn't already present in the pull constant buffer,
|
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
|
||||||
* add it.
|
assert(inst->src[2].ud % 4 == 0);
|
||||||
*/
|
unsigned last = constant_nr + (inst->src[2].ud / 4) - 1;
|
||||||
if (pull_constant_loc[uniform] == -1) {
|
assert(last < uniforms);
|
||||||
assert(param_size[uniform]);
|
|
||||||
for (int j = 0; j < param_size[uniform]; j++)
|
for (unsigned j = constant_nr; j < last; j++) {
|
||||||
pull_constant_loc[uniform + j] = num_pull_constants++;
|
is_live[j] = true;
|
||||||
|
contiguous[j] = true;
|
||||||
}
|
}
|
||||||
|
is_live[last] = true;
|
||||||
} else {
|
} else {
|
||||||
/* Mark the the one accessed uniform as live */
|
|
||||||
int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
|
|
||||||
if (constant_nr >= 0 && constant_nr < (int) uniforms)
|
if (constant_nr >= 0 && constant_nr < (int) uniforms)
|
||||||
is_live[constant_nr] = true;
|
is_live[constant_nr] = true;
|
||||||
}
|
}
|
||||||
|
|
@@ -1992,29 +1990,49 @@ fs_visitor::assign_constant_locations()
    * If changing this value, note the limitation about total_regs in
    * brw_curbe.c.
    */
-   unsigned int max_push_components = 16 * 8;
+   const unsigned int max_push_components = 16 * 8;
+
+   /* We push small arrays, but no bigger than 16 floats.  This is big enough
+    * for a vec4 but hopefully not large enough to push out other stuff.  We
+    * should probably use a better heuristic at some point.
+    */
+   const unsigned int max_chunk_size = 16;
 
    unsigned int num_push_constants = 0;
+   unsigned int num_pull_constants = 0;
 
    push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
+   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
 
-   for (unsigned int i = 0; i < uniforms; i++) {
-      if (!is_live[i] || pull_constant_loc[i] != -1) {
-         /* This UNIFORM register is either dead, or has already been demoted
-          * to a pull const.  Mark it as no longer living in the param[] array.
-          */
-         push_constant_loc[i] = -1;
+   int chunk_start = -1;
+   for (unsigned u = 0; u < uniforms; u++) {
+      push_constant_loc[u] = -1;
+      pull_constant_loc[u] = -1;
+
+      if (!is_live[u])
          continue;
-      }
 
-      if (num_push_constants < max_push_components) {
-         /* Retain as a push constant.  Record the location in the params[]
-          * array.
-          */
-         push_constant_loc[i] = num_push_constants++;
-      } else {
-         /* Demote to a pull constant. */
-         push_constant_loc[i] = -1;
-         pull_constant_loc[i] = num_pull_constants++;
+      /* This is the first live uniform in the chunk */
+      if (chunk_start < 0)
+         chunk_start = u;
+
+      /* If this element does not need to be contiguous with the next, we
+       * split at this point and everthing between chunk_start and u forms a
+       * single chunk.
+       */
+      if (!contiguous[u]) {
+         unsigned chunk_size = u - chunk_start + 1;
+
+         if (num_push_constants + chunk_size <= max_push_components &&
+             chunk_size <= max_chunk_size) {
+            for (unsigned j = chunk_start; j <= u; j++)
+               push_constant_loc[j] = num_push_constants++;
+         } else {
+            for (unsigned j = chunk_start; j <= u; j++)
+               pull_constant_loc[j] = num_pull_constants++;
+         }
+
+         chunk_start = -1;
       }
    }
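The rewritten loop above replaces the old per-slot push/pull decision with chunk-based assignment: live slots accumulate into a chunk until a slot that need not stay contiguous with its successor ends it, and the whole chunk is then pushed if it fits under both limits or demoted to pull constants otherwise. A standalone C sketch of the same decision procedure, with toy arrays and limits (made up for illustration, not part of the commit) so the splits are visible:

#include <stdio.h>
#include <stdbool.h>

#define NUM_UNIFORMS 8

int main(void)
{
   bool is_live[NUM_UNIFORMS]    = {1, 1, 1, 1, 0, 1, 1, 1};
   /* contiguous[u] == true means slot u must stay with slot u+1 */
   bool contiguous[NUM_UNIFORMS] = {1, 1, 1, 0, 0, 1, 1, 0};

   const unsigned max_push_components = 4; /* tiny on purpose */
   const unsigned max_chunk_size = 4;

   int push_loc[NUM_UNIFORMS], pull_loc[NUM_UNIFORMS];
   unsigned num_push = 0, num_pull = 0;

   int chunk_start = -1;
   for (unsigned u = 0; u < NUM_UNIFORMS; u++) {
      push_loc[u] = pull_loc[u] = -1;
      if (!is_live[u])
         continue;
      if (chunk_start < 0)
         chunk_start = u;
      if (!contiguous[u]) {
         unsigned chunk_size = u - chunk_start + 1;
         /* a chunk is pushed only if the whole thing fits */
         bool push = num_push + chunk_size <= max_push_components &&
                     chunk_size <= max_chunk_size;
         for (unsigned j = chunk_start; j <= u; j++) {
            if (push)
               push_loc[j] = num_push++;
            else
               pull_loc[j] = num_pull++;
         }
         chunk_start = -1;
      }
   }

   /* slots 0-3 end up pushed; slots 5-7 no longer fit and get pulled */
   for (unsigned u = 0; u < NUM_UNIFORMS; u++)
      printf("u%u: push=%d pull=%d\n", u, push_loc[u], pull_loc[u]);
   return 0;
}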
@@ -2045,51 +2063,67 @@ fs_visitor::assign_constant_locations()
  * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
  */
 void
-fs_visitor::demote_pull_constants()
+fs_visitor::lower_constant_loads()
 {
-   foreach_block_and_inst (block, fs_inst, inst, cfg) {
+   const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      /* Set up the annotation tracking for new generated instructions. */
+      const fs_builder ibld(this, block, inst);
+
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file != UNIFORM)
            continue;
 
-         int pull_index;
+         /* We'll handle this case later */
+         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
+            continue;
+
         unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
-         if (location >= uniforms) /* Out of bounds access */
-            pull_index = -1;
-         else
-            pull_index = pull_constant_loc[location];
+         if (location >= uniforms)
+            continue; /* Out of bounds access */
+
+         int pull_index = pull_constant_loc[location];
 
          if (pull_index == -1)
            continue;
 
-         /* Set up the annotation tracking for new generated instructions. */
-         const fs_builder ibld(this, block, inst);
-         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
-         fs_reg dst = vgrf(glsl_type::float_type);
-
         assert(inst->src[i].stride == 0);
 
-         /* Generate a pull load into dst. */
-         if (inst->src[i].reladdr) {
-            VARYING_PULL_CONSTANT_LOAD(ibld, dst,
-                                       brw_imm_ud(index),
-                                       *inst->src[i].reladdr,
-                                       pull_index * 4);
-            inst->src[i].reladdr = NULL;
-            inst->src[i].stride = 1;
-         } else {
-            const fs_builder ubld = ibld.exec_all().group(8, 0);
-            struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
-            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      dst, brw_imm_ud(index), offset);
-            inst->src[i].set_smear(pull_index & 3);
-         }
-         brw_mark_surface_used(prog_data, index);
+         fs_reg dst = vgrf(glsl_type::float_type);
+         const fs_builder ubld = ibld.exec_all().group(8, 0);
+         struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+         ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                   dst, brw_imm_ud(index), offset);
 
          /* Rewrite the instruction to use the temporary VGRF. */
          inst->src[i].file = VGRF;
          inst->src[i].nr = dst.nr;
          inst->src[i].reg_offset = 0;
+         inst->src[i].set_smear(pull_index & 3);
+
+         brw_mark_surface_used(prog_data, index);
+      }
+
+      if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
+          inst->src[0].file == UNIFORM) {
+
+         unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
+         if (location >= uniforms)
+            continue; /* Out of bounds access */
+
+         int pull_index = pull_constant_loc[location];
+
+         if (pull_index == -1)
+            continue;
+
+         VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
+                                    brw_imm_ud(index),
+                                    inst->src[1],
+                                    pull_index * 4);
+         inst->remove(block);
+
+         brw_mark_surface_used(prog_data, index);
       }
    }
    invalidate_live_intervals();
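In the uniform (non-indirect) path of lower_constant_loads() above, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD always fetches a 16-byte-aligned block, so the send offset is rounded down with & ~15 and set_smear(pull_index & 3) then selects the 32-bit component within that block. A standalone sketch of that addressing arithmetic (illustrative only):

#include <stdio.h>

int main(void)
{
   for (unsigned pull_index = 0; pull_index < 8; pull_index++) {
      unsigned byte_offset = (pull_index * 4) & ~15u; /* aligned block */
      unsigned smear = pull_index & 3;                /* component in block */
      printf("pull_index %u -> block at byte %2u, component %u\n",
             pull_index, byte_offset, smear);
   }
   return 0;
}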
@@ -4462,6 +4496,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
       return 8;
 
+   case SHADER_OPCODE_MOV_INDIRECT:
+      /* Prior to Broadwell, we only have 8 address subregisters */
+      return devinfo->gen < 8 ? 8 : inst->exec_size;
+
    default:
       return inst->exec_size;
    }
@@ -4744,9 +4782,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       break;
    case UNIFORM:
       fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
-      if (inst->src[i].reladdr) {
-         fprintf(file, "+reladdr");
-      } else if (inst->src[i].subreg_offset) {
+      if (inst->src[i].subreg_offset) {
          fprintf(file, "+%d.%d", inst->src[i].reg_offset,
                  inst->src[i].subreg_offset);
       }
@@ -4857,7 +4893,6 @@ fs_visitor::get_instruction_generating_reg(fs_inst *start,
 {
    if (end == start ||
        end->is_partial_write() ||
-       reg.reladdr ||
        !reg.equals(end->dst)) {
       return NULL;
    } else {
@@ -5070,7 +5105,7 @@ fs_visitor::optimize()
    bld = fs_builder(this, 64);
 
    assign_constant_locations();
-   demote_pull_constants();
+   lower_constant_loads();
 
    validate();
 
@@ -139,7 +139,7 @@ public:
    void split_virtual_grfs();
    bool compact_virtual_grfs();
    void assign_constant_locations();
-   void demote_pull_constants();
+   void lower_constant_loads();
    void invalidate_live_intervals();
    void calculate_live_intervals();
    void calculate_register_pressure();
@@ -323,8 +323,6 @@ public:
 
    const struct brw_vue_map *input_vue_map;
 
-   int *param_size;
-
    int *virtual_grf_start;
    int *virtual_grf_end;
    brw::fs_live_variables *live_intervals;
@@ -351,23 +351,47 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
 
    unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
 
-   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
-   struct brw_reg addr = vec8(brw_address_reg(0));
+   if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
+      imm_byte_offset += indirect_byte_offset.ud;
 
-   /* The destination stride of an instruction (in bytes) must be greater
-    * than or equal to the size of the rest of the instruction.  Since the
-    * address register is of type UW, we can't use a D-type instruction.
-    * In order to get around this, re re-type to UW and use a stride.
-    */
-   indirect_byte_offset =
-      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+      reg.nr = imm_byte_offset / REG_SIZE;
+      reg.subnr = imm_byte_offset % REG_SIZE;
+      brw_MOV(p, dst, reg);
+   } else {
+      /* Prior to Broadwell, there are only 8 address registers. */
+      assert(inst->exec_size == 8 || devinfo->gen >= 8);
 
-   /* Prior to Broadwell, there are only 8 address registers. */
-   assert(inst->exec_size == 8 || devinfo->gen >= 8);
+      /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+      struct brw_reg addr = vec8(brw_address_reg(0));
 
-   brw_MOV(p, addr, indirect_byte_offset);
-   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
-   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+      /* The destination stride of an instruction (in bytes) must be greater
+       * than or equal to the size of the rest of the instruction.  Since the
+       * address register is of type UW, we can't use a D-type instruction.
+       * In order to get around this, re re-type to UW and use a stride.
+       */
+      indirect_byte_offset =
+         retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+      if (devinfo->gen < 8) {
+         /* Prior to broadwell, we have a restriction that the bottom 5 bits
+          * of the base offset and the bottom 5 bits of the indirect must add
+          * to less than 32.  In other words, the hardware needs to be able to
+          * add the bottom five bits of the two to get the subnumber and add
+          * the next 7 bits of each to get the actual register number.  Since
+          * the indirect may cause us to cross a register boundary, this makes
+          * it almost useless.  We could try and do something clever where we
+          * use a actual base offset if base_offset % 32 == 0 but that would
+          * mean we were generating different code depending on the base
+          * offset.  Instead, for the sake of consistency, we'll just do the
+          * add ourselves.
+          */
+         brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+         brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+      } else {
+         brw_MOV(p, addr, indirect_byte_offset);
+         brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+      }
+   }
 }
 
 void
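The devinfo->gen < 8 branch above works around the restriction its comment describes: the hardware forms the subregister number from only the low 5 bits of the immediate base and the per-channel indirect offset, so their sum must stay below 32. Folding the base into an explicit ADD leaves an immediate base of 0, which can never violate the rule. A standalone sketch of the arithmetic (the helper name is made up for illustration):

#include <stdio.h>

/* True if the low-5-bit sums that the pre-Broadwell hardware would
 * compute overflow the 0..31 subregister range. */
static int violates_gen7_restriction(unsigned base, unsigned indirect)
{
   return (base & 0x1f) + (indirect & 0x1f) >= 32;
}

int main(void)
{
   /* base 16 plus indirect 20 crosses a register boundary: 16 + 20 = 36 */
   printf("base=16 indirect=20 -> %s\n",
          violates_gen7_restriction(16, 20) ? "illegal" : "ok");
   /* doing the add ourselves leaves base 0, which is always legal */
   printf("base=0  indirect=36 -> %s\n",
          violates_gen7_restriction(0, 36) ? "illegal" : "ok");
   return 0;
}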
@@ -175,15 +175,6 @@ fs_visitor::nir_setup_uniforms()
       return;
 
    uniforms = nir->num_uniforms / 4;
-
-   nir_foreach_variable(var, &nir->uniforms) {
-      /* UBO's and atomics don't take up space in the uniform file */
-      if (var->interface_type != NULL || var->type->contains_atomic())
-         continue;
-
-      if (type_size_scalar(var->type) > 0)
-         param_size[var->data.driver_location / 4] = type_size_scalar(var->type);
-   }
 }
 
 static bool
@@ -1195,6 +1186,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
 {
    fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
                 BRW_REGISTER_TYPE_UD);
+   fs_reg indirect;
+   unsigned indirect_max = 0;
 
    for (const nir_deref *tail = &deref->deref; tail->child;
         tail = tail->child) {
@@ -1206,7 +1199,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
       image = offset(image, bld, base * element_size);
 
       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-         fs_reg tmp = vgrf(glsl_type::int_type);
+         fs_reg tmp = vgrf(glsl_type::uint_type);
 
          if (devinfo->gen == 7 && !devinfo->is_haswell) {
            /* IVB hangs when trying to access an invalid surface index with
@@ -1224,15 +1217,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
             bld.MOV(tmp, get_nir_src(deref_array->indirect));
          }
 
+         indirect_max += element_size * (tail->type->length - 1);
+
          bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
-         if (image.reladdr)
-            bld.ADD(*image.reladdr, *image.reladdr, tmp);
-         else
-            image.reladdr = new(mem_ctx) fs_reg(tmp);
+         if (indirect.file == BAD_FILE) {
+            indirect = tmp;
+         } else {
+            bld.ADD(indirect, indirect, tmp);
+         }
       }
    }
 
-   return image;
+   if (indirect.file == BAD_FILE) {
+      return image;
+   } else {
+      /* Emit a pile of MOVs to load the uniform into a temporary.  The
+       * dead-code elimination pass will get rid of what we don't use.
+       */
+      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
+      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
+         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                  offset(tmp, bld, j), offset(image, bld, j),
+                  indirect, brw_imm_ud((indirect_max + 1) * 4));
+      }
+      return tmp;
+   }
 }
 
 void
@@ -2609,12 +2618,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          /* Offsets are in bytes but they should always be multiples of 4 */
          assert(const_offset->u[0] % 4 == 0);
          src.reg_offset = const_offset->u[0] / 4;
-      } else {
-         src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-      }
 
-      for (unsigned j = 0; j < instr->num_components; j++) {
-         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         for (unsigned j = 0; j < instr->num_components; j++) {
+            bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         }
+      } else {
+         fs_reg indirect = retype(get_nir_src(instr->src[0]),
+                                  BRW_REGISTER_TYPE_UD);
+
+         /* We need to pass a size to the MOV_INDIRECT but we don't want it to
+          * go past the end of the uniform.  In order to keep the n'th
+          * component from running past, we subtract off the size of all but
+          * one component of the vector.
+          */
+         assert(instr->const_index[1] >= instr->num_components * 4);
+         unsigned read_size = instr->const_index[1] -
+                              (instr->num_components - 1) * 4;
+
+         for (unsigned j = 0; j < instr->num_components; j++) {
+            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                     offset(dest, bld, j), offset(src, bld, j),
+                     indirect, brw_imm_ud(read_size));
+         }
       }
       break;
    }
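The read_size computation above trims the MOV_INDIRECT range by the size of all but one component: component j reads at indirect + 4 * j, so shrinking the range by (num_components - 1) * 4 bytes keeps even the last component inside the uniform. A standalone check with assumed sizes (a vec4 load from a 128-byte uniform):

#include <assert.h>
#include <stdio.h>

int main(void)
{
   const unsigned uniform_size = 128;   /* const_index[1], in bytes */
   const unsigned num_components = 4;   /* a vec4 load */
   const unsigned read_size = uniform_size - (num_components - 1) * 4;

   /* worst-case indirect offset that the shrunken range still permits */
   const unsigned max_indirect = read_size - 4;

   for (unsigned j = 0; j < num_components; j++) {
      unsigned last_byte = max_indirect + j * 4 + 4;
      assert(last_byte <= uniform_size);   /* never past the uniform */
      printf("component %u ends at byte %u (uniform is %u bytes)\n",
             j, last_byte, uniform_size);
   }
   return 0;
}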
@@ -1037,9 +1037,6 @@ fs_visitor::init()
 
    this->spilled_any_registers = false;
    this->do_dual_src = false;
-
-   if (dispatch_width == 8)
-      this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
 }
 
 fs_visitor::~fs_visitor()
@@ -58,8 +58,6 @@ public:
    */
    int subreg_offset;
 
-   fs_reg *reladdr;
-
    /** Register region horizontal stride */
    uint8_t stride;
 };
@@ -136,8 +134,7 @@ component(fs_reg reg, unsigned idx)
 static inline bool
 is_uniform(const fs_reg &reg)
 {
-   return (reg.stride == 0 || reg.is_null()) &&
-          (!reg.reladdr || is_uniform(*reg.reladdr));
+   return (reg.stride == 0 || reg.is_null());
 }
 
 /**
@@ -477,11 +477,6 @@ vec4_visitor::split_uniform_registers()
          inst->src[i].reg_offset = 0;
       }
    }
-
-   /* Update that everything is now vector-sized. */
-   for (int i = 0; i < this->uniforms; i++) {
-      this->uniform_size[i] = 1;
-   }
 }
 
 void
@@ -539,7 +534,6 @@ vec4_visitor::pack_uniform_registers()
    * push constants.
    */
    for (int src = 0; src < uniforms; src++) {
-      assert(src < uniform_array_size);
       int size = chans_used[src];
 
       if (size == 0)
@@ -786,7 +780,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
       dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
       emit_pull_constant_load(block, inst, temp, inst->src[i],
-                              pull_constant_loc[uniform]);
+                              pull_constant_loc[uniform], src_reg());
 
       inst->src[i].file = temp.file;
       inst->src[i].nr = temp.nr;
@@ -1606,8 +1600,6 @@ vec4_visitor::setup_uniforms(int reg)
    * matter what, or the GPU would hang.
    */
    if (devinfo->gen < 6 && this->uniforms == 0) {
-      assert(this->uniforms < this->uniform_array_size);
-
       stage_prog_data->param =
          reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
       for (unsigned int i = 0; i < 4; i++) {
@@ -115,8 +115,6 @@ public:
    */
   dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
   const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-  int *uniform_size;
-  int uniform_array_size; /*< Size of the uniform_size array */
   int uniforms;
 
   src_reg shader_start_time;
@@ -285,8 +283,6 @@ public:
 
    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                               src_reg *reladdr, int reg_offset);
-   src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
-                                    src_reg *reladdr, int reg_offset);
    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                           dst_reg dst,
                           src_reg orig_src,
@@ -296,7 +292,8 @@ public:
    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                 dst_reg dst,
                                 src_reg orig_src,
-                                int base_offset);
+                                int base_offset,
+                                src_reg indirect);
    void emit_pull_constant_load_reg(dst_reg dst,
                                     src_reg surf_index,
                                     src_reg offset,
@@ -1390,6 +1390,48 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p,
    brw_pop_insn_state(p);
 }
 
+static void
+generate_mov_indirect(struct brw_codegen *p,
+                      vec4_instruction *inst,
+                      struct brw_reg dst, struct brw_reg reg,
+                      struct brw_reg indirect, struct brw_reg length)
+{
+   assert(indirect.type == BRW_REGISTER_TYPE_UD);
+
+   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2);
+
+   /* This instruction acts in align1 mode */
+   assert(inst->force_writemask_all || reg.writemask == 0xf);
+
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+   struct brw_reg addr = vec2(brw_address_reg(0));
+
+   /* We need to move the indirect value into the address register.  In order
+    * to make things make some sense, we want to respect at least the X
+    * component of the swizzle.  In order to do that, we need to convert the
+    * subnr (probably 0) to an align1 subnr and add in the swizzle.  We then
+    * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect
+    * and splat it out to all four channels of the given half of a0.
+    */
+   assert(brw_is_single_value_swizzle(indirect.swizzle));
+   indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2;
+   indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0);
+
+   brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset));
+
+   /* Use a <4,1> region Vx1 region*/
+   struct brw_reg src = brw_VxH_indirect(0, 0);
+   src.width = BRW_WIDTH_4;
+   src.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   brw_MOV(p, dst, retype(src, reg.type));
+
+   brw_pop_insn_state(p);
+}
+
 static void
 generate_code(struct brw_codegen *p,
               const struct brw_compiler *compiler,
@@ -1936,6 +1978,9 @@ generate_code(struct brw_codegen *p,
       brw_WAIT(p);
       break;
 
+   case SHADER_OPCODE_MOV_INDIRECT:
+      generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]);
+
    default:
       unreachable("Unsupported opcode");
    }
@@ -132,15 +132,6 @@ void
 vec4_visitor::nir_setup_uniforms()
 {
    uniforms = nir->num_uniforms / 16;
-
-   nir_foreach_variable(var, &nir->uniforms) {
-      /* UBO's and atomics don't take up space in the uniform file */
-      if (var->interface_type != NULL || var->type->contains_atomic())
-         continue;
-
-      if (type_size_vec4(var->type) > 0)
-         uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type);
-   }
 }
 
 void
@@ -710,12 +701,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          /* Offsets are in bytes but they should always be multiples of 16 */
          assert(const_offset->u[0] % 16 == 0);
          src.reg_offset = const_offset->u[0] / 16;
-      } else {
-         src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
-         src.reladdr = new(mem_ctx) src_reg(tmp);
-      }
 
-      emit(MOV(dest, src));
+         emit(MOV(dest, src));
+      } else {
+         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
+
+         emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+              indirect, brw_imm_ud(instr->const_index[1]));
+      }
       break;
    }
 
@@ -59,8 +59,6 @@ vec4_tcs_visitor::emit_nir_code()
    * copies VS outputs to TES inputs.
    */
    uniforms = 2;
-   uniform_size[0] = 1;
-   uniform_size[1] = 1;
 
    uint64_t varyings = key->outputs_written;
 
@@ -1468,27 +1468,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
    }
 }
 
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
-                                       src_reg *reladdr, int reg_offset)
-{
-   if (reladdr) {
-      src_reg index = src_reg(this, glsl_type::int_type);
-
-      emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   brw_imm_d(reg_offset * 16)));
-
-      return index;
-   } else if (devinfo->gen >= 8) {
-      /* Store the offset in a GRF so we can send-from-GRF. */
-      src_reg offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
-      return offset;
-   } else {
-      return brw_imm_d(reg_offset * 16);
-   }
-}
-
 /**
  * Emits an instruction before @inst to load the value named by @orig_src
  * from scratch space at @base_offset to @temp.
@@ -1666,12 +1645,24 @@ vec4_visitor::move_grf_array_access_to_scratch()
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                       dst_reg temp, src_reg orig_src,
-                                      int base_offset)
+                                      int base_offset, src_reg indirect)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
    const unsigned index = prog_data->base.binding_table.pull_constants_start;
-   src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
-                                             reg_offset);
+
+   src_reg offset;
+   if (indirect.file != BAD_FILE) {
+      offset = src_reg(this, glsl_type::int_type);
+
+      emit_before(block, inst, ADD(dst_reg(offset), indirect,
+                                   brw_imm_d(reg_offset * 16)));
+   } else if (devinfo->gen >= 8) {
+      /* Store the offset in a GRF so we can send-from-GRF. */
+      offset = src_reg(this, glsl_type::int_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+   } else {
+      offset = brw_imm_d(reg_offset * 16);
+   }
 
    emit_pull_constant_load_reg(temp,
                                brw_imm_ud(index),
@@ -1698,59 +1689,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-   bool nested_reladdr;
 
-   /* Walk through and find array access of uniforms.  Put a copy of that
-    * uniform in the pull constant buffer.
-    *
-    * Note that we don't move constant-indexed accesses to arrays.  No
-    * testing has been done of the performance impact of this choice.
+   /* First, walk through the instructions and determine which things need to
+    * be pulled.  We mark something as needing to be pulled by setting
+    * pull_constant_loc to 0.
     */
-   do {
-      nested_reladdr = false;
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-      foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-         for (int i = 0 ; i < 3; i++) {
-            if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
-               continue;
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            int uniform = inst->src[i].nr;
+      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+         pull_constant_loc[uniform_nr + j] = 0;
+   }
 
-            if (inst->src[i].reladdr->reladdr)
-               nested_reladdr = true;  /* will need another pass */
+   /* Next, we walk the list of uniforms and assign real pull constant
+    * locations and set their corresponding entries in pull_param.
+    */
+   for (int j = 0; j < this->uniforms; j++) {
+      if (pull_constant_loc[j] < 0)
+         continue;
 
-            /* If this array isn't already present in the pull constant buffer,
-             * add it.
-             */
-            if (pull_constant_loc[uniform] == -1) {
-               const gl_constant_value **values =
-                  &stage_prog_data->param[uniform * 4];
+      pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
 
-               pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+      for (int i = 0; i < 4; i++) {
+         stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+            = stage_prog_data->param[j * 4 + i];
+      }
+   }
 
-               assert(uniform < uniform_array_size);
-               for (int j = 0; j < uniform_size[uniform] * 4; j++) {
-                  stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
-                     = values[j];
-               }
-            }
-
-            /* Set up the annotation tracking for new generated instructions. */
-            base_ir = inst->ir;
-            current_annotation = inst->annotation;
-
-            dst_reg temp = dst_reg(this, glsl_type::vec4_type);
-
-            emit_pull_constant_load(block, inst, temp, inst->src[i],
-                                    pull_constant_loc[uniform]);
-
-            inst->src[i].file = temp.file;
-            inst->src[i].nr = temp.nr;
-            inst->src[i].reg_offset = temp.reg_offset;
-            inst->src[i].reladdr = NULL;
-         }
-      }
-   } while (nested_reladdr);
+   /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+    * instructions to actual uniform pulls.
+    */
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
+
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
+
+      assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
+
+      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+                              pull_constant_loc[uniform_nr], inst->src[1]);
+      inst->remove(block);
+   }
 
    /* Now there are no accesses of the UNIFORM file with a reladdr, so
    * no need to track them as larger-than-vec4 objects.  This will be
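The first pass of the rewritten function above only marks which vec4 slots can be reached: a MOV_INDIRECT reading inst->src[2].ud bytes from a base slot touches DIV_ROUND_UP(length, 16) consecutive 16-byte slots, each of which gets a placeholder pull_constant_loc of 0 for the second pass to replace with a real location. A standalone sketch of that marking with toy numbers:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   int pull_constant_loc[8];
   for (int i = 0; i < 8; i++)
      pull_constant_loc[i] = -1;

   unsigned base = 2;    /* inst->src[0].nr + reg_offset, a vec4 slot */
   unsigned length = 36; /* inst->src[2].ud, in bytes */

   /* 36 bytes starting at slot 2 can touch slots 2, 3, and 4 */
   for (unsigned j = 0; j < DIV_ROUND_UP(length, 16); j++)
      pull_constant_loc[base + j] = 0;

   for (int i = 0; i < 8; i++)
      printf("slot %d: %s\n", i, pull_constant_loc[i] == 0 ? "pull" : "-");
   return 0;
}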
@@ -1803,17 +1790,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
    this->uniforms = 0;
-
-   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
-    * at least one. See setup_uniforms() in brw_vec4.cpp.
-    */
-   this->uniform_array_size = 1;
-   if (prog_data) {
-      this->uniform_array_size =
-         MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
-   }
-
-   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
@@ -270,7 +270,6 @@ void
 vec4_vs_visitor::setup_uniform_clipplane_values()
 {
    for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
-      assert(this->uniforms < uniform_array_size);
      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
      this->userplane[i].type = BRW_REGISTER_TYPE_F;
      for (int j = 0; j < 4; ++j) {