i965/vec4: make emit_pull_constant_load support 64-bit loads

This way callers don't need to know about the particularities of 64-bit
loads, and we reuse some code.

v2:
  - use byte_offset() instead of offset()
  - only mark the surface as used once

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Iago Toral Quiroga 2016-07-15 13:02:27 +02:00 committed by Samuel Iglesias Gonsálvez
parent df6e3aa6ae
commit ae400e38d9
2 changed files with 49 additions and 54 deletions

View file

@ -884,25 +884,12 @@ vec4_visitor::move_push_constants_to_pull_constants()
int uniform = inst->src[i].nr; int uniform = inst->src[i].nr;
dst_reg temp; const glsl_type *temp_type = type_sz(inst->src[i].type) == 8 ?
if (type_sz(inst->src[i].type) != 8) { glsl_type::dvec4_type : glsl_type::vec4_type;
temp = dst_reg(this, glsl_type::vec4_type); dst_reg temp = dst_reg(this, temp_type);
emit_pull_constant_load(block, inst, temp, inst->src[i],
pull_constant_loc[uniform], src_reg());
} else {
dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
emit_pull_constant_load(block, inst, shuffled_float, inst->src[i], emit_pull_constant_load(block, inst, temp, inst->src[i],
pull_constant_loc[uniform], src_reg()); pull_constant_loc[uniform], src_reg());
emit_pull_constant_load(block, inst,
offset(shuffled_float, 8, 1),
offset(inst->src[i], 8, 1),
pull_constant_loc[uniform], src_reg());
temp = dst_reg(this, glsl_type::dvec4_type);
shuffle_64bit_data(temp, src_reg(shuffled), false, block, inst);
}
inst->src[i].file = temp.file; inst->src[i].file = temp.file;
inst->src[i].nr = temp.nr; inst->src[i].nr = temp.nr;

View file

@ -1702,33 +1702,57 @@ vec4_visitor::move_grf_array_access_to_scratch()
*/ */
void void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src, dst_reg temp, src_reg orig_src,
int base_offset, src_reg indirect) int base_offset, src_reg indirect)
{ {
assert(orig_src.offset % 16 == 0); assert(orig_src.offset % 16 == 0);
int reg_offset = base_offset + orig_src.offset / 16;
const unsigned index = prog_data->base.binding_table.pull_constants_start; const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset; /* For 64-bit loads we need to emit two 32-bit load messages and we also
if (indirect.file != BAD_FILE) { * need to shuffle the 32-bit data result into proper 64-bit data. To do
offset = src_reg(this, glsl_type::uint_type); * that we emit the 32-bit loads into a temporary and we shuffle the result
* into the original destination.
emit_before(block, inst, ADD(dst_reg(offset), indirect, */
brw_imm_ud(reg_offset * 16))); dst_reg orig_temp = temp;
} else if (devinfo->gen >= 8) { bool is_64bit = type_sz(orig_src.type) == 8;
/* Store the offset in a GRF so we can send-from-GRF. */ if (is_64bit) {
offset = src_reg(this, glsl_type::uint_type); assert(type_sz(temp.type) == 8);
emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16))); dst_reg temp_df = dst_reg(this, glsl_type::dvec4_type);
} else { temp = retype(temp_df, BRW_REGISTER_TYPE_F);
offset = brw_imm_d(reg_offset * 16);
} }
emit_pull_constant_load_reg(temp, src_reg src = orig_src;
brw_imm_ud(index), for (int i = 0; i < (is_64bit ? 2 : 1); i++) {
offset, int reg_offset = base_offset + src.offset / 16;
block, inst);
src_reg offset;
if (indirect.file != BAD_FILE) {
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, MOV(dst_reg(offset),
brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}
emit_pull_constant_load_reg(byte_offset(temp, i * REG_SIZE),
brw_imm_ud(index),
offset,
block, inst);
src = byte_offset(src, 16);
}
brw_mark_surface_used(&prog_data->base, index); brw_mark_surface_used(&prog_data->base, index);
if (is_64bit) {
temp = retype(temp, BRW_REGISTER_TYPE_DF);
shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
}
} }
/** /**
@ -1801,24 +1825,8 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
if (type_sz(inst->src[0].type) != 8) { emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
emit_pull_constant_load(block, inst, inst->dst, inst->src[0], pull_constant_loc[uniform_nr], inst->src[1]);
pull_constant_loc[uniform_nr], inst->src[1]);
} else {
dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
emit_pull_constant_load(block, inst, shuffled_float, inst->src[0],
pull_constant_loc[uniform_nr], inst->src[1]);
emit_pull_constant_load(block, inst,
offset(shuffled_float, 8, 1),
offset(inst->src[0], 8, 1),
pull_constant_loc[uniform_nr], inst->src[1]);
shuffle_64bit_data(retype(inst->dst, BRW_REGISTER_TYPE_DF),
src_reg(shuffled), false, block, inst);
}
inst->remove(block); inst->remove(block);
} }