i965/fs: Expose arbitrary pull constant load sizes to the IR.

Change the FS generator to ask the dataport for enough owords' worth of
constants to fill the execution size of the instruction -- which means
that the visitor now needs to set the execution size correctly for
uniform pull constant load instructions, which we were kind of
neglecting until now.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Francisco Jerez 2016-12-08 20:05:18 -08:00
parent 7a6aadb76f
commit 9b22a0d295
4 changed files with 26 additions and 27 deletions

View file

@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
}
/**
* Read a float[4] vector from the data port constant cache.
* Read float[4] vectors from the data port constant cache.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p,
const unsigned target_cache =
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
BRW_DATAPORT_READ_TARGET_DATA_CACHE);
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
brw_pop_insn_state(p);
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p,
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
brw_set_dp_read_message(p, insn, bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
target_cache,
1, /* msg_length */
true, /* header_present */
1); /* response_length (1 reg, 2 owords!) */
DIV_ROUND_UP(exec_size, 8)); /* response_length */
brw_pop_insn_state(p);
}

View file

@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads()
assert(inst->src[i].stride == 0);
const fs_builder ubld = ibld.exec_all().group(8, 0);
const fs_builder ubld = ibld.exec_all().group(4, 0);
struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, brw_imm_ud(index), offset);

View file

@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset)
{
assert(type_sz(dst.type) == 4);
assert(inst->mlen != 0);
assert(index.file == BRW_IMMEDIATE_VALUE &&
@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
{
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
assert(type_sz(dst.type) == 4);
if (index.file == BRW_IMMEDIATE_VALUE) {
const uint32_t surf_index = index.ud;
brw_push_insn_state(p);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4);
brw_pop_insn_state(p);
brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD)));
brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD)));
brw_set_dp_read_message(p, send,
surf_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_set_dp_read_message(p, send, surf_index,
BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
1); /* rlen */
DIV_ROUND_UP(inst->size_written, REG_SIZE));
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
/* dst = send(payload, a0.0 | <descriptor>) */
brw_inst *insn = brw_send_indirect_message(
p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
vec4(retype(dst, BRW_REGISTER_TYPE_UD)),
vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr);
brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
brw_set_dp_read_message(p, insn,
0, /* surface */
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
retype(dst, BRW_REGISTER_TYPE_UD),
retype(payload, BRW_REGISTER_TYPE_UD), addr);
brw_set_dp_read_message(p, insn, 0 /* surface */,
BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
1); /* rlen */
DIV_ROUND_UP(inst->size_written, REG_SIZE));
brw_pop_insn_state(p);
}

View file

@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
const fs_builder ubld = bld.exec_all().group(4, 0);
const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
for (unsigned c = 0; c < instr->num_components;) {
const unsigned base = const_offset->u32[0] + c * type_size;
@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
const unsigned count = MIN2(instr->num_components - c,
(16 - base % 16) / type_size);
bld.exec_all()
.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
packed_consts, surf_index, brw_imm_ud(base & ~15));
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
packed_consts, surf_index, brw_imm_ud(base & ~15));
const fs_reg consts =
retype(byte_offset(packed_consts, base & 15), dest.type);