i965/fs/skl+: Use ld2dms_w instead of ld2dms

In order to support 16x MSAA, skl+ has a wider version of ld2dms that
takes two parameters for the MCS data. The ld_mcs instruction that
retrieves the MCS data already returns 4 or 8 registers and is
documented to return zeroes for the mcsh value when the sample count is
less than 16.

v2: Use get_lowered_simd_width to fall back to SIMD8 instructions when
    the message length would be too long in SIMD16.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
This commit is contained in:
Neil Roberts 2015-09-08 15:52:09 +01:00
parent 20250e854e
commit e386fb0dee
6 changed files with 60 additions and 5 deletions

View file

@ -964,6 +964,8 @@ enum opcode {
FS_OPCODE_TXB_LOGICAL,
SHADER_OPCODE_TXF_CMS,
SHADER_OPCODE_TXF_CMS_LOGICAL,
SHADER_OPCODE_TXF_CMS_W,
SHADER_OPCODE_TXF_CMS_W_LOGICAL,
SHADER_OPCODE_TXF_UMS,
SHADER_OPCODE_TXF_UMS_LOGICAL,
SHADER_OPCODE_TXF_MCS,
@ -1539,6 +1541,7 @@ enum brw_message_target {
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31

View file

@ -622,6 +622,7 @@ static const char *const gen5_sampler_msg_type[] = {
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
[HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
[GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss",

View file

@ -717,6 +717,7 @@ fs_inst::components_read(unsigned i) const
case SHADER_OPCODE_TXS_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_UMS_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
@ -732,6 +733,9 @@ fs_inst::components_read(unsigned i) const
/* Texture offset. */
else if (i == 7)
return 2;
/* MCS */
else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
return 2;
else
return 1;
@ -896,6 +900,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
@ -3920,17 +3925,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
coordinate_done = true;
break;
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
if (op == SHADER_OPCODE_TXF_UMS ||
op == SHADER_OPCODE_TXF_CMS ||
op == SHADER_OPCODE_TXF_CMS_W) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
length++;
}
if (op == SHADER_OPCODE_TXF_CMS) {
if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
/* Data from the multisample control surface. */
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
/* On Gen9+ we'll use ld2dms_w instead which has two registers for
* the MCS data.
*/
if (op == SHADER_OPCODE_TXF_CMS_W) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
mcs.file == IMM ?
mcs :
offset(mcs, bld, 1));
length++;
}
}
/* There is no offsetting for this message; just copy in the integer
@ -4144,6 +4163,10 @@ fs_visitor::lower_logical_sends()
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
break;
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
break;
case SHADER_OPCODE_TXF_UMS_LOGICAL:
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
break;
@ -4336,6 +4359,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
else
return inst->exec_size;
case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
/* This opcode can take up to 6 arguments which means that in some
* circumstances it can end up with a message that is too long in SIMD16
* mode.
*/
const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
/* First three arguments are the sample index and the two arguments for
* the MCS data.
*/
if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
return 8;
else
return inst->exec_size;
}
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:

View file

@ -741,6 +741,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@ -2050,6 +2054,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:

View file

@ -208,8 +208,8 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
/* We only care about one reg of response, but the sampler always writes
* 4/8.
/* We only care about one or two regs of response, but the sampler always
* writes 4/8.
*/
inst->regs_written = 4 * dispatch_width / 8;
@ -295,7 +295,10 @@ fs_visitor::emit_texture(ir_texture_opcode op,
opcode = SHADER_OPCODE_TXF_LOGICAL;
break;
case ir_txf_ms:
opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
if (devinfo->gen >= 9)
opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
else
opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
break;
case ir_txs:
case ir_query_levels:

View file

@ -351,6 +351,10 @@ brw_instruction_name(enum opcode op)
return "txf_cms";
case SHADER_OPCODE_TXF_CMS_LOGICAL:
return "txf_cms_logical";
case SHADER_OPCODE_TXF_CMS_W:
return "txf_cms_w";
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
return "txf_cms_w_logical";
case SHADER_OPCODE_TXF_UMS:
return "txf_ums";
case SHADER_OPCODE_TXF_UMS_LOGICAL:
@ -787,6 +791,7 @@ backend_instruction::is_tex() const
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
opcode == SHADER_OPCODE_TXF_CMS ||
opcode == SHADER_OPCODE_TXF_CMS_W ||
opcode == SHADER_OPCODE_TXF_UMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode == SHADER_OPCODE_TXL ||