mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
i965/fs/skl+: Use ld2dms_w instead of ld2dms
In order to support 16x MSAA, skl+ has a wider version of ld2dms, ld2dms_w,
that takes two parameters for the MCS data. The ld_mcs instruction used to
retrieve the MCS data already returns 4 or 8 registers and is documented
to return zeroes for the mcsh value when the sample count is less than
16.
v2: Use get_lowered_simd_width to fall back to SIMD8 instructions when
the message length would be too long in SIMD16.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
This commit is contained in:
parent
20250e854e
commit
e386fb0dee
6 changed files with 60 additions and 5 deletions
|
|
@ -964,6 +964,8 @@ enum opcode {
|
|||
FS_OPCODE_TXB_LOGICAL,
|
||||
SHADER_OPCODE_TXF_CMS,
|
||||
SHADER_OPCODE_TXF_CMS_LOGICAL,
|
||||
SHADER_OPCODE_TXF_CMS_W,
|
||||
SHADER_OPCODE_TXF_CMS_W_LOGICAL,
|
||||
SHADER_OPCODE_TXF_UMS,
|
||||
SHADER_OPCODE_TXF_UMS_LOGICAL,
|
||||
SHADER_OPCODE_TXF_MCS,
|
||||
|
|
@ -1539,6 +1541,7 @@ enum brw_message_target {
|
|||
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
|
||||
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
|
||||
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
|
||||
#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28
|
||||
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
|
||||
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
|
||||
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
|
||||
|
|
|
|||
|
|
@ -622,6 +622,7 @@ static const char *const gen5_sampler_msg_type[] = {
|
|||
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po",
|
||||
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
|
||||
[HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
|
||||
[GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w",
|
||||
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs",
|
||||
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms",
|
||||
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss",
|
||||
|
|
|
|||
|
|
@ -717,6 +717,7 @@ fs_inst::components_read(unsigned i) const
|
|||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
case FS_OPCODE_TXB_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_CMS_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_UMS_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
||||
case SHADER_OPCODE_LOD_LOGICAL:
|
||||
|
|
@ -732,6 +733,9 @@ fs_inst::components_read(unsigned i) const
|
|||
/* Texture offset. */
|
||||
else if (i == 7)
|
||||
return 2;
|
||||
/* MCS */
|
||||
else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
|
||||
|
|
@ -896,6 +900,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
|
|||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
|
|
@ -3920,17 +3925,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
coordinate_done = true;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
|
||||
if (op == SHADER_OPCODE_TXF_UMS ||
|
||||
op == SHADER_OPCODE_TXF_CMS ||
|
||||
op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
|
||||
length++;
|
||||
}
|
||||
|
||||
if (op == SHADER_OPCODE_TXF_CMS) {
|
||||
if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
/* Data from the multisample control surface. */
|
||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
|
||||
length++;
|
||||
|
||||
/* On Gen9+ we'll use ld2dms_w instead which has two registers for
|
||||
* the MCS data.
|
||||
*/
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
|
||||
mcs.file == IMM ?
|
||||
mcs :
|
||||
offset(mcs, bld, 1));
|
||||
length++;
|
||||
}
|
||||
}
|
||||
|
||||
/* There is no offsetting for this message; just copy in the integer
|
||||
|
|
@ -4144,6 +4163,10 @@ fs_visitor::lower_logical_sends()
|
|||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_UMS_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
|
||||
break;
|
||||
|
|
@ -4336,6 +4359,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
|
|||
else
|
||||
return inst->exec_size;
|
||||
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
|
||||
/* This opcode can take up to 6 arguments which means that in some
|
||||
* circumstances it can end up with a message that is too long in SIMD16
|
||||
* mode.
|
||||
*/
|
||||
const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
|
||||
/* First three arguments are the sample index and the two arguments for
|
||||
* the MCS data.
|
||||
*/
|
||||
if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
|
||||
return 8;
|
||||
else
|
||||
return inst->exec_size;
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
||||
|
|
|
|||
|
|
@ -741,6 +741,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
|
|||
case SHADER_OPCODE_TXF:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
assert(devinfo->gen >= 9);
|
||||
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
if (devinfo->gen >= 7)
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
|
||||
|
|
@ -2050,6 +2054,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
|
|
|
|||
|
|
@ -208,8 +208,8 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
|||
fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
|
||||
ARRAY_SIZE(srcs));
|
||||
|
||||
/* We only care about one reg of response, but the sampler always writes
|
||||
* 4/8.
|
||||
/* We only care about one or two regs of response, but the sampler always
|
||||
* writes 4/8.
|
||||
*/
|
||||
inst->regs_written = 4 * dispatch_width / 8;
|
||||
|
||||
|
|
@ -295,7 +295,10 @@ fs_visitor::emit_texture(ir_texture_opcode op,
|
|||
opcode = SHADER_OPCODE_TXF_LOGICAL;
|
||||
break;
|
||||
case ir_txf_ms:
|
||||
opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
|
||||
if (devinfo->gen >= 9)
|
||||
opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
|
||||
else
|
||||
opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
|
||||
break;
|
||||
case ir_txs:
|
||||
case ir_query_levels:
|
||||
|
|
|
|||
|
|
@ -351,6 +351,10 @@ brw_instruction_name(enum opcode op)
|
|||
return "txf_cms";
|
||||
case SHADER_OPCODE_TXF_CMS_LOGICAL:
|
||||
return "txf_cms_logical";
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
return "txf_cms_w";
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
return "txf_cms_w_logical";
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
return "txf_ums";
|
||||
case SHADER_OPCODE_TXF_UMS_LOGICAL:
|
||||
|
|
@ -787,6 +791,7 @@ backend_instruction::is_tex() const
|
|||
opcode == SHADER_OPCODE_TXD ||
|
||||
opcode == SHADER_OPCODE_TXF ||
|
||||
opcode == SHADER_OPCODE_TXF_CMS ||
|
||||
opcode == SHADER_OPCODE_TXF_CMS_W ||
|
||||
opcode == SHADER_OPCODE_TXF_UMS ||
|
||||
opcode == SHADER_OPCODE_TXF_MCS ||
|
||||
opcode == SHADER_OPCODE_TXL ||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue