i965: Add typed surface access opcodes.

Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
This commit is contained in:
Francisco Jerez 2015-04-23 14:28:25 +03:00
parent 0775d8835a
commit f118e5d15f
9 changed files with 261 additions and 0 deletions

View file

@ -906,6 +906,10 @@ enum opcode {
SHADER_OPCODE_UNTYPED_SURFACE_READ,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
SHADER_OPCODE_TYPED_ATOMIC,
SHADER_OPCODE_TYPED_SURFACE_READ,
SHADER_OPCODE_TYPED_SURFACE_WRITE,
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,

View file

@ -423,6 +423,30 @@ brw_untyped_surface_write(struct brw_codegen *p,
unsigned msg_length,
unsigned num_channels);
/**
 * Emit a typed atomic surface message.
 *
 * \param p                  Code generator the instruction is emitted into.
 * \param dst                Destination register for the message response.
 * \param payload            Message payload register.
 * \param surface            Register identifying the surface to access.
 * \param atomic_op          Atomic operation type (one of BRW_AOP_*).
 * \param msg_length         Message length, forwarded to the send helper.
 * \param response_expected  Whether return data is expected from the message.
 */
void
brw_typed_atomic(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
bool response_expected);
/**
 * Emit a typed surface read message.
 *
 * \param p             Code generator the instruction is emitted into.
 * \param dst           Destination register for the data read.
 * \param payload       Message payload register.
 * \param surface       Register identifying the surface to access.
 * \param msg_length    Message length, forwarded to the send helper.
 * \param num_channels  Number of channels to read; the remaining channels
 *                      (out of four) are flagged as unused in the message
 *                      control.
 */
void
brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned num_channels);
/**
 * Emit a typed surface write message.  The message has no destination
 * (a null register is used) and a response length of zero.
 *
 * \param p             Code generator the instruction is emitted into.
 * \param payload       Message payload register.
 * \param surface       Register identifying the surface to access.
 * \param msg_length    Message length, forwarded to the send helper.
 * \param num_channels  Number of channels to write; the remaining channels
 *                      (out of four) are flagged as unused in the message
 *                      control.
 */
void
brw_typed_surface_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned num_channels);
void
brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg dest,

View file

@ -2945,6 +2945,175 @@ brw_untyped_surface_write(struct brw_codegen *p,
p, insn, num_channels);
}
/**
 * Fill in the data port message type and message control fields of \p insn
 * for a typed atomic message.
 *
 * \param p                  Code generator; its device info and current
 *                           default instruction state are consulted.
 * \param insn               Send instruction whose descriptor fields are set.
 * \param atomic_op          Atomic operation type (BRW_AOP_*), placed in the
 *                           low bits of the message control.
 * \param response_expected  Whether the "return data expected" bit is set.
 */
static void
brw_set_dp_typed_atomic_message(struct brw_codegen *p,
                                struct brw_inst *insn,
                                unsigned atomic_op,
                                bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool hsw_or_later = devinfo->gen >= 8 || devinfo->is_haswell;
   unsigned msg_control = atomic_op; /* Atomic Operation Type: BRW_AOP_* */

   if (response_expected)
      msg_control |= 1 << 5; /* Return data expected */

   if (!hsw_or_later) {
      /* Neither Gen8+ nor Haswell: a single message type is used and the
       * quarter control is checked regardless of the access mode.
       */
      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);

      if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */

   } else if (brw_inst_access_mode(devinfo, p->current) != BRW_ALIGN_1) {
      /* Not align1: use the SIMD4x2 variant of the message. */
      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);

   } else {
      if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */

      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
void
brw_typed_atomic(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
bool response_expected) {
const struct brw_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN6_SFID_DATAPORT_RENDER_CACHE);
const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, false),
true);
brw_set_dp_typed_atomic_message(
p, insn, atomic_op, response_expected);
}
/**
 * Fill in the data port message type and message control fields of \p insn
 * for a typed surface read message.
 *
 * \param p             Code generator; its device info and current default
 *                      instruction state are consulted.
 * \param insn          Send instruction whose descriptor fields are set.
 * \param num_channels  Number of channels read; channels at or above this
 *                      index (up to four) are marked unused.
 */
static void
brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
                                      struct brw_inst *insn,
                                      unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool hsw_or_later = devinfo->gen >= 8 || devinfo->is_haswell;
   /* The low four bits hold the mask of unused channels. */
   unsigned msg_control = (0xf << num_channels) & 0xf;
   unsigned msg_type;

   if (hsw_or_later) {
      msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;

      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         const bool second_quarter =
            brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q;

         /* 2: use high 8 slots of the sample mask,
          * 1: use low 8 slots of the sample mask.
          */
         msg_control |= (second_quarter ? 2 : 1) << 4;
      }
   } else {
      msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ;

      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
          brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
   }

   brw_inst_set_dp_msg_type(devinfo, insn, msg_type);
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
/**
 * Emit a typed surface read message.
 *
 * \param p             Code generator the instruction is emitted into.
 * \param dst           Destination register for the data read.
 * \param payload       Message payload register.
 * \param surface       Register identifying the surface to access.
 * \param msg_length    Message length, forwarded to the send helper.
 * \param num_channels  Number of channels to read.
 */
void
brw_typed_surface_read(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg payload,
                       struct brw_reg surface,
                       unsigned msg_length,
                       unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool hsw_or_later = devinfo->gen >= 8 || devinfo->is_haswell;
   unsigned sfid;
   unsigned response_length;
   struct brw_inst *insn;

   /* Gen8+ and Haswell use data cache port 1; everything else goes through
    * the render cache.
    */
   if (hsw_or_later)
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
   else
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;

   /* The response size is derived from the number of channels read. */
   response_length = brw_surface_payload_size(p, num_channels,
                                              hsw_or_later, false);

   insn = brw_send_indirect_surface_message(p, sfid, dst, payload, surface,
                                            msg_length, response_length,
                                            true);

   brw_set_dp_typed_surface_read_message(p, insn, num_channels);
}
/**
 * Fill in the data port message type and message control fields of \p insn
 * for a typed surface write message.
 *
 * \param p             Code generator; its device info and current default
 *                      instruction state are consulted.
 * \param insn          Send instruction whose descriptor fields are set.
 * \param num_channels  Number of channels written; channels at or above this
 *                      index (up to four) are marked unused.
 */
static void
brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
                                       struct brw_inst *insn,
                                       unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool hsw_or_later = devinfo->gen >= 8 || devinfo->is_haswell;
   /* The low four bits hold the mask of unused channels. */
   unsigned msg_control = (0xf << num_channels) & 0xf;
   unsigned msg_type;

   if (hsw_or_later) {
      msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;

      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         const bool second_quarter =
            brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q;

         /* 2: use high 8 slots of the sample mask,
          * 1: use low 8 slots of the sample mask.
          */
         msg_control |= (second_quarter ? 2 : 1) << 4;
      }
   } else {
      msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE;

      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
          brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
   }

   brw_inst_set_dp_msg_type(devinfo, insn, msg_type);
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
/**
 * Emit a typed surface write message.  The message has no destination
 * (a null register is used) and a response length of zero.
 *
 * \param p             Code generator the instruction is emitted into.
 * \param payload       Message payload register.
 * \param surface       Register identifying the surface to access.
 * \param msg_length    Message length, forwarded to the send helper.
 * \param num_channels  Number of channels to write.
 */
void
brw_typed_surface_write(struct brw_codegen *p,
                        struct brw_reg payload,
                        struct brw_reg surface,
                        unsigned msg_length,
                        unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool hsw_or_later = devinfo->gen >= 8 || devinfo->is_haswell;
   const bool align1 =
      brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   unsigned sfid;
   unsigned mask;
   struct brw_inst *insn;

   /* Gen8+ and Haswell use data cache port 1; everything else goes through
    * the render cache.
    */
   if (hsw_or_later)
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
   else
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;

   /* Mask out unused components -- See comment in brw_untyped_atomic().
    * Only non-Haswell Gen7 outside of align1 mode restricts the mask to X.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell && !align1)
      mask = WRITEMASK_X;
   else
      mask = WRITEMASK_XYZW;

   insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(brw_null_reg(), mask),
      payload, surface, msg_length, 0, true);

   brw_set_dp_typed_surface_write_message(p, insn, num_channels);
}
void
brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg dest,

View file

@ -503,6 +503,9 @@ fs_inst::is_send_from_grf() const
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
@ -959,6 +962,12 @@ fs_inst::regs_read(int arg) const
return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_TYPED_ATOMIC && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_TYPED_SURFACE_READ && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE && arg == 0) {
return mlen;
} else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) {
return mlen;
} else if (opcode == FS_OPCODE_LINTERP && arg == 0) {
@ -1052,6 +1061,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:

View file

@ -2036,6 +2036,23 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
inst->mlen, src[2].dw1.ud);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1],
src[2].dw1.ud, inst->mlen, !inst->dst.is_null());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].dw1.ud);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud);
break;
case FS_OPCODE_SET_SIMD4X2_OFFSET:
generate_set_simd4x2_offset(inst, dst, src[0]);
break;

View file

@ -340,6 +340,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_TYPED_ATOMIC:
/* Test code:
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
* mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
@ -359,6 +360,8 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
/* Test code:
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };

View file

@ -496,6 +496,12 @@ brw_instruction_name(enum opcode op)
return "untyped_surface_read";
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
return "untyped_surface_write";
case SHADER_OPCODE_TYPED_ATOMIC:
return "typed_atomic";
case SHADER_OPCODE_TYPED_SURFACE_READ:
return "typed_surface_read";
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return "typed_surface_write";
case SHADER_OPCODE_LOAD_PAYLOAD:
return "load_payload";
@ -1041,6 +1047,8 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_FB_WRITE:
return true;

View file

@ -217,6 +217,9 @@ vec4_instruction::is_send_from_grf()
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return true;
default:
return false;
@ -234,6 +237,9 @@ vec4_instruction::regs_read(unsigned arg) const
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return arg == 0 ? mlen : 1;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:

View file

@ -1490,6 +1490,24 @@ vec4_generator::generate_code(const cfg_t *cfg)
src[2].dw1.ud);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
!inst->dst.is_null());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].dw1.ud);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen,
src[2].dw1.ud);
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
generate_unpack_flags(dst);
break;