intel/compiler/mesh: implement IO for xe2

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25195>
This commit is contained in:
Marcin Ślusarz 2023-02-01 14:56:56 +01:00 committed by Marge Bot
parent ee4214de6e
commit 815eee10e0

View file

@ -1699,6 +1699,68 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
comp_shift, comps, mask);
}
static void
emit_urb_direct_vec4_write_xe2(const fs_builder &bld,
unsigned offset_in_bytes,
const fs_reg &src,
fs_reg urb_handle,
unsigned comps,
unsigned mask)
{
const struct intel_device_info *devinfo = bld.shader->devinfo;
const unsigned runit = reg_unit(devinfo);
const unsigned write_size = 8 * runit;
if (offset_in_bytes > 0) {
fs_builder bldall = bld.group(write_size, 0).exec_all();
fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
bldall.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_bytes));
urb_handle = new_handle;
}
for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
fs_builder hbld = bld.group(write_size, q);
fs_reg payload_srcs[comps];
for (unsigned c = 0; c < comps; c++)
payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
int nr = bld.shader->alloc.allocate(comps * runit);
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
hbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
reg_undef, srcs, ARRAY_SIZE(srcs));
}
}
static void
emit_urb_direct_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &src, fs_reg urb_handle)
{
assert(nir_src_bit_size(instr->src[0]) == 32);
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset_nir_src));
const unsigned comps = nir_src_num_components(instr->src[0]);
assert(comps <= 4);
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
nir_src_as_uint(*offset_nir_src) +
component_from_intrinsic(instr);
const unsigned mask = nir_intrinsic_write_mask(instr);
emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
urb_handle, comps, mask);
}
static void
emit_urb_indirect_vec4_write(const fs_builder &bld,
const fs_reg &offset_src,
@ -1764,6 +1826,57 @@ emit_urb_indirect_writes_mod(const fs_builder &bld, nir_intrinsic_instr *instr,
urb_handle, comp_shift, comps, mask);
}
static void
emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &src, const fs_reg &offset_src,
fs_reg urb_handle)
{
assert(nir_src_bit_size(instr->src[0]) == 32);
const struct intel_device_info *devinfo = bld.shader->devinfo;
const unsigned runit = reg_unit(devinfo);
const unsigned write_size = 8 * runit;
const unsigned comps = nir_src_num_components(instr->src[0]);
assert(comps <= 4);
const unsigned base_in_dwords = nir_intrinsic_base(instr) +
component_from_intrinsic(instr);
if (base_in_dwords > 0) {
fs_builder bldall = bld.group(write_size, 0).exec_all();
fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
bldall.ADD(new_handle, urb_handle, brw_imm_ud(base_in_dwords * 4));
urb_handle = new_handle;
}
const unsigned mask = nir_intrinsic_write_mask(instr);
for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
fs_builder wbld = bld.group(write_size, q);
fs_reg payload_srcs[comps];
for (unsigned c = 0; c < comps; c++)
payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
wbld.SHL(addr, horiz_offset(offset_src, write_size * q), brw_imm_ud(2));
wbld.ADD(addr, addr, urb_handle);
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
int nr = bld.shader->alloc.allocate(comps * runit);
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
wbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
reg_undef, srcs, ARRAY_SIZE(srcs));
}
}
static void
emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &src, const fs_reg &offset_src,
@ -1872,6 +1985,46 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
}
}
static void
emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &dest, fs_reg urb_handle)
{
assert(instr->def.bit_size == 32);
unsigned comps = instr->def.num_components;
if (comps == 0)
return;
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset_nir_src));
fs_builder ubld16 = bld.group(16, 0).exec_all();
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
nir_src_as_uint(*offset_nir_src) +
component_from_intrinsic(instr);
if (offset_in_dwords > 0) {
fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
urb_handle = new_handle;
}
fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
fs_inst *inst = ubld16.emit(SHADER_OPCODE_URB_READ_LOGICAL,
data, srcs, ARRAY_SIZE(srcs));
inst->size_written = 2 * comps * REG_SIZE;
for (unsigned c = 0; c < comps; c++) {
fs_reg dest_comp = offset(dest, bld, c);
fs_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
bld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
}
}
static void
emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &dest, const fs_reg &offset_src, fs_reg urb_handle)
@ -1936,6 +2089,53 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
}
}
static void
emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &dest, const fs_reg &offset_src,
fs_reg urb_handle)
{
assert(instr->def.bit_size == 32);
unsigned comps = instr->def.num_components;
if (comps == 0)
return;
fs_builder ubld16 = bld.group(16, 0).exec_all();
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
component_from_intrinsic(instr);
if (offset_in_dwords > 0) {
fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
urb_handle = new_handle;
}
fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
fs_builder wbld = bld.group(16, q);
fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
wbld.SHL(addr, horiz_offset(offset_src, 16 * q), brw_imm_ud(2));
wbld.ADD(addr, addr, urb_handle);
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
fs_inst *inst = wbld.emit(SHADER_OPCODE_URB_READ_LOGICAL,
data, srcs, ARRAY_SIZE(srcs));
inst->size_written = 2 * comps * REG_SIZE;
for (unsigned c = 0; c < comps; c++) {
fs_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
fs_reg data_comp = offset(data, wbld, c);
wbld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
}
}
}
void
fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &urb_handle)
@ -1944,8 +2144,15 @@ fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *ins
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
if (nir_src_is_const(*offset_nir_src)) {
emit_urb_direct_writes(bld, instr, src, urb_handle);
if (bld.shader->devinfo->ver >= 20)
emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
else
emit_urb_direct_writes(bld, instr, src, urb_handle);
} else {
if (bld.shader->devinfo->ver >= 20) {
emit_urb_indirect_writes_xe2(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle);
return;
}
bool use_mod = false;
unsigned mod;
@ -1978,10 +2185,17 @@ fs_visitor::emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *inst
* a single large aligned read instead one per component.
*/
if (nir_src_is_const(*offset_nir_src))
emit_urb_direct_reads(bld, instr, dest, urb_handle);
else
emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
if (nir_src_is_const(*offset_nir_src)) {
if (bld.shader->devinfo->ver >= 20)
emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
else
emit_urb_direct_reads(bld, instr, dest, urb_handle);
} else {
if (bld.shader->devinfo->ver >= 20)
emit_urb_indirect_reads_xe2(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
else
emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
}
}
void