mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-20 11:00:24 +01:00
intel/compiler/mesh: implement IO for xe2
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25195>
This commit is contained in:
parent
ee4214de6e
commit
815eee10e0
1 changed file with 219 additions and 5 deletions
|
|
@ -1699,6 +1699,68 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
comp_shift, comps, mask);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_direct_vec4_write_xe2(const fs_builder &bld,
|
||||
unsigned offset_in_bytes,
|
||||
const fs_reg &src,
|
||||
fs_reg urb_handle,
|
||||
unsigned comps,
|
||||
unsigned mask)
|
||||
{
|
||||
const struct intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const unsigned runit = reg_unit(devinfo);
|
||||
const unsigned write_size = 8 * runit;
|
||||
|
||||
if (offset_in_bytes > 0) {
|
||||
fs_builder bldall = bld.group(write_size, 0).exec_all();
|
||||
fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
bldall.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_bytes));
|
||||
urb_handle = new_handle;
|
||||
}
|
||||
|
||||
for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
|
||||
fs_builder hbld = bld.group(write_size, q);
|
||||
|
||||
fs_reg payload_srcs[comps];
|
||||
|
||||
for (unsigned c = 0; c < comps; c++)
|
||||
payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
int nr = bld.shader->alloc.allocate(comps * runit);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
|
||||
hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
|
||||
|
||||
hbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_direct_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &src, fs_reg urb_handle)
|
||||
{
|
||||
assert(nir_src_bit_size(instr->src[0]) == 32);
|
||||
|
||||
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset_nir_src));
|
||||
|
||||
const unsigned comps = nir_src_num_components(instr->src[0]);
|
||||
assert(comps <= 4);
|
||||
|
||||
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
|
||||
nir_src_as_uint(*offset_nir_src) +
|
||||
component_from_intrinsic(instr);
|
||||
|
||||
const unsigned mask = nir_intrinsic_write_mask(instr);
|
||||
|
||||
emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
|
||||
urb_handle, comps, mask);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_indirect_vec4_write(const fs_builder &bld,
|
||||
const fs_reg &offset_src,
|
||||
|
|
@ -1764,6 +1826,57 @@ emit_urb_indirect_writes_mod(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
urb_handle, comp_shift, comps, mask);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &src, const fs_reg &offset_src,
|
||||
fs_reg urb_handle)
|
||||
{
|
||||
assert(nir_src_bit_size(instr->src[0]) == 32);
|
||||
|
||||
const struct intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const unsigned runit = reg_unit(devinfo);
|
||||
const unsigned write_size = 8 * runit;
|
||||
|
||||
const unsigned comps = nir_src_num_components(instr->src[0]);
|
||||
assert(comps <= 4);
|
||||
|
||||
const unsigned base_in_dwords = nir_intrinsic_base(instr) +
|
||||
component_from_intrinsic(instr);
|
||||
|
||||
if (base_in_dwords > 0) {
|
||||
fs_builder bldall = bld.group(write_size, 0).exec_all();
|
||||
fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
bldall.ADD(new_handle, urb_handle, brw_imm_ud(base_in_dwords * 4));
|
||||
urb_handle = new_handle;
|
||||
}
|
||||
|
||||
const unsigned mask = nir_intrinsic_write_mask(instr);
|
||||
|
||||
for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
|
||||
fs_builder wbld = bld.group(write_size, q);
|
||||
|
||||
fs_reg payload_srcs[comps];
|
||||
|
||||
for (unsigned c = 0; c < comps; c++)
|
||||
payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
|
||||
|
||||
fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
wbld.SHL(addr, horiz_offset(offset_src, write_size * q), brw_imm_ud(2));
|
||||
wbld.ADD(addr, addr, urb_handle);
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
int nr = bld.shader->alloc.allocate(comps * runit);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
|
||||
wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
|
||||
|
||||
wbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &src, const fs_reg &offset_src,
|
||||
|
|
@ -1872,6 +1985,46 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &dest, fs_reg urb_handle)
|
||||
{
|
||||
assert(instr->def.bit_size == 32);
|
||||
|
||||
unsigned comps = instr->def.num_components;
|
||||
if (comps == 0)
|
||||
return;
|
||||
|
||||
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset_nir_src));
|
||||
|
||||
fs_builder ubld16 = bld.group(16, 0).exec_all();
|
||||
|
||||
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
|
||||
nir_src_as_uint(*offset_nir_src) +
|
||||
component_from_intrinsic(instr);
|
||||
|
||||
if (offset_in_dwords > 0) {
|
||||
fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
|
||||
urb_handle = new_handle;
|
||||
}
|
||||
|
||||
fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
|
||||
fs_inst *inst = ubld16.emit(SHADER_OPCODE_URB_READ_LOGICAL,
|
||||
data, srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = 2 * comps * REG_SIZE;
|
||||
|
||||
for (unsigned c = 0; c < comps; c++) {
|
||||
fs_reg dest_comp = offset(dest, bld, c);
|
||||
fs_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
|
||||
bld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &dest, const fs_reg &offset_src, fs_reg urb_handle)
|
||||
|
|
@ -1936,6 +2089,53 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &dest, const fs_reg &offset_src,
|
||||
fs_reg urb_handle)
|
||||
{
|
||||
assert(instr->def.bit_size == 32);
|
||||
|
||||
unsigned comps = instr->def.num_components;
|
||||
if (comps == 0)
|
||||
return;
|
||||
|
||||
fs_builder ubld16 = bld.group(16, 0).exec_all();
|
||||
|
||||
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
|
||||
component_from_intrinsic(instr);
|
||||
|
||||
if (offset_in_dwords > 0) {
|
||||
fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
|
||||
urb_handle = new_handle;
|
||||
}
|
||||
|
||||
fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
|
||||
|
||||
|
||||
for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
|
||||
fs_builder wbld = bld.group(16, q);
|
||||
|
||||
fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
wbld.SHL(addr, horiz_offset(offset_src, 16 * q), brw_imm_ud(2));
|
||||
wbld.ADD(addr, addr, urb_handle);
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
|
||||
|
||||
fs_inst *inst = wbld.emit(SHADER_OPCODE_URB_READ_LOGICAL,
|
||||
data, srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = 2 * comps * REG_SIZE;
|
||||
|
||||
for (unsigned c = 0; c < comps; c++) {
|
||||
fs_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
|
||||
fs_reg data_comp = offset(data, wbld, c);
|
||||
wbld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
const fs_reg &urb_handle)
|
||||
|
|
@ -1944,8 +2144,15 @@ fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *ins
|
|||
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
|
||||
|
||||
if (nir_src_is_const(*offset_nir_src)) {
|
||||
emit_urb_direct_writes(bld, instr, src, urb_handle);
|
||||
if (bld.shader->devinfo->ver >= 20)
|
||||
emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
|
||||
else
|
||||
emit_urb_direct_writes(bld, instr, src, urb_handle);
|
||||
} else {
|
||||
if (bld.shader->devinfo->ver >= 20) {
|
||||
emit_urb_indirect_writes_xe2(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle);
|
||||
return;
|
||||
}
|
||||
bool use_mod = false;
|
||||
unsigned mod;
|
||||
|
||||
|
|
@ -1978,10 +2185,17 @@ fs_visitor::emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *inst
|
|||
* a single large aligned read instead one per component.
|
||||
*/
|
||||
|
||||
if (nir_src_is_const(*offset_nir_src))
|
||||
emit_urb_direct_reads(bld, instr, dest, urb_handle);
|
||||
else
|
||||
emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
|
||||
if (nir_src_is_const(*offset_nir_src)) {
|
||||
if (bld.shader->devinfo->ver >= 20)
|
||||
emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
|
||||
else
|
||||
emit_urb_direct_reads(bld, instr, dest, urb_handle);
|
||||
} else {
|
||||
if (bld.shader->devinfo->ver >= 20)
|
||||
emit_urb_indirect_reads_xe2(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
|
||||
else
|
||||
emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue