mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 04:30:10 +01:00
intel/fs: Add and implement a load_global_const_block intrinsic
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:
parent
6d5b57aeb7
commit
1f6e70c85a
2 changed files with 32 additions and 0 deletions
|
|
@ -1051,6 +1051,12 @@ image("load_raw_intel", src_comp=[1], dest_comp=0,
|
|||
flags=[CAN_ELIMINATE])
|
||||
image("store_raw_intel", src_comp=[1, 0])
|
||||
|
||||
# Intrinsic to load a block of at least 32B of constant data from a 64-bit
|
||||
# global memory address. The memory address must be uniform and 32B-aligned.
|
||||
# src[] = { address }.
|
||||
intrinsic("load_global_const_block_intel", src_comp=[1], dest_comp=0,
|
||||
indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Number of data items being operated on for a SIMD program.
|
||||
system_value("simd_width_intel", 1)
|
||||
|
||||
|
|
|
|||
|
|
@ -4695,6 +4695,32 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
nir_emit_global_atomic_float(bld, brw_aop_for_nir_intrinsic(instr), instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_global_const_block_intel: {
|
||||
assert(nir_dest_bit_size(instr->dest) == 32);
|
||||
assert(instr->num_components == 8 || instr->num_components == 16);
|
||||
|
||||
const fs_builder ubld = bld.exec_all().group(instr->num_components, 0);
|
||||
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
|
||||
tmp,
|
||||
bld.emit_uniformize(get_nir_src(instr->src[0])), /* Address */
|
||||
fs_reg(), /* No source data */
|
||||
brw_imm_ud(instr->num_components));
|
||||
|
||||
/* From the HW perspective, we just did a single SIMD8 or SIMD16 instruction
|
||||
* which loaded a dword in each SIMD channel. From NIR's perspective,
|
||||
* this instruction returns a vec8 or vec16. Any users of this data in the
|
||||
* back-end will expect a full vector per SIMD channel so we have to emit a
|
||||
* pile of MOVs to resolve this discrepancy. Fortunately, copy-prop
|
||||
* will generally clean them up for us.
|
||||
*/
|
||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||
bld.MOV(retype(offset(dest, bld, i), BRW_REGISTER_TYPE_UD),
|
||||
component(tmp, i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ssbo: {
|
||||
assert(devinfo->gen >= 7);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue