mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 22:10:10 +01:00
Either replace the BRW prefix with ELK or add an extra ELK prefix. The
following sed script was used to perform the renames in this patch:
```
# Simple prefix changes.
s/\<BRW_/ELK_/g
s/\<brw_/elk_/g
s/nir_to_brw/nir_to_elk/g
s/\<as_brw_reg\>/as_elk_reg/g
s/\<_brw_/_elk_/g
# Add prefix to various symbols.
#
# Initially I considered using C++ namespaces here, but in various
# cases the structs or functions also had to be visible from C code,
# so an explicit prefix was added instead.
s/\<backend_instruction/elk_\0/g
s/\<backend_reg/elk_\0/g
s/\<backend_shader/elk_\0/g
s/\<bblock_t\>/elk_\0/g
s/\<bblock_link\>/elk_\0/g
s/\<cfg_t\>/elk_\0/g
s/\<fs_visitor\>/elk_\0/g
s/\<fs_reg\>/elk_\0/g
s/\<fs_instruction_scheduler\>/elk_\0/g
s/\<vec4_instruction_scheduler\>/elk_\0/g
s/\<instruction_scheduler\>/elk_\0/g
s/\<schedule_node\>/elk_\0/g
s/\<schedule_node_child\>/elk_\0/g
s/\<\([a-z]*_\)\?thread_payload\>/elk_\1thread_payload/g
s/\<fs_generator\>/elk_\0/g
s/\<fs_inst\>/elk_\0/g
s/\<fs_reg_alloc\>/elk_\0/g
s/\<disasm_info\>/elk_\0/g
s/\<gfx._math\>/elk_\0/g
s/\<gfx7_block_read_scratch\>/elk_\0/g
s/\<gfx6_IF\>/elk_\0/g
s/\<gfx9_fb_READ\>/elk_\0/g
s/\<gfx6_resolve_implied_move\>/elk_\0/g
# Opcodes.
s/\<opcode op\>/elk_\0/g
s/\<opcode mov_op\>/elk_\0/g
s/\<opcode opcode\>/elk_\0/g
s/enum opcode\>/enum elk_opcode/g
s/static opcode\>/static elk_opcode/g
s/\<opcode elk_op/elk_opcode elk_op/g
s/struct opcode_desc/struct elk_opcode_desc/g
s/NUM_BRW_OPCODES/NUM_ELK_OPCODES/g
s/\<.._OPCODE_/ELK_\0/g
s/\<T.._OPCODE_/ELK_\0/g
s/\<VEC4_OPCODE_/ELK_\0/g
s/\<VEC4_...\?_OPCODE_/ELK_\0/g
s/\<SHADER_OPCODE_/ELK_\0/g
# Remaining specific cases.
s/\<wm_prog_data_barycentric_modes\>/elk_\0/g
s/\<encode_slm_size\>/elk_\0/g
s/\<intel_calculate_slm_size\>/elk_\0/g
s/\<gfx6_gather_sampler_wa\>/elk_\0/g
s/\<is_3src\>/elk_\0/g
s/\<WA_/ELK_\0/g
s/\<conditional_modifier\>/elk_\0/g
s/\<pred_ctrl_align16\>/elk_\0/g
s/\<shuffle_from_32bit_read\>/elk_\0/g
s/\<shuffle_src_to_dst\>/elk_\0/g
s/\<setup_imm_..\?\>/elk_\0/g
s/\<opt_predicated_break\>/elk_\0/g
s/\<has_bank_conflict\>/elk_\0/g
s/\<dead_control_flow_eliminate\>/elk_\0/g
s/\<disasm_new_inst_group\>/elk_\0/g
s/\<disasm_initialize\>/elk_\0/g
s/\<dump_assembly\>/elk_\0/g
s/\<disasm_insert_error\>/elk_\0/g
s/\<disasm_annotate\>/elk_\0/g
s/\<enum lsc_opcode\>/enum elk_lsc_opcode/g
s/\<lsc_opcode_/elk_lsc_opcode_/g
s/\<lsc_aop_[a-z_]\+\>/elk_\0/g
s/\<type_size_vec4\>/elk_\0/g
s/\<type_size_dvec4\>/elk_\0/g
s/\<type_size_xvec4\>/elk_\0/g
s/\<type_size_[a-z4]\+_bytes\>/elk_\0/g
s/\<gfx12_systolic_depth\>/elk_\0/g
```
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
213 lines
8.2 KiB
C++
213 lines
8.2 KiB
C++
/*
|
|
* Copyright © 2013-2015 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "elk_vec4_surface_builder.h"
|
|
|
|
using namespace elk;
|
|
|
|
namespace {
|
|
namespace array_utils {
|
|
/**
|
|
* Copy one every \p src_stride logical components of the argument into
|
|
* one every \p dst_stride logical components of the result.
|
|
*/
|
|
static src_reg
|
|
emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
|
|
unsigned dst_stride, unsigned src_stride)
|
|
{
|
|
if (src_stride == 1 && dst_stride == 1) {
|
|
return src;
|
|
} else {
|
|
const dst_reg dst = bld.vgrf(src.type,
|
|
DIV_ROUND_UP(size * dst_stride, 4));
|
|
|
|
for (unsigned i = 0; i < size; ++i)
|
|
bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
|
|
1 << (i * dst_stride % 4)),
|
|
swizzle(offset(src, 8, i * src_stride / 4),
|
|
elk_swizzle_for_mask(1 << (i * src_stride % 4))));
|
|
|
|
return src_reg(dst);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert a VEC4 into an array of registers with the layout expected by
|
|
* the recipient shared unit. If \p has_simd4x2 is true the argument is
|
|
* left unmodified in SIMD4x2 form, otherwise it will be rearranged into
|
|
* a SIMD8 vector.
|
|
*/
|
|
static src_reg
|
|
emit_insert(const vec4_builder &bld, const src_reg &src,
|
|
unsigned n, bool has_simd4x2)
|
|
{
|
|
if (src.file == BAD_FILE || n == 0) {
|
|
return src_reg();
|
|
|
|
} else {
|
|
/* Pad unused components with zeroes. */
|
|
const unsigned mask = (1 << n) - 1;
|
|
const dst_reg tmp = bld.vgrf(src.type);
|
|
|
|
bld.MOV(writemask(tmp, mask), src);
|
|
if (n < 4)
|
|
bld.MOV(writemask(tmp, ~mask), elk_imm_d(0));
|
|
|
|
return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace elk {
|
|
namespace surface_access {
|
|
namespace {
|
|
using namespace array_utils;
|
|
|
|
/**
|
|
* Generate a send opcode for a surface message and return the
|
|
* result.
|
|
*/
|
|
src_reg
|
|
emit_send(const vec4_builder &bld, enum elk_opcode op,
|
|
const src_reg &header,
|
|
const src_reg &addr, unsigned addr_sz,
|
|
const src_reg &src, unsigned src_sz,
|
|
const src_reg &surface,
|
|
unsigned arg, unsigned ret_sz,
|
|
elk_predicate pred = ELK_PREDICATE_NONE)
|
|
{
|
|
/* Calculate the total number of components of the payload. */
|
|
const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
|
|
const unsigned sz = header_sz + addr_sz + src_sz;
|
|
|
|
/* Construct the payload. */
|
|
const dst_reg payload = bld.vgrf(ELK_REGISTER_TYPE_UD, sz);
|
|
unsigned n = 0;
|
|
|
|
if (header_sz)
|
|
bld.exec_all().MOV(offset(payload, 8, n++),
|
|
retype(header, ELK_REGISTER_TYPE_UD));
|
|
|
|
for (unsigned i = 0; i < addr_sz; i++)
|
|
bld.MOV(offset(payload, 8, n++),
|
|
offset(retype(addr, ELK_REGISTER_TYPE_UD), 8, i));
|
|
|
|
for (unsigned i = 0; i < src_sz; i++)
|
|
bld.MOV(offset(payload, 8, n++),
|
|
offset(retype(src, ELK_REGISTER_TYPE_UD), 8, i));
|
|
|
|
/* Reduce the dynamically uniform surface index to a single
|
|
* scalar.
|
|
*/
|
|
const src_reg usurface = bld.emit_uniformize(surface);
|
|
|
|
/* Emit the message send instruction. */
|
|
const dst_reg dst = bld.vgrf(ELK_REGISTER_TYPE_UD, ret_sz);
|
|
vec4_instruction *inst =
|
|
bld.emit(op, dst, src_reg(payload), usurface, elk_imm_ud(arg));
|
|
inst->mlen = sz;
|
|
inst->size_written = ret_sz * REG_SIZE;
|
|
inst->header_size = header_sz;
|
|
inst->predicate = pred;
|
|
|
|
return src_reg(dst);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Emit an untyped surface read opcode. \p dims determines the number
|
|
* of components of the address and \p size the number of components of
|
|
* the returned value.
|
|
*/
|
|
src_reg
|
|
emit_untyped_read(const vec4_builder &bld,
|
|
const src_reg &surface, const src_reg &addr,
|
|
unsigned dims, unsigned size,
|
|
elk_predicate pred)
|
|
{
|
|
return emit_send(bld, ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
|
|
emit_insert(bld, addr, dims, true), 1,
|
|
src_reg(), 0,
|
|
surface, size, 1, pred);
|
|
}
|
|
|
|
/**
|
|
* Emit an untyped surface write opcode. \p dims determines the number
|
|
* of components of the address and \p size the number of components of
|
|
* the argument.
|
|
*/
|
|
void
|
|
emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
|
|
const src_reg &addr, const src_reg &src,
|
|
unsigned dims, unsigned size,
|
|
elk_predicate pred)
|
|
{
|
|
const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
|
|
emit_send(bld, ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
|
|
emit_insert(bld, addr, dims, has_simd4x2),
|
|
has_simd4x2 ? 1 : dims,
|
|
emit_insert(bld, src, size, has_simd4x2),
|
|
has_simd4x2 ? 1 : size,
|
|
surface, size, 0, pred);
|
|
}
|
|
|
|
/**
|
|
* Emit an untyped surface atomic opcode. \p dims determines the number
|
|
* of components of the address and \p rsize the number of components of
|
|
* the returned value (either zero or one).
|
|
*/
|
|
src_reg
|
|
emit_untyped_atomic(const vec4_builder &bld,
|
|
const src_reg &surface, const src_reg &addr,
|
|
const src_reg &src0, const src_reg &src1,
|
|
unsigned dims, unsigned rsize, unsigned op,
|
|
elk_predicate pred)
|
|
{
|
|
const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
|
|
|
|
/* Zip the components of both sources, they are represented as the X
|
|
* and Y components of the same vector.
|
|
*/
|
|
const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
|
|
const dst_reg srcs = bld.vgrf(ELK_REGISTER_TYPE_UD);
|
|
|
|
if (size >= 1) {
|
|
bld.MOV(writemask(srcs, WRITEMASK_X),
|
|
swizzle(src0, ELK_SWIZZLE_XXXX));
|
|
}
|
|
|
|
if (size >= 2) {
|
|
bld.MOV(writemask(srcs, WRITEMASK_Y),
|
|
swizzle(src1, ELK_SWIZZLE_XXXX));
|
|
}
|
|
|
|
return emit_send(bld, ELK_VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
|
|
emit_insert(bld, addr, dims, has_simd4x2),
|
|
has_simd4x2 ? 1 : dims,
|
|
emit_insert(bld, src_reg(srcs), size, has_simd4x2),
|
|
has_simd4x2 && size ? 1 : size,
|
|
surface, op, rsize, pred);
|
|
}
|
|
}
|
|
}
|