mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 15:40:11 +01:00
Either replace the BRW prefix with ELK or add an extra ELK prefix. Used
the following sed script to perform the renames in this patch:
```
# Simple prefix changes.
s/\<BRW_/ELK_/g
s/\<brw_/elk_/g
s/nir_to_brw/nir_to_elk/g
s/\<as_brw_reg\>/as_elk_reg/g
s/\<_brw_/_elk_/g
# Add prefix to various symbols.
#
# Initially I considered using C++ namespaces here, but in various
# cases the structs or functions also had to be visible from C code,
# so I added an explicit prefix instead.
s/\<backend_instruction/elk_\0/g
s/\<backend_reg/elk_\0/g
s/\<backend_shader/elk_\0/g
s/\<bblock_t\>/elk_\0/g
s/\<bblock_link\>/elk_\0/g
s/\<cfg_t\>/elk_\0/g
s/\<fs_visitor\>/elk_\0/g
s/\<fs_reg\>/elk_\0/g
s/\<fs_instruction_scheduler\>/elk_\0/g
s/\<vec4_instruction_scheduler\>/elk_\0/g
s/\<instruction_scheduler\>/elk_\0/g
s/\<schedule_node\>/elk_\0/g
s/\<schedule_node_child\>/elk_\0/g
s/\<\([a-z]*_\)\?thread_payload\>/elk_\1thread_payload/g
s/\<fs_generator\>/elk_\0/g
s/\<fs_inst\>/elk_\0/g
s/\<fs_reg_alloc\>/elk_\0/g
s/\<disasm_info\>/elk_\0/g
s/\<gfx._math\>/elk_\0/g
s/\<gfx7_block_read_scratch\>/elk_\0/g
s/\<gfx6_IF\>/elk_\0/g
s/\<gfx9_fb_READ\>/elk_\0/g
s/\<gfx6_resolve_implied_move\>/elk_\0/g
# Opcodes.
s/\<opcode op\>/elk_\0/g
s/\<opcode mov_op\>/elk_\0/g
s/\<opcode opcode\>/elk_\0/g
s/enum opcode\>/enum elk_opcode/g
s/static opcode\>/static elk_opcode/g
s/\<opcode elk_op/elk_opcode elk_op/g
s/struct opcode_desc/struct elk_opcode_desc/g
s/NUM_BRW_OPCODES/NUM_ELK_OPCODES/g
s/\<.._OPCODE_/ELK_\0/g
s/\<T.._OPCODE_/ELK_\0/g
s/\<VEC4_OPCODE_/ELK_\0/g
s/\<VEC4_...\?_OPCODE_/ELK_\0/g
s/\<SHADER_OPCODE_/ELK_\0/g
# Remaining specific cases.
s/\<wm_prog_data_barycentric_modes\>/elk_\0/g
s/\<encode_slm_size\>/elk_\0/g
s/\<intel_calculate_slm_size\>/elk_\0/g
s/\<gfx6_gather_sampler_wa\>/elk_\0/g
s/\<is_3src\>/elk_\0/g
s/\<WA_/ELK_\0/g
s/\<conditional_modifier\>/elk_\0/g
s/\<pred_ctrl_align16\>/elk_\0/g
s/\<shuffle_from_32bit_read\>/elk_\0/g
s/\<shuffle_src_to_dst\>/elk_\0/g
s/\<setup_imm_..\?\>/elk_\0/g
s/\<opt_predicated_break\>/elk_\0/g
s/\<has_bank_conflict\>/elk_\0/g
s/\<dead_control_flow_eliminate\>/elk_\0/g
s/\<disasm_new_inst_group\>/elk_\0/g
s/\<disasm_initialize\>/elk_\0/g
s/\<dump_assembly\>/elk_\0/g
s/\<disasm_insert_error\>/elk_\0/g
s/\<disasm_annotate\>/elk_\0/g
s/\<enum lsc_opcode\>/enum elk_lsc_opcode/g
s/\<lsc_opcode_/elk_lsc_opcode_/g
s/\<lsc_aop_[a-z_]\+\>/elk_\0/g
s/\<type_size_vec4\>/elk_\0/g
s/\<type_size_dvec4\>/elk_\0/g
s/\<type_size_xvec4\>/elk_\0/g
s/\<type_size_[a-z4]\+_bytes\>/elk_\0/g
s/\<gfx12_systolic_depth\>/elk_\0/g
```
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
223 lines
7.5 KiB
C++
223 lines
7.5 KiB
C++
/*
|
|
* Copyright © 2013 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
/**
|
|
* \file elk_vec4_tes.cpp
|
|
*
|
|
* Tessellation evaluation shader specific code derived from the vec4_visitor class.
|
|
*/
|
|
|
|
#include "elk_vec4_tes.h"
|
|
#include "elk_cfg.h"
|
|
#include "dev/intel_debug.h"
|
|
|
|
namespace elk {
|
|
|
|
/**
 * Construct a tessellation evaluation visitor.
 *
 * All of the work is delegated to the vec4_visitor base class, which is
 * handed the texture part of the program key (key->base.tex), the base
 * member of the TES program data, and a literal `false` for the argument
 * that precedes debug_enabled.
 */
vec4_tes_visitor::vec4_tes_visitor(
   const struct elk_compiler *compiler,
   const struct elk_compile_params *params,
   const struct elk_tes_prog_key *key,
   struct elk_tes_prog_data *prog_data,
   const nir_shader *shader,
   bool debug_enabled)
   : vec4_visitor(compiler,
                  params,
                  &key->base.tex,
                  &prog_data->base,
                  shader,
                  false,
                  debug_enabled)
{
   /* Nothing to initialize beyond what the base class does. */
}
|
|
|
|
void
|
|
vec4_tes_visitor::setup_payload()
|
|
{
|
|
int reg = 0;
|
|
|
|
/* The payload always contains important data in r0 and r1, which contains
|
|
* the URB handles that are passed on to the URB write at the end
|
|
* of the thread.
|
|
*/
|
|
reg += 2;
|
|
|
|
reg = setup_uniforms(reg);
|
|
|
|
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
|
for (int i = 0; i < 3; i++) {
|
|
if (inst->src[i].file != ATTR)
|
|
continue;
|
|
|
|
unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
|
|
struct elk_reg grf = elk_vec4_grf(reg + slot / 2, 4 * (slot % 2));
|
|
grf = stride(grf, 0, 4, 1);
|
|
grf.swizzle = inst->src[i].swizzle;
|
|
grf.type = inst->src[i].type;
|
|
grf.abs = inst->src[i].abs;
|
|
grf.negate = inst->src[i].negate;
|
|
inst->src[i] = grf;
|
|
}
|
|
}
|
|
|
|
reg += 8 * prog_data->urb_read_length;
|
|
|
|
this->first_non_payload_grf = reg;
|
|
}
|
|
|
|
|
|
void
|
|
vec4_tes_visitor::emit_prolog()
|
|
{
|
|
input_read_header = src_reg(this, glsl_uvec4_type());
|
|
emit(ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
|
|
|
|
this->current_annotation = NULL;
|
|
}
|
|
|
|
|
|
void
|
|
vec4_tes_visitor::emit_urb_write_header(int mrf)
|
|
{
|
|
/* No need to do anything for DS; an implied write to this MRF will be
|
|
* performed by ELK_VEC4_VS_OPCODE_URB_WRITE.
|
|
*/
|
|
(void) mrf;
|
|
}
|
|
|
|
|
|
/**
 * Emit the URB write instruction for this stage.
 *
 * \param complete  when true the write is flagged
 *                  ELK_URB_WRITE_EOT_COMPLETE, otherwise it carries
 *                  ELK_URB_WRITE_NO_FLAGS.
 * \return the newly emitted instruction.
 */
vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
   vec4_instruction *urb_write = emit(ELK_VEC4_VS_OPCODE_URB_WRITE);

   if (complete)
      urb_write->urb_write_flags = ELK_URB_WRITE_EOT_COMPLETE;
   else
      urb_write->urb_write_flags = ELK_URB_WRITE_NO_FLAGS;

   return urb_write;
}
|
|
|
|
void
|
|
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|
{
|
|
const struct elk_tes_prog_data *tes_prog_data =
|
|
(const struct elk_tes_prog_data *) prog_data;
|
|
|
|
switch (instr->intrinsic) {
|
|
case nir_intrinsic_load_tess_coord:
|
|
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
|
|
src_reg(elk_vec8_grf(1, 0))));
|
|
break;
|
|
case nir_intrinsic_load_tess_level_outer:
|
|
if (tes_prog_data->domain == INTEL_TESS_DOMAIN_ISOLINE) {
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
|
|
swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
|
|
ELK_SWIZZLE_ZWZW)));
|
|
} else {
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
|
|
swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
|
|
ELK_SWIZZLE_WZYX)));
|
|
}
|
|
break;
|
|
case nir_intrinsic_load_tess_level_inner:
|
|
if (tes_prog_data->domain == INTEL_TESS_DOMAIN_QUAD) {
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
|
|
swizzle(src_reg(ATTR, 0, glsl_vec4_type()),
|
|
ELK_SWIZZLE_WZYX)));
|
|
} else {
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
|
|
src_reg(ATTR, 1, glsl_float_type())));
|
|
}
|
|
break;
|
|
case nir_intrinsic_load_primitive_id:
|
|
emit(ELK_TES_OPCODE_GET_PRIMITIVE_ID,
|
|
get_nir_def(instr->def, ELK_REGISTER_TYPE_UD));
|
|
break;
|
|
|
|
case nir_intrinsic_load_input:
|
|
case nir_intrinsic_load_per_vertex_input: {
|
|
assert(instr->def.bit_size == 32);
|
|
src_reg indirect_offset = get_indirect_offset(instr);
|
|
unsigned imm_offset = instr->const_index[0];
|
|
src_reg header = input_read_header;
|
|
unsigned first_component = nir_intrinsic_component(instr);
|
|
|
|
if (indirect_offset.file != BAD_FILE) {
|
|
src_reg clamped_indirect_offset = src_reg(this, glsl_uvec4_type());
|
|
|
|
/* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
|
|
* valid range of the offset is [0, 0FFFFFFFh].
|
|
*/
|
|
emit_minmax(ELK_CONDITIONAL_L,
|
|
dst_reg(clamped_indirect_offset),
|
|
retype(indirect_offset, ELK_REGISTER_TYPE_UD),
|
|
elk_imm_ud(0x0fffffffu));
|
|
|
|
header = src_reg(this, glsl_uvec4_type());
|
|
emit(ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
|
|
input_read_header, clamped_indirect_offset);
|
|
} else {
|
|
/* Arbitrarily only push up to 24 vec4 slots worth of data,
|
|
* which is 12 registers (since each holds 2 vec4 slots).
|
|
*/
|
|
const unsigned max_push_slots = 24;
|
|
if (imm_offset < max_push_slots) {
|
|
src_reg src = src_reg(ATTR, imm_offset, glsl_ivec4_type());
|
|
src.swizzle = ELK_SWZ_COMP_INPUT(first_component);
|
|
|
|
emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_D), src));
|
|
|
|
prog_data->urb_read_length =
|
|
MAX2(prog_data->urb_read_length,
|
|
DIV_ROUND_UP(imm_offset + 1, 2));
|
|
break;
|
|
}
|
|
}
|
|
|
|
dst_reg temp(this, glsl_ivec4_type());
|
|
vec4_instruction *read =
|
|
emit(ELK_VEC4_OPCODE_URB_READ, temp, src_reg(header));
|
|
read->offset = imm_offset;
|
|
read->urb_write_flags = ELK_URB_WRITE_PER_SLOT_OFFSET;
|
|
|
|
src_reg src = src_reg(temp);
|
|
src.swizzle = ELK_SWZ_COMP_INPUT(first_component);
|
|
|
|
/* Copy to target. We might end up with some funky writemasks landing
|
|
* in here, but we really don't want them in the above pseudo-ops.
|
|
*/
|
|
dst_reg dst = get_nir_def(instr->def, ELK_REGISTER_TYPE_D);
|
|
dst.writemask = elk_writemask_for_size(instr->num_components);
|
|
emit(MOV(dst, src));
|
|
break;
|
|
}
|
|
default:
|
|
vec4_visitor::nir_emit_intrinsic(instr);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
vec4_tes_visitor::emit_thread_end()
|
|
{
|
|
/* For DS, we always end the thread by emitting a single vertex.
|
|
* emit_urb_write_opcode() will take care of setting the eot flag on the
|
|
* SEND instruction.
|
|
*/
|
|
emit_vertex();
|
|
}
|
|
|
|
} /* namespace elk */
|