mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
Folks, there's more than one accumulator. In general, when the register file is ARF, the upper 4 bits of the register number specify which ARF, and the lower 4 bits specify which one of that ARF. This can be further partitioned by the subregister number. This is already mostly handled correctly for the flags register, but lots of places wanted to check the register number for equality with BRW_ARF_ACCUMULATOR. If acc1 is ever specified, that won't work. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28281>
727 lines
19 KiB
C++
727 lines
19 KiB
C++
/*
|
|
* Copyright © 2010 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "brw_cfg.h"
|
|
#include "brw_eu.h"
|
|
#include "brw_fs.h"
|
|
#include "brw_nir.h"
|
|
#include "brw_private.h"
|
|
#include "dev/intel_debug.h"
|
|
#include "util/macros.h"
|
|
|
|
bool
|
|
fs_reg_saturate_immediate(fs_reg *reg)
|
|
{
|
|
union {
|
|
unsigned ud;
|
|
int d;
|
|
float f;
|
|
double df;
|
|
} imm, sat_imm = { 0 };
|
|
|
|
const unsigned size = type_sz(reg->type);
|
|
|
|
/* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
|
|
* irrelevant, so just check the size of the type and copy from/to an
|
|
* appropriately sized field.
|
|
*/
|
|
if (size < 8)
|
|
imm.ud = reg->ud;
|
|
else
|
|
imm.df = reg->df;
|
|
|
|
switch (reg->type) {
|
|
case BRW_REGISTER_TYPE_UD:
|
|
case BRW_REGISTER_TYPE_D:
|
|
case BRW_REGISTER_TYPE_UW:
|
|
case BRW_REGISTER_TYPE_W:
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
case BRW_REGISTER_TYPE_Q:
|
|
/* Nothing to do. */
|
|
return false;
|
|
case BRW_REGISTER_TYPE_F:
|
|
sat_imm.f = SATURATE(imm.f);
|
|
break;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
sat_imm.df = SATURATE(imm.df);
|
|
break;
|
|
case BRW_REGISTER_TYPE_UB:
|
|
case BRW_REGISTER_TYPE_B:
|
|
unreachable("no UB/B immediates");
|
|
case BRW_REGISTER_TYPE_V:
|
|
case BRW_REGISTER_TYPE_UV:
|
|
case BRW_REGISTER_TYPE_VF:
|
|
unreachable("unimplemented: saturate vector immediate");
|
|
case BRW_REGISTER_TYPE_HF:
|
|
unreachable("unimplemented: saturate HF immediate");
|
|
case BRW_REGISTER_TYPE_NF:
|
|
unreachable("no NF immediates");
|
|
}
|
|
|
|
if (size < 8) {
|
|
if (imm.ud != sat_imm.ud) {
|
|
reg->ud = sat_imm.ud;
|
|
return true;
|
|
}
|
|
} else {
|
|
if (imm.df != sat_imm.df) {
|
|
reg->df = sat_imm.df;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
fs_reg_negate_immediate(fs_reg *reg)
|
|
{
|
|
switch (reg->type) {
|
|
case BRW_REGISTER_TYPE_D:
|
|
case BRW_REGISTER_TYPE_UD:
|
|
reg->d = -reg->d;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_W:
|
|
case BRW_REGISTER_TYPE_UW: {
|
|
uint16_t value = -(int16_t)reg->ud;
|
|
reg->ud = value | (uint32_t)value << 16;
|
|
return true;
|
|
}
|
|
case BRW_REGISTER_TYPE_F:
|
|
reg->f = -reg->f;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_VF:
|
|
reg->ud ^= 0x80808080;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
reg->df = -reg->df;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
case BRW_REGISTER_TYPE_Q:
|
|
reg->d64 = -reg->d64;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_UB:
|
|
case BRW_REGISTER_TYPE_B:
|
|
unreachable("no UB/B immediates");
|
|
case BRW_REGISTER_TYPE_UV:
|
|
case BRW_REGISTER_TYPE_V:
|
|
assert(!"unimplemented: negate UV/V immediate");
|
|
case BRW_REGISTER_TYPE_HF:
|
|
reg->ud ^= 0x80008000;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_NF:
|
|
unreachable("no NF immediates");
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
fs_reg_abs_immediate(fs_reg *reg)
|
|
{
|
|
switch (reg->type) {
|
|
case BRW_REGISTER_TYPE_D:
|
|
reg->d = abs(reg->d);
|
|
return true;
|
|
case BRW_REGISTER_TYPE_W: {
|
|
uint16_t value = abs((int16_t)reg->ud);
|
|
reg->ud = value | (uint32_t)value << 16;
|
|
return true;
|
|
}
|
|
case BRW_REGISTER_TYPE_F:
|
|
reg->f = fabsf(reg->f);
|
|
return true;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
reg->df = fabs(reg->df);
|
|
return true;
|
|
case BRW_REGISTER_TYPE_VF:
|
|
reg->ud &= ~0x80808080;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_Q:
|
|
reg->d64 = imaxabs(reg->d64);
|
|
return true;
|
|
case BRW_REGISTER_TYPE_UB:
|
|
case BRW_REGISTER_TYPE_B:
|
|
unreachable("no UB/B immediates");
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
case BRW_REGISTER_TYPE_UD:
|
|
case BRW_REGISTER_TYPE_UW:
|
|
case BRW_REGISTER_TYPE_UV:
|
|
/* Presumably the absolute value modifier on an unsigned source is a
|
|
* nop, but it would be nice to confirm.
|
|
*/
|
|
assert(!"unimplemented: abs unsigned immediate");
|
|
case BRW_REGISTER_TYPE_V:
|
|
assert(!"unimplemented: abs V immediate");
|
|
case BRW_REGISTER_TYPE_HF:
|
|
reg->ud &= ~0x80008000;
|
|
return true;
|
|
case BRW_REGISTER_TYPE_NF:
|
|
unreachable("no NF immediates");
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
fs_reg::is_zero() const
|
|
{
|
|
if (file != IMM)
|
|
return false;
|
|
|
|
assert(type_sz(type) > 1);
|
|
|
|
switch (type) {
|
|
case BRW_REGISTER_TYPE_HF:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
|
|
case BRW_REGISTER_TYPE_F:
|
|
return f == 0;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
return df == 0;
|
|
case BRW_REGISTER_TYPE_W:
|
|
case BRW_REGISTER_TYPE_UW:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 0;
|
|
case BRW_REGISTER_TYPE_D:
|
|
case BRW_REGISTER_TYPE_UD:
|
|
return d == 0;
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
case BRW_REGISTER_TYPE_Q:
|
|
return u64 == 0;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_reg::is_one() const
|
|
{
|
|
if (file != IMM)
|
|
return false;
|
|
|
|
assert(type_sz(type) > 1);
|
|
|
|
switch (type) {
|
|
case BRW_REGISTER_TYPE_HF:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 0x3c00;
|
|
case BRW_REGISTER_TYPE_F:
|
|
return f == 1.0f;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
return df == 1.0;
|
|
case BRW_REGISTER_TYPE_W:
|
|
case BRW_REGISTER_TYPE_UW:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 1;
|
|
case BRW_REGISTER_TYPE_D:
|
|
case BRW_REGISTER_TYPE_UD:
|
|
return d == 1;
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
case BRW_REGISTER_TYPE_Q:
|
|
return u64 == 1;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_reg::is_negative_one() const
|
|
{
|
|
if (file != IMM)
|
|
return false;
|
|
|
|
assert(type_sz(type) > 1);
|
|
|
|
switch (type) {
|
|
case BRW_REGISTER_TYPE_HF:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 0xbc00;
|
|
case BRW_REGISTER_TYPE_F:
|
|
return f == -1.0;
|
|
case BRW_REGISTER_TYPE_DF:
|
|
return df == -1.0;
|
|
case BRW_REGISTER_TYPE_W:
|
|
assert((d & 0xffff) == ((d >> 16) & 0xffff));
|
|
return (d & 0xffff) == 0xffff;
|
|
case BRW_REGISTER_TYPE_D:
|
|
return d == -1;
|
|
case BRW_REGISTER_TYPE_Q:
|
|
return d64 == -1;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_reg::is_null() const
|
|
{
|
|
return file == ARF && nr == BRW_ARF_NULL;
|
|
}
|
|
|
|
|
|
bool
|
|
fs_reg::is_accumulator() const
|
|
{
|
|
return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR;
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_commutative() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_AND:
|
|
case BRW_OPCODE_OR:
|
|
case BRW_OPCODE_XOR:
|
|
case BRW_OPCODE_ADD:
|
|
case BRW_OPCODE_ADD3:
|
|
case SHADER_OPCODE_MULH:
|
|
return true;
|
|
|
|
case BRW_OPCODE_MUL:
|
|
/* Integer multiplication of dword and word sources is not actually
|
|
* commutative. The DW source must be first.
|
|
*/
|
|
return !brw_reg_type_is_integer(src[0].type) ||
|
|
type_sz(src[0].type) == type_sz(src[1].type);
|
|
|
|
case BRW_OPCODE_SEL:
|
|
/* MIN and MAX are commutative. */
|
|
if (conditional_mod == BRW_CONDITIONAL_GE ||
|
|
conditional_mod == BRW_CONDITIONAL_L) {
|
|
return true;
|
|
}
|
|
FALLTHROUGH;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_3src(const struct brw_compiler *compiler) const
|
|
{
|
|
return ::is_3src(&compiler->isa, opcode);
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_math() const
|
|
{
|
|
return (opcode == SHADER_OPCODE_RCP ||
|
|
opcode == SHADER_OPCODE_RSQ ||
|
|
opcode == SHADER_OPCODE_SQRT ||
|
|
opcode == SHADER_OPCODE_EXP2 ||
|
|
opcode == SHADER_OPCODE_LOG2 ||
|
|
opcode == SHADER_OPCODE_SIN ||
|
|
opcode == SHADER_OPCODE_COS ||
|
|
opcode == SHADER_OPCODE_INT_QUOTIENT ||
|
|
opcode == SHADER_OPCODE_INT_REMAINDER ||
|
|
opcode == SHADER_OPCODE_POW);
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_control_flow_begin() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_DO:
|
|
case BRW_OPCODE_IF:
|
|
case BRW_OPCODE_ELSE:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_control_flow_end() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_ELSE:
|
|
case BRW_OPCODE_WHILE:
|
|
case BRW_OPCODE_ENDIF:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_control_flow() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_DO:
|
|
case BRW_OPCODE_WHILE:
|
|
case BRW_OPCODE_IF:
|
|
case BRW_OPCODE_ELSE:
|
|
case BRW_OPCODE_ENDIF:
|
|
case BRW_OPCODE_BREAK:
|
|
case BRW_OPCODE_CONTINUE:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::uses_indirect_addressing() const
|
|
{
|
|
switch (opcode) {
|
|
case SHADER_OPCODE_BROADCAST:
|
|
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
|
case SHADER_OPCODE_MOV_INDIRECT:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::can_do_saturate() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_ADD:
|
|
case BRW_OPCODE_ADD3:
|
|
case BRW_OPCODE_ASR:
|
|
case BRW_OPCODE_AVG:
|
|
case BRW_OPCODE_CSEL:
|
|
case BRW_OPCODE_DP2:
|
|
case BRW_OPCODE_DP3:
|
|
case BRW_OPCODE_DP4:
|
|
case BRW_OPCODE_DPH:
|
|
case BRW_OPCODE_DP4A:
|
|
case BRW_OPCODE_LINE:
|
|
case BRW_OPCODE_LRP:
|
|
case BRW_OPCODE_MAC:
|
|
case BRW_OPCODE_MAD:
|
|
case BRW_OPCODE_MATH:
|
|
case BRW_OPCODE_MOV:
|
|
case BRW_OPCODE_MUL:
|
|
case SHADER_OPCODE_MULH:
|
|
case BRW_OPCODE_PLN:
|
|
case BRW_OPCODE_RNDD:
|
|
case BRW_OPCODE_RNDE:
|
|
case BRW_OPCODE_RNDU:
|
|
case BRW_OPCODE_RNDZ:
|
|
case BRW_OPCODE_SEL:
|
|
case BRW_OPCODE_SHL:
|
|
case BRW_OPCODE_SHR:
|
|
case FS_OPCODE_LINTERP:
|
|
case SHADER_OPCODE_COS:
|
|
case SHADER_OPCODE_EXP2:
|
|
case SHADER_OPCODE_LOG2:
|
|
case SHADER_OPCODE_POW:
|
|
case SHADER_OPCODE_RCP:
|
|
case SHADER_OPCODE_RSQ:
|
|
case SHADER_OPCODE_SIN:
|
|
case SHADER_OPCODE_SQRT:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::reads_accumulator_implicitly() const
|
|
{
|
|
switch (opcode) {
|
|
case BRW_OPCODE_MAC:
|
|
case BRW_OPCODE_MACH:
|
|
case BRW_OPCODE_SADA2:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
|
|
{
|
|
return writes_accumulator ||
|
|
(opcode == FS_OPCODE_LINTERP && !devinfo->has_pln) ||
|
|
(eot && intel_needs_workaround(devinfo, 14010017096));
|
|
}
|
|
|
|
bool
|
|
fs_inst::has_side_effects() const
|
|
{
|
|
switch (opcode) {
|
|
case SHADER_OPCODE_SEND:
|
|
return send_has_side_effects;
|
|
|
|
case BRW_OPCODE_SYNC:
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_MEMORY_FENCE:
|
|
case SHADER_OPCODE_INTERLOCK:
|
|
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
|
case FS_OPCODE_FB_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_BARRIER:
|
|
case SHADER_OPCODE_RND_MODE:
|
|
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
|
case FS_OPCODE_SCHEDULING_FENCE:
|
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
|
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
|
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
|
case RT_OPCODE_TRACE_RAY_LOGICAL:
|
|
return true;
|
|
default:
|
|
return eot;
|
|
}
|
|
}
|
|
|
|
bool
|
|
fs_inst::is_volatile() const
|
|
{
|
|
switch (opcode) {
|
|
case SHADER_OPCODE_SEND:
|
|
return send_is_volatile;
|
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
|
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
|
|
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
|
|
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
static bool
|
|
inst_is_in_block(const bblock_t *block, const fs_inst *inst)
|
|
{
|
|
const exec_node *n = inst;
|
|
|
|
/* Find the tail sentinel. If the tail sentinel is the sentinel from the
|
|
* list header in the bblock_t, then this instruction is in that basic
|
|
* block.
|
|
*/
|
|
while (!n->is_tail_sentinel())
|
|
n = n->get_next();
|
|
|
|
return n == &block->instructions.tail_sentinel;
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
|
|
{
|
|
for (bblock_t *block_iter = start_block->next();
|
|
block_iter;
|
|
block_iter = block_iter->next()) {
|
|
block_iter->start_ip += ip_adjustment;
|
|
block_iter->end_ip += ip_adjustment;
|
|
}
|
|
}
|
|
|
|
void
|
|
fs_inst::insert_after(bblock_t *block, fs_inst *inst)
|
|
{
|
|
assert(this != inst);
|
|
assert(block->end_ip_delta == 0);
|
|
|
|
if (!this->is_head_sentinel())
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
|
|
|
block->end_ip++;
|
|
|
|
adjust_later_block_ips(block, 1);
|
|
|
|
exec_node::insert_after(inst);
|
|
}
|
|
|
|
void
|
|
fs_inst::insert_before(bblock_t *block, fs_inst *inst)
|
|
{
|
|
assert(this != inst);
|
|
assert(block->end_ip_delta == 0);
|
|
|
|
if (!this->is_tail_sentinel())
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
|
|
|
block->end_ip++;
|
|
|
|
adjust_later_block_ips(block, 1);
|
|
|
|
exec_node::insert_before(inst);
|
|
}
|
|
|
|
void
|
|
fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
|
|
{
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
|
|
|
if (defer_later_block_ip_updates) {
|
|
block->end_ip_delta--;
|
|
} else {
|
|
assert(block->end_ip_delta == 0);
|
|
adjust_later_block_ips(block, -1);
|
|
}
|
|
|
|
if (block->start_ip == block->end_ip) {
|
|
if (block->end_ip_delta != 0) {
|
|
adjust_later_block_ips(block, block->end_ip_delta);
|
|
block->end_ip_delta = 0;
|
|
}
|
|
|
|
block->cfg->remove_block(block);
|
|
} else {
|
|
block->end_ip--;
|
|
}
|
|
|
|
exec_node::remove();
|
|
}
|
|
|
|
extern "C" const unsigned *
|
|
brw_compile_tes(const struct brw_compiler *compiler,
|
|
brw_compile_tes_params *params)
|
|
{
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
nir_shader *nir = params->base.nir;
|
|
const struct brw_tes_prog_key *key = params->key;
|
|
const struct intel_vue_map *input_vue_map = params->input_vue_map;
|
|
struct brw_tes_prog_data *prog_data = params->prog_data;
|
|
|
|
const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES);
|
|
|
|
prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
|
|
prog_data->base.base.ray_queries = nir->info.ray_queries;
|
|
|
|
nir->info.inputs_read = key->inputs_read;
|
|
nir->info.patch_inputs_read = key->patch_inputs_read;
|
|
|
|
brw_nir_apply_key(nir, compiler, &key->base, 8);
|
|
brw_nir_lower_tes_inputs(nir, input_vue_map);
|
|
brw_nir_lower_vue_outputs(nir);
|
|
brw_postprocess_nir(nir, compiler, debug_enabled,
|
|
key->base.robust_flags);
|
|
|
|
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
|
|
nir->info.outputs_written,
|
|
nir->info.separate_shader, 1);
|
|
|
|
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
|
|
|
|
assert(output_size_bytes >= 1);
|
|
if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
|
|
params->base.error_str = ralloc_strdup(params->base.mem_ctx,
|
|
"DS outputs exceed maximum size");
|
|
return NULL;
|
|
}
|
|
|
|
prog_data->base.clip_distance_mask =
|
|
((1 << nir->info.clip_distance_array_size) - 1);
|
|
prog_data->base.cull_distance_mask =
|
|
((1 << nir->info.cull_distance_array_size) - 1) <<
|
|
nir->info.clip_distance_array_size;
|
|
|
|
prog_data->include_primitive_id =
|
|
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
|
|
|
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
|
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
|
|
|
prog_data->base.urb_read_length = 0;
|
|
|
|
STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
|
|
STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
|
|
TESS_SPACING_FRACTIONAL_ODD - 1);
|
|
STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
|
|
TESS_SPACING_FRACTIONAL_EVEN - 1);
|
|
|
|
prog_data->partitioning =
|
|
(enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
|
|
|
|
switch (nir->info.tess._primitive_mode) {
|
|
case TESS_PRIMITIVE_QUADS:
|
|
prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
|
|
break;
|
|
case TESS_PRIMITIVE_TRIANGLES:
|
|
prog_data->domain = INTEL_TESS_DOMAIN_TRI;
|
|
break;
|
|
case TESS_PRIMITIVE_ISOLINES:
|
|
prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
|
|
break;
|
|
default:
|
|
unreachable("invalid domain shader primitive mode");
|
|
}
|
|
|
|
if (nir->info.tess.point_mode) {
|
|
prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
|
|
} else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
|
|
prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
|
|
} else {
|
|
/* Hardware winding order is backwards from OpenGL */
|
|
prog_data->output_topology =
|
|
nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
|
|
: INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
|
|
}
|
|
|
|
if (unlikely(debug_enabled)) {
|
|
fprintf(stderr, "TES Input ");
|
|
brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
|
|
fprintf(stderr, "TES Output ");
|
|
brw_print_vue_map(stderr, &prog_data->base.vue_map,
|
|
MESA_SHADER_TESS_EVAL);
|
|
}
|
|
|
|
const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
|
|
fs_visitor v(compiler, ¶ms->base, &key->base,
|
|
&prog_data->base.base, nir, dispatch_width,
|
|
params->base.stats != NULL, debug_enabled);
|
|
if (!v.run_tes()) {
|
|
params->base.error_str =
|
|
ralloc_strdup(params->base.mem_ctx, v.fail_msg);
|
|
return NULL;
|
|
}
|
|
|
|
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
|
|
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
|
|
|
|
prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
|
|
|
|
fs_generator g(compiler, ¶ms->base,
|
|
&prog_data->base.base, MESA_SHADER_TESS_EVAL);
|
|
if (unlikely(debug_enabled)) {
|
|
g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
|
|
"%s tessellation evaluation shader %s",
|
|
nir->info.label ? nir->info.label
|
|
: "unnamed",
|
|
nir->info.name));
|
|
}
|
|
|
|
g.generate_code(v.cfg, dispatch_width, v.shader_stats,
|
|
v.performance_analysis.require(), params->base.stats);
|
|
|
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
|
|
|
return g.get_assembly();
|
|
}
|