ilo: add a toy shader compiler

This is a simple shader compiler that performs almost zero optimizations.  The
generated code is usually much larger than that generated by i965.
The generated code also requires many more registers.

Function-wise, it lacks register spilling and does not support most TGSI
indirections.  Other than those, it works alright.
This commit is contained in:
Chia-I Wu 2012-12-13 05:48:28 +08:00 committed by Chia-I Wu
parent 0fa2d0e98a
commit 7118ff8bb0
14 changed files with 8669 additions and 1 deletions

View file

@ -16,4 +16,11 @@ C_SOURCES := \
ilo_screen.c \
ilo_shader.c \
ilo_state.c \
ilo_video.c
ilo_video.c \
shader/toy_compiler.c \
shader/toy_compiler_asm.c \
shader/toy_compiler_disasm.c \
shader/toy_legalize.c \
shader/toy_legalize_ra.c \
shader/toy_optimize.c \
shader/toy_tgsi.c

View file

@ -0,0 +1,556 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "toy_compiler.h"
/**
* Dump an operand.
*/
static void
tc_dump_operand(struct toy_compiler *tc,
enum toy_file file, enum toy_type type, enum toy_rect rect,
bool indirect, unsigned indirect_subreg, uint32_t val32,
bool is_dst)
{
static const char *toy_file_names[TOY_FILE_COUNT] = {
[TOY_FILE_VRF] = "v",
[TOY_FILE_ARF] = "NOT USED",
[TOY_FILE_GRF] = "r",
[TOY_FILE_MRF] = "m",
[TOY_FILE_IMM] = "NOT USED",
};
const char *name = toy_file_names[file];
int reg, subreg;
if (file != TOY_FILE_IMM) {
reg = val32 / TOY_REG_WIDTH;
subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
}
switch (file) {
case TOY_FILE_GRF:
if (indirect) {
const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);
ilo_printf("%s[a0.%d", name, addr_subreg);
if (val32)
ilo_printf("%+d", (int) val32);
ilo_printf("]");
break;
}
/* fall through */
case TOY_FILE_VRF:
case TOY_FILE_MRF:
ilo_printf("%s%d", name, reg);
if (subreg)
ilo_printf(".%d", subreg);
break;
case TOY_FILE_ARF:
switch (reg) {
case BRW_ARF_NULL:
ilo_printf("null");
break;
case BRW_ARF_ADDRESS:
ilo_printf("a0.%d", subreg);
break;
case BRW_ARF_ACCUMULATOR:
case BRW_ARF_ACCUMULATOR + 1:
ilo_printf("acc%d.%d", (reg & 1), subreg);
break;
case BRW_ARF_FLAG:
ilo_printf("f0.%d", subreg);
break;
case BRW_ARF_STATE:
ilo_printf("sr0.%d", subreg);
break;
case BRW_ARF_CONTROL:
ilo_printf("cr0.%d", subreg);
break;
case BRW_ARF_NOTIFICATION_COUNT:
case BRW_ARF_NOTIFICATION_COUNT + 1:
ilo_printf("n%d.%d", (reg & 1), subreg);
break;
case BRW_ARF_IP:
ilo_printf("ip");
break;
}
break;
case TOY_FILE_IMM:
switch (type) {
case TOY_TYPE_F:
{
union fi fi = { .ui = val32 };
ilo_printf("%f", fi.f);
}
break;
case TOY_TYPE_D:
ilo_printf("%d", (int32_t) val32);
break;
case TOY_TYPE_UD:
ilo_printf("%u", val32);
break;
case TOY_TYPE_W:
ilo_printf("%d", (int16_t) (val32 & 0xffff));
break;
case TOY_TYPE_UW:
ilo_printf("%u", val32 & 0xffff);
break;
case TOY_TYPE_V:
ilo_printf("0x%08x", val32);
break;
default:
assert(!"unknown imm type");
break;
}
break;
default:
assert(!"unexpected file");
break;
}
/* dump the region parameter */
if (file != TOY_FILE_IMM) {
int vert_stride, width, horz_stride;
switch (rect) {
case TOY_RECT_LINEAR:
vert_stride = tc->rect_linear_width;
width = tc->rect_linear_width;
horz_stride = 1;
break;
case TOY_RECT_041:
vert_stride = 0;
width = 4;
horz_stride = 1;
break;
case TOY_RECT_010:
vert_stride = 0;
width = 1;
horz_stride = 0;
break;
case TOY_RECT_220:
vert_stride = 2;
width = 2;
horz_stride = 0;
break;
case TOY_RECT_440:
vert_stride = 4;
width = 4;
horz_stride = 0;
break;
case TOY_RECT_240:
vert_stride = 2;
width = 4;
horz_stride = 0;
break;
default:
assert(!"unknown rect parameter");
vert_stride = 0;
width = 0;
horz_stride = 0;
break;
}
if (is_dst)
ilo_printf("<%d>", horz_stride);
else
ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
}
switch (type) {
case TOY_TYPE_F:
ilo_printf(":f");
break;
case TOY_TYPE_D:
ilo_printf(":d");
break;
case TOY_TYPE_UD:
ilo_printf(":ud");
break;
case TOY_TYPE_W:
ilo_printf(":w");
break;
case TOY_TYPE_UW:
ilo_printf(":uw");
break;
case TOY_TYPE_V:
ilo_printf(":v");
break;
default:
assert(!"unexpected type");
break;
}
}
/**
 * Print a source operand, including its negate/absolute modifiers and, when
 * it is swizzled, its swizzle.
 */
static void
tc_dump_src(struct toy_compiler *tc, struct toy_src src)
{
   const bool is_abs = src.absolute;

   if (src.negate)
      ilo_printf("-");

   /* the absolute modifier is shown as a pair of bars around the operand */
   if (is_abs)
      ilo_printf("|");

   tc_dump_operand(tc, src.file, src.type, src.rect,
         src.indirect, src.indirect_subreg, src.val32, false);

   if (tsrc_is_swizzled(src)) {
      const char xyzw[] = "xyzw";
      const char x = xyzw[src.swizzle_x];
      const char y = xyzw[src.swizzle_y];
      const char z = xyzw[src.swizzle_z];
      const char w = xyzw[src.swizzle_w];

      ilo_printf(".%c%c%c%c", x, y, z, w);
   }

   if (is_abs)
      ilo_printf("|");
}
/**
 * Print a destination operand, followed by its writemask unless all four
 * channels are written.
 */
static void
tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst)
{
   static const struct {
      unsigned mask;
      const char *name;
   } channels[4] = {
      { TOY_WRITEMASK_X, "x" },
      { TOY_WRITEMASK_Y, "y" },
      { TOY_WRITEMASK_Z, "z" },
      { TOY_WRITEMASK_W, "w" },
   };
   int i;

   tc_dump_operand(tc, dst.file, dst.type, dst.rect,
         dst.indirect, dst.indirect_subreg, dst.val32, true);

   /* a full writemask is implied and not printed */
   if (dst.writemask == TOY_WRITEMASK_XYZW)
      return;

   ilo_printf(".");
   for (i = 0; i < 4; i++) {
      if (dst.writemask & channels[i].mask)
         ilo_printf("%s", channels[i].name);
   }
}
/**
 * Return the printable name of an opcode.
 *
 * Hardware opcodes (BRW_OPCODE_x) map to their plain mnemonics; toy opcodes
 * are prefixed with the group they belong to ("tgsi.", "math.", "urb.",
 * "gs.", "fs.").  Unknown opcodes map to "unk"; NULL is never returned.
 */
static const char *
get_opcode_name(unsigned opcode)
{
   switch (opcode) {
   case BRW_OPCODE_MOV:                   return "mov";
   case BRW_OPCODE_SEL:                   return "sel";
   case BRW_OPCODE_NOT:                   return "not";
   case BRW_OPCODE_AND:                   return "and";
   case BRW_OPCODE_OR:                    return "or";
   case BRW_OPCODE_XOR:                   return "xor";
   case BRW_OPCODE_SHR:                   return "shr";
   case BRW_OPCODE_SHL:                   return "shl";
   case BRW_OPCODE_RSR:                   return "rsr";
   case BRW_OPCODE_RSL:                   return "rsl";
   case BRW_OPCODE_ASR:                   return "asr";
   case BRW_OPCODE_CMP:                   return "cmp";
   case BRW_OPCODE_CMPN:                  return "cmpn";
   case BRW_OPCODE_JMPI:                  return "jmpi";
   case BRW_OPCODE_IF:                    return "if";
   case BRW_OPCODE_IFF:                   return "iff";
   case BRW_OPCODE_ELSE:                  return "else";
   case BRW_OPCODE_ENDIF:                 return "endif";
   case BRW_OPCODE_DO:                    return "do";
   case BRW_OPCODE_WHILE:                 return "while";
   case BRW_OPCODE_BREAK:                 return "break";
   case BRW_OPCODE_CONTINUE:              return "continue";
   case BRW_OPCODE_HALT:                  return "halt";
   case BRW_OPCODE_MSAVE:                 return "msave";
   case BRW_OPCODE_MRESTORE:              return "mrestore";
   case BRW_OPCODE_PUSH:                  return "push";
   case BRW_OPCODE_POP:                   return "pop";
   case BRW_OPCODE_WAIT:                  return "wait";
   case BRW_OPCODE_SEND:                  return "send";
   case BRW_OPCODE_SENDC:                 return "sendc";
   case BRW_OPCODE_MATH:                  return "math";
   case BRW_OPCODE_ADD:                   return "add";
   case BRW_OPCODE_MUL:                   return "mul";
   case BRW_OPCODE_AVG:                   return "avg";
   case BRW_OPCODE_FRC:                   return "frc";
   case BRW_OPCODE_RNDU:                  return "rndu";
   case BRW_OPCODE_RNDD:                  return "rndd";
   case BRW_OPCODE_RNDE:                  return "rnde";
   case BRW_OPCODE_RNDZ:                  return "rndz";
   case BRW_OPCODE_MAC:                   return "mac";
   case BRW_OPCODE_MACH:                  return "mach";
   case BRW_OPCODE_LZD:                   return "lzd";
   case BRW_OPCODE_SAD2:                  return "sad2";
   case BRW_OPCODE_SADA2:                 return "sada2";
   case BRW_OPCODE_DP4:                   return "dp4";
   case BRW_OPCODE_DPH:                   return "dph";
   case BRW_OPCODE_DP3:                   return "dp3";
   case BRW_OPCODE_DP2:                   return "dp2";
   case BRW_OPCODE_DPA2:                  return "dpa2";
   case BRW_OPCODE_LINE:                  return "line";
   case BRW_OPCODE_PLN:                   return "pln";
   case BRW_OPCODE_MAD:                   return "mad";
   case BRW_OPCODE_NOP:                   return "nop";
   /* TGSI */
   case TOY_OPCODE_TGSI_IN:               return "tgsi.in";
   case TOY_OPCODE_TGSI_CONST:            return "tgsi.const";
   case TOY_OPCODE_TGSI_SV:               return "tgsi.sv";
   case TOY_OPCODE_TGSI_IMM:              return "tgsi.imm";
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:   return "tgsi.indirect_fetch";
   case TOY_OPCODE_TGSI_INDIRECT_STORE:   return "tgsi.indirect_store";
   case TOY_OPCODE_TGSI_TEX:              return "tgsi.tex";
   case TOY_OPCODE_TGSI_TXB:              return "tgsi.txb";
   case TOY_OPCODE_TGSI_TXD:              return "tgsi.txd";
   case TOY_OPCODE_TGSI_TXL:              return "tgsi.txl";
   case TOY_OPCODE_TGSI_TXP:              return "tgsi.txp";
   case TOY_OPCODE_TGSI_TXF:              return "tgsi.txf";
   case TOY_OPCODE_TGSI_TXQ:              return "tgsi.txq";
   case TOY_OPCODE_TGSI_TXQ_LZ:           return "tgsi.txq_lz";
   case TOY_OPCODE_TGSI_TEX2:             return "tgsi.tex2";
   case TOY_OPCODE_TGSI_TXB2:             return "tgsi.txb2";
   case TOY_OPCODE_TGSI_TXL2:             return "tgsi.txl2";
   case TOY_OPCODE_TGSI_SAMPLE:           return "tgsi.sample";
   case TOY_OPCODE_TGSI_SAMPLE_I:         return "tgsi.sample_i";
   case TOY_OPCODE_TGSI_SAMPLE_I_MS:      return "tgsi.sample_i_ms";
   case TOY_OPCODE_TGSI_SAMPLE_B:         return "tgsi.sample_b";
   case TOY_OPCODE_TGSI_SAMPLE_C:         return "tgsi.sample_c";
   case TOY_OPCODE_TGSI_SAMPLE_C_LZ:      return "tgsi.sample_c_lz";
   case TOY_OPCODE_TGSI_SAMPLE_D:         return "tgsi.sample_d";
   case TOY_OPCODE_TGSI_SAMPLE_L:         return "tgsi.sample_l";
   case TOY_OPCODE_TGSI_GATHER4:          return "tgsi.gather4";
   case TOY_OPCODE_TGSI_SVIEWINFO:        return "tgsi.sviewinfo";
   case TOY_OPCODE_TGSI_SAMPLE_POS:       return "tgsi.sample_pos";
   case TOY_OPCODE_TGSI_SAMPLE_INFO:      return "tgsi.sample_info";
   /* math */
   case TOY_OPCODE_INV:                   return "math.inv";
   case TOY_OPCODE_LOG:                   return "math.log";
   case TOY_OPCODE_EXP:                   return "math.exp";
   case TOY_OPCODE_SQRT:                  return "math.sqrt";
   case TOY_OPCODE_RSQ:                   return "math.rsq";
   case TOY_OPCODE_SIN:                   return "math.sin";
   case TOY_OPCODE_COS:                   return "math.cos";
   case TOY_OPCODE_FDIV:                  return "math.fdiv";
   case TOY_OPCODE_POW:                   return "math.pow";
   case TOY_OPCODE_INT_DIV_QUOTIENT:      return "math.int_div_quotient";
   /* the name used to be misspelled as "math.int_div_remainer" */
   case TOY_OPCODE_INT_DIV_REMAINDER:     return "math.int_div_remainder";
   /* urb */
   case TOY_OPCODE_URB_WRITE:             return "urb.urb_write";
   /* gs */
   case TOY_OPCODE_EMIT:                  return "gs.emit";
   case TOY_OPCODE_ENDPRIM:               return "gs.endprim";
   /* fs */
   case TOY_OPCODE_DDX:                   return "fs.ddx";
   case TOY_OPCODE_DDY:                   return "fs.ddy";
   case TOY_OPCODE_FB_WRITE:              return "fs.fb_write";
   case TOY_OPCODE_KIL:                   return "fs.kil";
   default:                               return "unk";
   }
}
/**
 * Return the printable name of the cond_modifier field of an instruction.
 *
 * The meaning of the field depends on the opcode: it is decoded as an SFID
 * for SEND/SENDC, as a math function for MATH, and as a conditional
 * modifier otherwise.  NULL is returned only for BRW_CONDITIONAL_NONE.
 */
static const char *
get_cond_modifier_name(unsigned opcode, unsigned cond_modifier)
{
   const bool is_send = (opcode == BRW_OPCODE_SEND ||
                         opcode == BRW_OPCODE_SENDC);

   if (is_send) {
      /* SFID */
      switch (cond_modifier) {
      case BRW_SFID_NULL:                       return "Null";
      case BRW_SFID_SAMPLER:                    return "Sampling Engine";
      case BRW_SFID_MESSAGE_GATEWAY:            return "Message Gateway";
      case GEN6_SFID_DATAPORT_SAMPLER_CACHE:    return "Data Port Sampler Cache";
      case GEN6_SFID_DATAPORT_RENDER_CACHE:     return "Data Port Render Cache";
      case BRW_SFID_URB:                        return "URB";
      case BRW_SFID_THREAD_SPAWNER:             return "Thread Spawner";
      case GEN6_SFID_DATAPORT_CONSTANT_CACHE:   return "Constant Cache";
      default:                                  return "Unknown";
      }
   }

   if (opcode == BRW_OPCODE_MATH) {
      /* FC */
      switch (cond_modifier) {
      case BRW_MATH_FUNCTION_INV:               return "INV";
      case BRW_MATH_FUNCTION_LOG:               return "LOG";
      case BRW_MATH_FUNCTION_EXP:               return "EXP";
      case BRW_MATH_FUNCTION_SQRT:              return "SQRT";
      case BRW_MATH_FUNCTION_RSQ:               return "RSQ";
      case BRW_MATH_FUNCTION_SIN:               return "SIN";
      case BRW_MATH_FUNCTION_COS:               return "COS";
      case BRW_MATH_FUNCTION_FDIV:              return "FDIV";
      case BRW_MATH_FUNCTION_POW:               return "POW";
      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:  return "INT DIV (quotient)";
      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)";
      default:                                  return "UNK";
      }
   }

   /* a real conditional modifier */
   switch (cond_modifier) {
   case BRW_CONDITIONAL_NONE:                   return NULL;
   case BRW_CONDITIONAL_Z:                      return "z";
   case BRW_CONDITIONAL_NZ:                     return "nz";
   case BRW_CONDITIONAL_G:                      return "g";
   case BRW_CONDITIONAL_GE:                     return "ge";
   case BRW_CONDITIONAL_L:                      return "l";
   case BRW_CONDITIONAL_LE:                     return "le";
   default:                                     return "unk";
   }
}
/**
 * Print one instruction on its own line: the opcode with its ".sat" and
 * cond_modifier suffixes, the destination, and then every source operand up
 * to the first null one.  A NOP is printed as the bare opcode.
 */
static void
tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst)
{
   const char *cond;
   int i;

   ilo_printf(" %s", get_opcode_name(inst->opcode));

   if (inst->opcode != BRW_OPCODE_NOP) {
      if (inst->saturate)
         ilo_printf(".sat");

      cond = get_cond_modifier_name(inst->opcode, inst->cond_modifier);
      if (cond)
         ilo_printf(".%s", cond);

      ilo_printf(" ");
      tc_dump_dst(tc, inst->dst);

      /* the first null source terminates the operand list */
      for (i = 0;
           i < Elements(inst->src) && !tsrc_is_null(inst->src[i]);
           i++) {
         ilo_printf(", ");
         tc_dump_src(tc, inst->src[i]);
      }
   }

   ilo_printf("\n");
}
/**
 * Print every instruction that has been added to the compiler, markers
 * included.  Regular instructions are labelled with a running counter;
 * markers are labelled "marker:" and do not advance the counter.
 */
void
toy_compiler_dump(struct toy_compiler *tc)
{
   int pc = 0;

   tc_head(tc);
   for (;;) {
      struct toy_inst *inst = tc_next_no_skip(tc);

      if (!inst)
         break;

      /* we do not generate code for markers, so they get no number */
      if (!inst->marker) {
         ilo_printf("%6d:", pc);
         pc++;
      }
      else {
         ilo_printf("marker:");
      }

      tc_dump_inst(tc, inst);
   }
}
/**
* Clean up the toy compiler.
*/
void
toy_compiler_cleanup(struct toy_compiler *tc)
{
struct toy_inst *inst, *next;
LIST_FOR_EACH_ENTRY_SAFE(inst, next, &tc->instructions, list)
util_slab_free(&tc->mempool, inst);
util_slab_destroy(&tc->mempool);
}
/**
 * Set up tc->templ, the instruction that tc_add() copies for every newly
 * added instruction: a NOP with default controls and all-null operands.
 */
static void
tc_init_inst_templ(struct toy_compiler *tc)
{
   struct toy_inst *t = &tc->templ;
   int i;

   t->opcode = BRW_OPCODE_NOP;

   /* instruction controls */
   t->access_mode = BRW_ALIGN_1;
   t->mask_ctrl = BRW_MASK_ENABLE;
   t->dep_ctrl = BRW_DEPENDENCY_NORMAL;
   t->qtr_ctrl = GEN6_COMPRESSION_1Q;
   t->thread_ctrl = BRW_THREAD_NORMAL;
   t->exec_size = BRW_EXECUTE_1;

   /* no predication, no conditional modifier */
   t->pred_ctrl = BRW_PREDICATE_NONE;
   t->pred_inv = false;
   t->cond_modifier = BRW_CONDITIONAL_NONE;

   /* flags */
   t->acc_wr_ctrl = false;
   t->saturate = false;
   t->marker = false;

   /* operands */
   t->dst = tdst_null();
   for (i = 0; i < Elements(t->src); i++)
      t->src[i] = tsrc_null();
   for (i = 0; i < Elements(t->tex.offsets); i++)
      t->tex.offsets[i] = tsrc_null();

   list_inithead(&t->list);
}
/**
 * Initialize the toy compiler for the given hardware generation.
 *
 * The compiler is zeroed first, so every field that is not set explicitly
 * here starts out as zero.
 */
void
toy_compiler_init(struct toy_compiler *tc, int gen)
{
   memset(tc, 0, sizeof(*tc));

   tc->gen = gen;
   tc->rect_linear_width = 1;
   /* skip 0 so that util_hash_table_get() never returns NULL */
   tc->next_vrf = 1;

   tc_init_inst_templ(tc);

   util_slab_create(&tc->mempool, sizeof(struct toy_inst),
         64, UTIL_SLAB_SINGLETHREADED);

   list_inithead(&tc->instructions);
   /* instructions are added to the tail */
   tc_tail(tc);
}

View file

@ -0,0 +1,473 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_COMPILER_H
#define TOY_COMPILER_H
#include "brw_defines.h"
#include "util/u_slab.h"
#include "ilo_common.h"
#include "toy_compiler_reg.h"
/**
 * Toy opcodes.
 *
 * These are the opcodes that do not exist in the hardware.  They share the
 * opcode field of struct toy_inst with the BRW_OPCODE_x values and are
 * therefore numbered from 128 upwards.
 */
enum toy_opcode {
/* 0..127 are reserved for BRW_OPCODE_x */
TOY_OPCODE_LAST_HW = 127,
/* TGSI register functions */
TOY_OPCODE_TGSI_IN,
TOY_OPCODE_TGSI_CONST,
TOY_OPCODE_TGSI_SV,
TOY_OPCODE_TGSI_IMM,
TOY_OPCODE_TGSI_INDIRECT_FETCH,
TOY_OPCODE_TGSI_INDIRECT_STORE,
/* TGSI sampling functions */
TOY_OPCODE_TGSI_TEX,
TOY_OPCODE_TGSI_TXB,
TOY_OPCODE_TGSI_TXD,
TOY_OPCODE_TGSI_TXL,
TOY_OPCODE_TGSI_TXP,
TOY_OPCODE_TGSI_TXF,
TOY_OPCODE_TGSI_TXQ,
TOY_OPCODE_TGSI_TXQ_LZ,
TOY_OPCODE_TGSI_TEX2,
TOY_OPCODE_TGSI_TXB2,
TOY_OPCODE_TGSI_TXL2,
TOY_OPCODE_TGSI_SAMPLE,
TOY_OPCODE_TGSI_SAMPLE_I,
TOY_OPCODE_TGSI_SAMPLE_I_MS,
TOY_OPCODE_TGSI_SAMPLE_B,
TOY_OPCODE_TGSI_SAMPLE_C,
TOY_OPCODE_TGSI_SAMPLE_C_LZ,
TOY_OPCODE_TGSI_SAMPLE_D,
TOY_OPCODE_TGSI_SAMPLE_L,
TOY_OPCODE_TGSI_GATHER4,
TOY_OPCODE_TGSI_SVIEWINFO,
TOY_OPCODE_TGSI_SAMPLE_POS,
TOY_OPCODE_TGSI_SAMPLE_INFO,
/* math functions */
TOY_OPCODE_INV,
TOY_OPCODE_LOG,
TOY_OPCODE_EXP,
TOY_OPCODE_SQRT,
TOY_OPCODE_RSQ,
TOY_OPCODE_SIN,
TOY_OPCODE_COS,
TOY_OPCODE_FDIV,
TOY_OPCODE_POW,
TOY_OPCODE_INT_DIV_QUOTIENT,
TOY_OPCODE_INT_DIV_REMAINDER,
/* URB functions */
TOY_OPCODE_URB_WRITE,
/* GS-specific functions */
TOY_OPCODE_EMIT,
TOY_OPCODE_ENDPRIM,
/* FS-specific functions */
TOY_OPCODE_DDX,
TOY_OPCODE_DDY,
TOY_OPCODE_FB_WRITE,
TOY_OPCODE_KIL,
};
/**
 * Toy instruction.
 *
 * The control fields are bitfields that add up to 32 bits, pad included.
 * Instructions are chained together through the list member.
 */
struct toy_inst {
unsigned opcode:8; /* enum toy_opcode */
unsigned access_mode:1; /* BRW_ALIGN_x */
unsigned mask_ctrl:1; /* BRW_MASK_x */
unsigned dep_ctrl:2; /* BRW_DEPENDENCY_x */
unsigned qtr_ctrl:2; /* GEN6_COMPRESSION_x */
unsigned thread_ctrl:2; /* BRW_THREAD_x */
unsigned pred_ctrl:4; /* BRW_PREDICATE_x */
unsigned pred_inv:1; /* true or false */
unsigned exec_size:3; /* BRW_EXECUTE_x */
unsigned cond_modifier:4; /* BRW_CONDITIONAL_x */
unsigned acc_wr_ctrl:1; /* true or false */
unsigned saturate:1; /* true or false */
/* true if the instruction should be ignored for instruction iteration */
unsigned marker:1;
/* unused; rounds the bitfields above up to 32 bits */
unsigned pad:1;
/* destination operand */
struct toy_dst dst;
struct toy_src src[5]; /* match TGSI_FULL_MAX_SRC_REGISTERS */
/* extra information for sampling instructions */
struct {
int target; /* TGSI_TEXTURE_x */
struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
} tex;
/* link in the instruction list of the compiler */
struct list_head list;
};
/**
 * Toy compiler.
 *
 * Holds the list of instructions, the current location in that list, and
 * the state needed to add new instructions.
 */
struct toy_compiler {
/* hardware generation, as passed to toy_compiler_init() */
int gen;
/* template that tc_add() copies for every new instruction */
struct toy_inst templ;
/* pool that instructions are allocated from */
struct util_slab_mempool mempool;
/* head of the instruction list */
struct list_head instructions;
/* current location; new instructions are inserted before iter_next */
struct list_head *iter, *iter_next;
/* this is not set until toy_compiler_legalize_for_asm() */
int num_instructions;
/* vertical stride and width printed for TOY_RECT_LINEAR regions */
int rect_linear_width;
/* the next VRF register that tc_alloc_vrf() hands out */
int next_vrf;
/* set by tc_fail(); reason is the message of the first failure */
bool fail;
const char *reason;
};
/**
 * Reserve \p count consecutive VRF registers and return the first of them.
 */
static inline int
tc_alloc_vrf(struct toy_compiler *tc, int count)
{
   const int first = tc->next_vrf;

   tc->next_vrf = first + count;

   return first;
}
/**
 * Allocate one VRF register and return it as a destination operand.
 */
static inline struct toy_dst
tc_alloc_tmp(struct toy_compiler *tc)
{
   const int vrf = tc_alloc_vrf(tc, 1);

   return tdst(TOY_FILE_VRF, vrf, 0);
}
/**
 * Allocate four temporary registers and store them in tmp[0] to tmp[3].
 */
static inline void
tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp)
{
   int i;

   for (i = 0; i < 4; i++)
      tmp[i] = tc_alloc_tmp(tc);
}
/**
 * Insert a copy of \p inst at the current location.
 *
 * Returns the copy, or NULL when the memory pool cannot provide a new
 * instruction.
 */
static inline struct toy_inst *
tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst)
{
   struct toy_inst *copy = util_slab_alloc(&tc->mempool);

   if (copy) {
      /* this also copies the list link, which is replaced right below */
      *copy = *inst;
      list_addtail(&copy->list, tc->iter_next);
   }

   return copy;
}
/**
 * Unlink an instruction from wherever it is in the list and re-insert it
 * at the current location.
 */
static inline void
tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct list_head *link = &inst->list;

   list_del(link);
   list_addtail(link, tc->iter_next);
}
/**
 * Unlink an instruction from the list and return it to the memory pool.
 * The instruction must not be used afterwards.
 */
static inline void
tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct list_head *link = &inst->list;

   list_del(link);
   util_slab_free(&tc->mempool, inst);
}
/**
 * Insert a new instruction at the current location.  The instruction starts
 * out as a copy of tc->templ.  Returns NULL when the copy cannot be made.
 */
static inline struct toy_inst *
tc_add(struct toy_compiler *tc)
{
   const struct toy_inst *templ = &tc->templ;

   return tc_duplicate_inst(tc, templ);
}
/**
 * Add an instruction with the given opcode, destination, and three source
 * operands.  Returns NULL when the instruction cannot be added.
 */
static inline struct toy_inst *
tc_add3(struct toy_compiler *tc, unsigned opcode,
        struct toy_dst dst,
        struct toy_src src0,
        struct toy_src src1,
        struct toy_src src2)
{
   struct toy_inst *inst = tc_add(tc);

   if (inst) {
      inst->opcode = opcode;
      inst->dst = dst;
      inst->src[0] = src0;
      inst->src[1] = src1;
      inst->src[2] = src2;
   }

   return inst;
}
/**
 * A convenient version of tc_add() for instructions with 2 source operands.
 * The third source operand is set to null.
 *
 * The opcode is taken as unsigned, the same as in tc_add0(), tc_add1(), and
 * tc_add3().  Returns NULL when the instruction cannot be added.
 */
static inline struct toy_inst *
tc_add2(struct toy_compiler *tc, unsigned opcode,
        struct toy_dst dst,
        struct toy_src src0,
        struct toy_src src1)
{
   return tc_add3(tc, opcode, dst, src0, src1, tsrc_null());
}
/**
* A convenient version of tc_add() for instructions with 1 source operand.
*/
static inline struct toy_inst *
tc_add1(struct toy_compiler *tc, unsigned opcode,
struct toy_dst dst,
struct toy_src src0)
{
return tc_add2(tc, opcode, dst, src0, tsrc_null());
}
/**
* A convenient version of tc_add() for instructions without source or
* destination operands.
*/
static inline struct toy_inst *
tc_add0(struct toy_compiler *tc, unsigned opcode)
{
return tc_add1(tc, opcode, tdst_null(), tsrc_null());
}
/*
 * TC_ALUn(func, opcode) defines an inline function named func that adds an
 * instruction with the given opcode and n source operands.  Except for n
 * being 0, the function also takes the destination operand.
 */
#define TC_ALU0(func, opcode)                               \
static inline struct toy_inst *                             \
func(struct toy_compiler *tc)                               \
{                                                           \
   return tc_add0(tc, opcode);                              \
}

#define TC_ALU1(func, opcode)                               \
static inline struct toy_inst *                             \
func(struct toy_compiler *tc,                               \
     struct toy_dst dst, struct toy_src src)                \
{                                                           \
   return tc_add1(tc, opcode, dst, src);                    \
}

#define TC_ALU2(func, opcode)                               \
static inline struct toy_inst *                             \
func(struct toy_compiler *tc,                               \
     struct toy_dst dst,                                    \
     struct toy_src src0, struct toy_src src1)              \
{                                                           \
   return tc_add2(tc, opcode, dst, src0, src1);             \
}

#define TC_ALU3(func, opcode)                               \
static inline struct toy_inst *                             \
func(struct toy_compiler *tc,                               \
     struct toy_dst dst,                                    \
     struct toy_src src0, struct toy_src src1,              \
     struct toy_src src2)                                   \
{                                                           \
   return tc_add3(tc, opcode, dst, src0, src1, src2);       \
}
/*
 * TC_CND2(func, opcode) defines an inline function named func that adds an
 * instruction with two source operands and sets its cond_modifier field.
 *
 * tc_add2() returns NULL when the instruction cannot be allocated, so the
 * field is set only when an instruction was actually added.  The generated
 * function returns NULL in that case.
 */
#define TC_CND2(func, opcode)             \
static inline struct toy_inst *           \
func(struct toy_compiler *tc,             \
     struct toy_dst dst,                  \
     struct toy_src src0,                 \
     struct toy_src src1,                 \
     unsigned cond_modifier)              \
{                                         \
   struct toy_inst *inst;                 \
                                          \
   inst = tc_add2(tc, opcode,             \
         dst, src0, src1);                \
   if (inst)                              \
      inst->cond_modifier = cond_modifier;\
                                          \
   return inst;                           \
}
/*
 * Instruction builders.  Each line defines one inline function, named by
 * the first macro argument, that adds an instruction with the opcode given
 * by the second argument.
 */
/* no operands */
TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
/* a destination and one source operand */
TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
TC_ALU1(tc_INV, TOY_OPCODE_INV)
TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
/* a destination and two source operands */
TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
TC_ALU2(tc_AND, BRW_OPCODE_AND)
TC_ALU2(tc_OR, BRW_OPCODE_OR)
TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
TC_ALU2(tc_POW, TOY_OPCODE_POW)
/* a destination and three source operands */
TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
/* a destination, two source operands, and a cond_modifier value */
TC_CND2(tc_SEL, BRW_OPCODE_SEL)
TC_CND2(tc_CMP, BRW_OPCODE_CMP)
TC_CND2(tc_IF, BRW_OPCODE_IF)
TC_CND2(tc_SEND, BRW_OPCODE_SEND)
/**
 * Convert a pointer to the list member of an instruction into a pointer to
 * the instruction itself.  \p tc is not used.
 */
static inline struct toy_inst *
tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
{
   struct toy_inst *inst;

   inst = container_of(item, (struct toy_inst *) NULL, list);

   return inst;
}
/**
 * Return the instruction at the current location, or NULL when the current
 * location is the list head rather than an instruction.
 */
static inline struct toy_inst *
tc_current(struct toy_compiler *tc)
{
   if (tc->iter == &tc->instructions)
      return NULL;

   return tc_list_to_inst(tc, tc->iter);
}
/**
* Set the current location to the head.
*/
static inline void
tc_head(struct toy_compiler *tc)
{
tc->iter = &tc->instructions;
tc->iter_next = tc->iter->next;
}
/**
 * Move the current location to the tail of the instruction list.  Both
 * iterators point at the list head afterwards, so new instructions get
 * appended.
 */
static inline void
tc_tail(struct toy_compiler *tc)
{
   struct list_head *head = &tc->instructions;

   tc->iter = head;
   tc->iter_next = head;
}
/**
* Advance the current location.
*/
static inline struct toy_inst *
tc_next_no_skip(struct toy_compiler *tc)
{
/* stay at the tail so that new instructions are added there */
if (tc->iter_next == &tc->instructions) {
tc_tail(tc);
return NULL;
}
tc->iter = tc->iter_next;
tc->iter_next = tc->iter_next->next;
return tc_list_to_inst(tc, tc->iter);
}
/**
 * Advance the current location to the next instruction that is not a
 * marker and return it, or return NULL at the end of the list.
 */
static inline struct toy_inst *
tc_next(struct toy_compiler *tc)
{
   struct toy_inst *inst = tc_next_no_skip(tc);

   while (inst && inst->marker)
      inst = tc_next_no_skip(tc);

   return inst;
}
/**
 * Mark the compilation as failed and remember why.  Only the first failure
 * is recorded; later calls leave the stored reason untouched.
 */
static inline void
tc_fail(struct toy_compiler *tc, const char *reason)
{
   if (tc->fail)
      return;

   tc->fail = true;
   tc->reason = reason;
}
/* Initialize the toy compiler for the given hardware generation. */
void
toy_compiler_init(struct toy_compiler *tc, int gen);
/* Free the instructions and the memory pool of the toy compiler. */
void
toy_compiler_cleanup(struct toy_compiler *tc);
/* Print the instructions added to the compiler. */
void
toy_compiler_dump(struct toy_compiler *tc);
/*
 * NOTE(review): defined in another file; presumably returns the assembled
 * kernel and stores its size in *size -- confirm against the definition.
 */
void *
toy_compiler_assemble(struct toy_compiler *tc, int *size);
/*
 * NOTE(review): defined in another file; presumably prints the disassembly
 * of a kernel of the given size -- confirm against the definition.
 */
void
toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size);
#endif /* TOY_COMPILER_H */

View file

@ -0,0 +1,750 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "toy_compiler.h"
/*
 * An operand origin is encoded as (RegNum << 5 | SubRegNumInBytes).
 * CG_REG_SHIFT is the position of RegNum in that encoding, and CG_REG_NUM()
 * extracts RegNum from an origin.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
/**
 * The state needed to encode one instruction: the instruction itself plus
 * its destination and up to three source operands, in the form read by the
 * translate_x() functions.
 */
struct codegen {
/* the instruction being encoded */
const struct toy_inst *inst;
int pc;
/* encoded at bit 25 of the first source dword by translate_src() */
unsigned flag_sub_reg_num;
/* destination operand */
struct codegen_dst {
unsigned file;
unsigned type;
bool indirect;
unsigned indirect_subreg;
unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
unsigned horz_stride;
unsigned writemask;
} dst;
/* source operands */
struct codegen_src {
unsigned file;
unsigned type;
bool indirect;
unsigned indirect_subreg;
unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
unsigned vert_stride;
unsigned width;
unsigned horz_stride;
unsigned swizzle[4];
bool absolute;
bool negate;
} src[3];
};
/**
 * Return true if source operand \p idx is the null register, i.e. it lives
 * in the architecture register file and its origin is that of
 * BRW_ARF_NULL.
 */
static bool
src_is_null(const struct codegen *cg, int idx)
{
   const struct codegen_src *src = &cg->src[idx];

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE)
      return false;

   return (src->origin == BRW_ARF_NULL << CG_REG_SHIFT);
}
/**
 * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
 *
 * \p idx selects the operand: codegen_inst() stores the result for idx 0 in
 * DW2 and the result for idx 1 in DW3.
 *
 * When one of the operands is an immediate, the origin of that operand is
 * returned for idx 1 as is, which places it in DW3.  Otherwise the dword
 * is packed from the region, the modifiers, and the origin of the operand,
 * using the layout that matches the access mode (align1 or align16) and the
 * addressing mode (direct or register-indirect).  The dword for idx 0
 * additionally carries flag_sub_reg_num at bit 25.
 */
static uint32_t
translate_src(const struct codegen *cg, int idx)
{
const struct codegen_src *src = &cg->src[idx];
uint32_t dw;
/* special treatment may be needed if any of the operand is immediate */
if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
assert(!cg->src[0].absolute && !cg->src[0].negate);
/* only the last src operand can be an immediate */
assert(src_is_null(cg, 1));
/* DW2 carries only the flag subregister; DW3 carries src[0].origin */
if (idx == 0)
return cg->flag_sub_reg_num << 25;
else
return cg->src[0].origin;
}
else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
assert(!cg->src[1].absolute && !cg->src[1].negate);
return cg->src[1].origin;
}
assert(src->file != BRW_IMMEDIATE_VALUE);
if (src->indirect) {
/* for indirect operands, origin is treated as a signed offset */
const int offset = (int) src->origin;
assert(src->file == BRW_GENERAL_REGISTER_FILE);
assert(offset < 512 && offset >= -512);
if (cg->inst->access_mode == BRW_ALIGN_16) {
assert(src->width == BRW_WIDTH_4);
assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
/* the lower 4 bits are reserved for the swizzle_[xy] */
assert(!(src->origin & 0xf));
dw = src->vert_stride << 21 |
src->swizzle[3] << 18 |
src->swizzle[2] << 16 |
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
src->negate << 14 |
src->absolute << 13 |
src->indirect_subreg << 10 |
(src->origin & 0x3f0) |
src->swizzle[1] << 2 |
src->swizzle[0];
}
else {
/* align1 has no swizzle field; only the identity swizzle is valid */
assert(src->swizzle[0] == TOY_SWIZZLE_X &&
src->swizzle[1] == TOY_SWIZZLE_Y &&
src->swizzle[2] == TOY_SWIZZLE_Z &&
src->swizzle[3] == TOY_SWIZZLE_W);
dw = src->vert_stride << 21 |
src->width << 18 |
src->horz_stride << 16 |
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
src->negate << 14 |
src->absolute << 13 |
src->indirect_subreg << 10 |
(src->origin & 0x3ff);
}
}
else {
/* direct addressing: check the register number against its file */
switch (src->file) {
case BRW_ARCHITECTURE_REGISTER_FILE:
break;
case BRW_GENERAL_REGISTER_FILE:
assert(CG_REG_NUM(src->origin) < 128);
break;
case BRW_MESSAGE_REGISTER_FILE:
assert(cg->inst->opcode == BRW_OPCODE_SEND ||
cg->inst->opcode == BRW_OPCODE_SENDC);
assert(CG_REG_NUM(src->origin) < 16);
break;
case BRW_IMMEDIATE_VALUE:
default:
assert(!"invalid src file");
break;
}
if (cg->inst->access_mode == BRW_ALIGN_16) {
assert(src->width == BRW_WIDTH_4);
assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
/* the lower 4 bits are reserved for the swizzle_[xy] */
assert(!(src->origin & 0xf));
dw = src->vert_stride << 21 |
src->swizzle[3] << 18 |
src->swizzle[2] << 16 |
BRW_ADDRESS_DIRECT << 15 |
src->negate << 14 |
src->absolute << 13 |
src->origin |
src->swizzle[1] << 2 |
src->swizzle[0];
}
else {
/* align1 has no swizzle field; only the identity swizzle is valid */
assert(src->swizzle[0] == TOY_SWIZZLE_X &&
src->swizzle[1] == TOY_SWIZZLE_Y &&
src->swizzle[2] == TOY_SWIZZLE_Z &&
src->swizzle[3] == TOY_SWIZZLE_W);
dw = src->vert_stride << 21 |
src->width << 18 |
src->horz_stride << 16 |
BRW_ADDRESS_DIRECT << 15 |
src->negate << 14 |
src->absolute << 13 |
src->origin;
}
}
/* the flag subregister number is part of the first source dword */
if (idx == 0)
dw |= cg->flag_sub_reg_num << 25;
return dw;
}
/**
 * Translate the destination operand to the higher 16 bits of DW1 of the
 * 1-src/2-src format.
 *
 * An immediate destination is accepted only for IF, ELSE, ENDIF, and WHILE,
 * in which case the lower 16 bits of its origin are returned.  Otherwise
 * the value is packed from the addressing mode, the horizontal stride, and
 * the origin, plus the writemask when the access mode is align16.
 */
static uint16_t
translate_dst_region(const struct codegen *cg)
{
const struct codegen_dst *dst = &cg->dst;
uint16_t dw1_region;
if (dst->file == BRW_IMMEDIATE_VALUE) {
/* dst is immediate (JIP) when the opcode is a conditional branch */
switch (cg->inst->opcode) {
case BRW_OPCODE_IF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_WHILE:
assert(dst->type == BRW_REGISTER_TYPE_W);
dw1_region = (dst->origin & 0xffff);
break;
default:
assert(!"dst cannot be immediate");
dw1_region = 0;
break;
}
return dw1_region;
}
if (dst->indirect) {
/* for indirect operands, origin is treated as a signed offset */
const int offset = (int) dst->origin;
assert(dst->file == BRW_GENERAL_REGISTER_FILE);
assert(offset < 512 && offset >= -512);
if (cg->inst->access_mode == BRW_ALIGN_16) {
/*
* From the Sandy Bridge PRM, volume 4 part 2, page 144:
*
* "Allthough Dst.HorzStride is a don't care for Align16, HW
* needs this to be programmed as 01."
*/
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
/* the lower 4 bits are reserved for the writemask */
assert(!(dst->origin & 0xf));
dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
dst->horz_stride << 13 |
dst->indirect_subreg << 10 |
(dst->origin & 0x3f0) |
dst->writemask;
}
else {
/* align1 has no writemask field; only the full writemask is valid */
assert(dst->writemask == TOY_WRITEMASK_XYZW);
dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
dst->horz_stride << 13 |
dst->indirect_subreg << 10 |
(dst->origin & 0x3ff);
}
}
else {
/* direct addressing: check the register number against its file */
assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
CG_REG_NUM(dst->origin) < 128) ||
(dst->file == BRW_MESSAGE_REGISTER_FILE &&
CG_REG_NUM(dst->origin) < 16) ||
(dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
if (cg->inst->access_mode == BRW_ALIGN_16) {
/* similar to the indirect case */
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
assert(!(dst->origin & 0xf));
dw1_region = BRW_ADDRESS_DIRECT << 15 |
dst->horz_stride << 13 |
dst->origin |
dst->writemask;
}
else {
/* align1 has no writemask field; only the full writemask is valid */
assert(dst->writemask == TOY_WRITEMASK_XYZW);
dw1_region = BRW_ADDRESS_DIRECT << 15 |
dst->horz_stride << 13 |
dst->origin;
}
}
return dw1_region;
}
/**
 * Translate the destination operand to DW1 of the 1-src/2-src format.
 *
 * The higher 16 bits come from translate_dst_region(); the lower 16 bits
 * hold the register file and type of the destination and of the first two
 * source operands.
 */
static uint32_t
translate_dst(const struct codegen *cg)
{
   /*
    * The region is a uint16_t, which is promoted to a signed int.  Shifting
    * it left by 16 is undefined whenever bit 15 is set (register-indirect
    * addressing, or a negative JIP), so widen it to uint32_t first.
    */
   const uint32_t region = translate_dst_region(cg);

   return region << 16 |
          cg->src[1].type << 12 |
          cg->src[1].file << 10 |
          cg->src[0].type << 7 |
          cg->src[0].file << 5 |
          cg->dst.type << 2 |
          cg->dst.file;
}
/**
 * Translate the instruction to DW0 of the 1-src/2-src format.
 *
 * The opcode sits in the low bits (it must be below 128) and the
 * instruction controls are packed above it.  Debug control and compaction
 * control are always emitted as 0.
 */
static uint32_t
translate_inst(const struct codegen *cg)
{
   const bool debug_ctrl = false;
   const bool cmpt_ctrl = false;

   assert(cg->inst->opcode < 128);

   /*
    * The saturate flag lands in bit 31.  Cast it first so that a field
    * promoted to signed int is not shifted into the sign bit, which would
    * be undefined behavior.
    */
   return (uint32_t) cg->inst->saturate << 31 |
          debug_ctrl << 30 |
          cmpt_ctrl << 29 |
          cg->inst->acc_wr_ctrl << 28 |
          cg->inst->cond_modifier << 24 |
          cg->inst->exec_size << 21 |
          cg->inst->pred_inv << 20 |
          cg->inst->pred_ctrl << 16 |
          cg->inst->thread_ctrl << 14 |
          cg->inst->qtr_ctrl << 12 |
          cg->inst->dep_ctrl << 10 |
          cg->inst->mask_ctrl << 9 |
          cg->inst->access_mode << 8 |
          cg->inst->opcode;
}
/**
 * Codegen an instruction in 1-src/2-src format.
 *
 * Four dwords are written to \p code: the instruction header, the
 * destination, and the first two sources.  The third source must be null.
 */
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   const uint32_t header = translate_inst(cg);
   const uint32_t dest = translate_dst(cg);
   const uint32_t src0 = translate_src(cg, 0);
   const uint32_t src1 = translate_src(cg, 1);

   code[0] = header;
   code[1] = dest;
   code[2] = src0;
   code[3] = src1;

   assert(src_is_null(cg, 2));
}
/**
* Codegen an instruction in 3-src format.
*/
static void
codegen_inst_3src(const struct codegen *cg, uint32_t *code)
{
const struct codegen_dst *dst = &cg->dst;
uint32_t dw0, dw1, dw_src[3];
int i;
dw0 = translate_inst(cg);
/*
* 3-src instruction restrictions
*
* - align16 with direct addressing
* - GRF or MRF dst
* - GRF src
* - sub_reg_num is DWORD aligned
* - no regioning except replication control
* (vert_stride == 0 && horz_stride == 0)
*/
assert(cg->inst->access_mode == BRW_ALIGN_16);
assert(!dst->indirect);
assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
CG_REG_NUM(dst->origin) < 128) ||
(dst->file == BRW_MESSAGE_REGISTER_FILE &&
CG_REG_NUM(dst->origin) < 16));
assert(!(dst->origin & 0x3));
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
dw1 = dst->origin << 19 |
dst->writemask << 17 |
cg->src[2].negate << 9 |
cg->src[2].absolute << 8 |
cg->src[1].negate << 7 |
cg->src[1].absolute << 6 |
cg->src[0].negate << 5 |
cg->src[0].absolute << 4 |
cg->flag_sub_reg_num << 1 |
(dst->file == BRW_MESSAGE_REGISTER_FILE);
for (i = 0; i < 3; i++) {
const struct codegen_src *src = &cg->src[i];
assert(!src->indirect);
assert(src->file == BRW_GENERAL_REGISTER_FILE &&
CG_REG_NUM(src->origin) < 128);
assert(!(src->origin & 0x3));
assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
(src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
assert(src->width == BRW_WIDTH_4);
dw_src[i] = src->origin << 7 |
src->swizzle[3] << 7 |
src->swizzle[2] << 5 |
src->swizzle[1] << 3 |
src->swizzle[0] << 1 |
(src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
/* only the lower 20 bits are used */
assert((dw_src[i] & 0xfffff) == dw_src[i]);
}
code[0] = dw0;
code[1] = dw1;
/* concatenate the bits of dw_src */
code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
}
/**
 * Sanity check the region parameters of the operands.
 *
 * The checks are assertions only.  The numbered comments refer to the
 * region restrictions listed in the Sandy Bridge PRM, volume 4 part 2,
 * page 94.  Checking stops at the first null source.
 */
static void
codegen_validate_region_restrictions(const struct codegen *cg)
{
   /* tables decoding the hardware encodings into plain numbers */
   const int exec_size_values[] = {
      [BRW_EXECUTE_1]  = 1,
      [BRW_EXECUTE_2]  = 2,
      [BRW_EXECUTE_4]  = 4,
      [BRW_EXECUTE_8]  = 8,
      [BRW_EXECUTE_16] = 16,
      [BRW_EXECUTE_32] = 32,
   };
   const int width_values[] = {
      [BRW_WIDTH_1]  = 1,
      [BRW_WIDTH_2]  = 2,
      [BRW_WIDTH_4]  = 4,
      [BRW_WIDTH_8]  = 8,
      [BRW_WIDTH_16] = 16,
   };
   const int horz_stride_values[] = {
      [BRW_HORIZONTAL_STRIDE_0] = 0,
      [BRW_HORIZONTAL_STRIDE_1] = 1,
      [BRW_HORIZONTAL_STRIDE_2] = 2,
      [BRW_HORIZONTAL_STRIDE_4] = 4,
   };
   const int vert_stride_values[] = {
      [BRW_VERTICAL_STRIDE_0]               = 0,
      [BRW_VERTICAL_STRIDE_1]               = 1,
      [BRW_VERTICAL_STRIDE_2]               = 2,
      [BRW_VERTICAL_STRIDE_4]               = 4,
      [BRW_VERTICAL_STRIDE_8]               = 8,
      [BRW_VERTICAL_STRIDE_16]              = 16,
      [BRW_VERTICAL_STRIDE_32]              = 32,
      [BRW_VERTICAL_STRIDE_64]              = 64,
      [BRW_VERTICAL_STRIDE_128]             = 128,
      [BRW_VERTICAL_STRIDE_256]             = 256,
      [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
   };
   const int exec_size = exec_size_values[cg->inst->exec_size];
   int i;

   /* Sandy Bridge PRM, volume 4 part 2, page 94 */

   /* 1. (we don't do 32 anyway) */
   assert(exec_size <= 16);

   for (i = 0; i < Elements(cg->src); i++) {
      const struct codegen_src *src = &cg->src[i];
      int width, horz_stride, vert_stride;

      if (src_is_null(cg, i))
         break;

      width = width_values[src->width];
      horz_stride = horz_stride_values[src->horz_stride];
      vert_stride = vert_stride_values[src->vert_stride];

      /* 3. */
      assert(exec_size >= width);

      /* 4. & 5. */
      if (exec_size == width && horz_stride)
         assert(vert_stride == width * horz_stride);

      if (width == 1) {
         /* 6. */
         assert(horz_stride == 0);

         /* 7. */
         if (exec_size == 1)
            assert(vert_stride == 0);
      }

      /* 8. */
      if (!vert_stride && !horz_stride)
         assert(width == 1);
   }

   /* derived from 10.1.2. & 10.2. */
   assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
}
/**
 * Translate a toy register file to the hardware register file.
 *
 * Unhandled files (including the virtual one) assert and map to GRF.
 */
static unsigned
translate_vfile(enum toy_file file)
{
   unsigned hw_file = BRW_GENERAL_REGISTER_FILE;

   switch (file) {
   case TOY_FILE_ARF:
      hw_file = BRW_ARCHITECTURE_REGISTER_FILE;
      break;
   case TOY_FILE_GRF:
      hw_file = BRW_GENERAL_REGISTER_FILE;
      break;
   case TOY_FILE_MRF:
      hw_file = BRW_MESSAGE_REGISTER_FILE;
      break;
   case TOY_FILE_IMM:
      hw_file = BRW_IMMEDIATE_VALUE;
      break;
   default:
      assert(!"unhandled toy file");
      break;
   }

   return hw_file;
}
/**
 * Translate a toy register type to the hardware register type.
 *
 * Unhandled types assert and map to the float type.
 */
static unsigned
translate_vtype(enum toy_type type)
{
   unsigned hw_type = BRW_REGISTER_TYPE_F;

   switch (type) {
   case TOY_TYPE_F:
      hw_type = BRW_REGISTER_TYPE_F;
      break;
   case TOY_TYPE_D:
      hw_type = BRW_REGISTER_TYPE_D;
      break;
   case TOY_TYPE_UD:
      hw_type = BRW_REGISTER_TYPE_UD;
      break;
   case TOY_TYPE_W:
      hw_type = BRW_REGISTER_TYPE_W;
      break;
   case TOY_TYPE_UW:
      hw_type = BRW_REGISTER_TYPE_UW;
      break;
   case TOY_TYPE_V:
      hw_type = BRW_REGISTER_TYPE_V;
      break;
   default:
      assert(!"unhandled toy type");
      break;
   }

   return hw_type;
}
/**
 * Translate a toy writemask to the hardware writemask.
 *
 * No conversion is performed: the value is returned unchanged after
 * asserting that it fits in 4 bits.
 */
static unsigned
translate_writemask(enum toy_writemask writemask)
{
/* TOY_WRITEMASK_* are compatible with the hardware definitions */
assert(writemask <= 0xf);
return writemask;
}
/**
 * Translate a toy swizzle to the hardware swizzle.
 *
 * No conversion is performed: the value is returned unchanged after
 * asserting that it names one of the four channels (0..3).
 */
static unsigned
translate_swizzle(enum toy_swizzle swizzle)
{
/* TOY_SWIZZLE_* are compatible with the hardware definitions */
assert(swizzle <= 3);
return swizzle;
}
/**
* Prepare for generating an instruction.
*/
static void
codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
int pc, int rect_linear_width)
{
int i;
cg->inst = inst;
cg->pc = pc;
cg->flag_sub_reg_num = 0;
cg->dst.file = translate_vfile(inst->dst.file);
cg->dst.type = translate_vtype(inst->dst.type);
cg->dst.indirect = inst->dst.indirect;
cg->dst.indirect_subreg = inst->dst.indirect_subreg;
cg->dst.origin = inst->dst.val32;
/*
* From the Sandy Bridge PRM, volume 4 part 2, page 81:
*
* "For a word or an unsigned word immediate data, software must
* replicate the same 16-bit immediate value to both the lower word
* and the high word of the 32-bit immediate field in an instruction."
*/
if (inst->dst.file == TOY_FILE_IMM) {
switch (inst->dst.type) {
case TOY_TYPE_W:
case TOY_TYPE_UW:
cg->dst.origin &= 0xffff;
cg->dst.origin |= cg->dst.origin << 16;
break;
default:
break;
}
}
cg->dst.writemask = translate_writemask(inst->dst.writemask);
switch (inst->dst.rect) {
case TOY_RECT_LINEAR:
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
default:
assert(!"unsupported dst region");
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
}
for (i = 0; i < Elements(cg->src); i++) {
struct codegen_src *src = &cg->src[i];
src->file = translate_vfile(inst->src[i].file);
src->type = translate_vtype(inst->src[i].type);
src->indirect = inst->src[i].indirect;
src->indirect_subreg = inst->src[i].indirect_subreg;
src->origin = inst->src[i].val32;
/* do the same for src */
if (inst->dst.file == TOY_FILE_IMM) {
switch (inst->src[i].type) {
case TOY_TYPE_W:
case TOY_TYPE_UW:
src->origin &= 0xffff;
src->origin |= src->origin << 16;
break;
default:
break;
}
}
src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
src->absolute = inst->src[i].absolute;
src->negate = inst->src[i].negate;
switch (inst->src[i].rect) {
case TOY_RECT_LINEAR:
switch (rect_linear_width) {
case 1:
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
break;
case 2:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_2;
break;
case 4:
src->vert_stride = BRW_VERTICAL_STRIDE_4;
src->width = BRW_WIDTH_4;
break;
case 8:
src->vert_stride = BRW_VERTICAL_STRIDE_8;
src->width = BRW_WIDTH_8;
break;
case 16:
src->vert_stride = BRW_VERTICAL_STRIDE_16;
src->width = BRW_WIDTH_16;
break;
default:
assert(!"unsupported TOY_RECT_LINEAR width");
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
break;
}
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
case TOY_RECT_041:
src->vert_stride = BRW_VERTICAL_STRIDE_0;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
case TOY_RECT_010:
src->vert_stride = BRW_VERTICAL_STRIDE_0;
src->width = BRW_WIDTH_1;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_220:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_2;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_440:
src->vert_stride = BRW_VERTICAL_STRIDE_4;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_240:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
default:
assert(!"unsupported src region");
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
}
}
}
/**
 * Generate HW shader code.  The instructions should have been legalized.
 *
 * Each instruction is emitted as four dwords.
 *
 * \param tc    the compiler holding the instructions to assemble
 * \param size  if non-NULL, receives the size of the generated code in
 *              bytes; it is written only on success
 * \return the generated code, allocated with MALLOC(), or NULL when the
 *         allocation fails or the compiler is in a failed state
 */
void *
toy_compiler_assemble(struct toy_compiler *tc, int *size)
{
   const struct toy_inst *inst;
   uint32_t *code;
   int pc;

   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
   if (!code)
      return NULL;

   pc = 0;
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      struct codegen cg;
      uint32_t *dw;

      /* the buffer was sized from num_instructions; never write past it */
      if (pc >= tc->num_instructions) {
         tc_fail(tc, "wrong instruction count");
         break;
      }

      /* only form the output pointer once pc is known to be in range */
      dw = &code[pc * 4];

      codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
      codegen_validate_region_restrictions(&cg);

      switch (inst->opcode) {
      case BRW_OPCODE_MAD:
         codegen_inst_3src(&cg, dw);
         break;
      default:
         codegen_inst(&cg, dw);
         break;
      }

      pc++;
   }

   /* never return an invalid kernel */
   if (tc->fail) {
      FREE(code);
      return NULL;
   }

   if (size)
      *size = pc * 4 * sizeof(uint32_t);

   return code;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,800 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_REG_H
#define TOY_REG_H
#include "pipe/p_compiler.h"
#include "util/u_debug.h" /* for assert() */
#include "util/u_math.h" /* for union fi */
/* a toy reg is 256-bit wide; TOY_REG_WIDTH is its width in bytes */
#define TOY_REG_WIDTH 32
/**
 * Register files.
 *
 * The values are stored in 3-bit fields of the operands.
 */
enum toy_file {
   TOY_FILE_VRF = 0,    /* virtual register file */
   TOY_FILE_ARF = 1,    /* architecture register file */
   TOY_FILE_GRF = 2,    /* general register file */
   TOY_FILE_MRF = 3,    /* message register file */
   TOY_FILE_IMM = 4,    /* immediate value */

   TOY_FILE_COUNT,
};
/**
 * Register types.
 *
 * The values are stored in 3-bit fields of the operands.
 */
enum toy_type {
   TOY_TYPE_F  = 0,
   TOY_TYPE_D  = 1,
   TOY_TYPE_UD = 2,
   TOY_TYPE_W  = 3,
   TOY_TYPE_UW = 4,
   TOY_TYPE_V  = 5,     /* only valid for immediates */

   TOY_TYPE_COUNT,
};
/**
 * Register rectangles.  The three digits of a name stand for the vertical
 * stride, the width, and the horizontal stride respectively; for example,
 * TOY_RECT_041 has a vertical stride of 0, a width of 4, and a horizontal
 * stride of 1.
 */
enum toy_rect {
   TOY_RECT_LINEAR = 0,
   TOY_RECT_041    = 1,
   TOY_RECT_010    = 2,
   TOY_RECT_220    = 3,
   TOY_RECT_440    = 4,
   TOY_RECT_240    = 5,

   TOY_RECT_COUNT,
};
/**
 * Source swizzles. They are compatible with TGSI_SWIZZLE_x and hardware
 * values.
 *
 * The values are spelled out because they are used directly as channel
 * indices and as shift counts for the writemask bits.
 */
enum toy_swizzle {
TOY_SWIZZLE_X = 0,
TOY_SWIZZLE_Y = 1,
TOY_SWIZZLE_Z = 2,
TOY_SWIZZLE_W = 3,
};
/**
 * Destination writemasks.  They are compatible with TGSI_WRITEMASK_x and
 * hardware values.
 *
 * Each channel owns the bit selected by its swizzle value; the multi-channel
 * masks are unions of the single-channel ones.
 */
enum toy_writemask {
   /* one channel */
   TOY_WRITEMASK_X    = (1 << TOY_SWIZZLE_X),
   TOY_WRITEMASK_Y    = (1 << TOY_SWIZZLE_Y),
   TOY_WRITEMASK_Z    = (1 << TOY_SWIZZLE_Z),
   TOY_WRITEMASK_W    = (1 << TOY_SWIZZLE_W),

   /* two channels */
   TOY_WRITEMASK_XY   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y),
   TOY_WRITEMASK_XZ   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XW   = (TOY_WRITEMASK_X | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZ   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_YW   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
   TOY_WRITEMASK_ZW   = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W),

   /* three channels */
   TOY_WRITEMASK_XYZ  = (TOY_WRITEMASK_XY | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XYW  = (TOY_WRITEMASK_XY | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XZW  = (TOY_WRITEMASK_XZ | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZW  = (TOY_WRITEMASK_YZ | TOY_WRITEMASK_W),

   /* all channels */
   TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_XYZ | TOY_WRITEMASK_W),
};
/**
 * Destination operand.
 *
 * The bitfields add up to 32 bits and are followed by a 32-bit value.  The
 * constructors in this file set val32 to a byte offset into the register
 * file (reg * TOY_REG_WIDTH + subreg bytes), or to the immediate value when
 * the file is TOY_FILE_IMM.
 */
struct toy_dst {
unsigned file:3; /* TOY_FILE_x */
unsigned type:3; /* TOY_TYPE_x */
unsigned rect:3; /* TOY_RECT_x */
unsigned indirect:1; /* true or false */
unsigned indirect_subreg:6; /* which subreg of a0? */
unsigned writemask:4; /* TOY_WRITEMASK_x */
unsigned pad:12; /* unused; the constructors set it to 0 */
uint32_t val32; /* byte offset in the file, or the immediate value */
};
/**
 * Source operand.
 *
 * The bitfields add up to 32 bits and are followed by a 32-bit value.  The
 * constructors in this file set val32 to a byte offset into the register
 * file (reg * TOY_REG_WIDTH + subreg bytes), or to the immediate value when
 * the file is TOY_FILE_IMM.
 */
struct toy_src {
unsigned file:3; /* TOY_FILE_x */
unsigned type:3; /* TOY_TYPE_x */
unsigned rect:3; /* TOY_RECT_x */
unsigned indirect:1; /* true or false */
unsigned indirect_subreg:6; /* which subreg of a0? */
unsigned swizzle_x:2; /* TOY_SWIZZLE_x */
unsigned swizzle_y:2; /* TOY_SWIZZLE_x */
unsigned swizzle_z:2; /* TOY_SWIZZLE_x */
unsigned swizzle_w:2; /* TOY_SWIZZLE_x */
unsigned absolute:1; /* true or false */
unsigned negate:1; /* true or false */
unsigned pad:6; /* unused; the constructors set it to 0 */
uint32_t val32; /* byte offset in the file, or the immediate value */
};
/**
 * Return true if the file is virtual, that is, if it is TOY_FILE_VRF.
 */
static inline bool
toy_file_is_virtual(enum toy_file file)
{
return (file == TOY_FILE_VRF);
}
/**
 * Return true if the file is a hardware one.
 *
 * Every file other than the virtual one counts as a hardware file,
 * including TOY_FILE_IMM.
 */
static inline bool
toy_file_is_hw(enum toy_file file)
{
return !toy_file_is_virtual(file);
}
/**
 * Return the size of the file, in bytes.
 *
 * Only the GRF and the MRF have a size; any other file asserts and yields 0.
 */
static inline uint32_t
toy_file_size(enum toy_file file)
{
   uint32_t size = 0;

   switch (file) {
   case TOY_FILE_GRF:
      size = 256 * TOY_REG_WIDTH;
      break;
   case TOY_FILE_MRF:
      /* there is no MRF on GEN7+ */
      size = 256 * TOY_REG_WIDTH;
      break;
   default:
      assert(!"invalid toy file");
      break;
   }

   return size;
}
/**
 * Return the size of the type, in bytes.
 *
 * TOY_TYPE_V and unknown types have no size; they assert and yield 0.
 */
static inline int
toy_type_size(enum toy_type type)
{
   int size = 0;

   switch (type) {
   case TOY_TYPE_F:
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      size = 4;
      break;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      size = 2;
      break;
   case TOY_TYPE_V:
   default:
      assert(!"invalid toy type");
      break;
   }

   return size;
}
/**
 * Return true if the destination operand is null, that is, if it is in the
 * ARF at offset 0.
 */
static inline bool
tdst_is_null(struct toy_dst dst)
{
/* BRW_ARF_NULL happens to be 0 */
return (dst.file == TOY_FILE_ARF && dst.val32 == 0);
}
/**
 * Validate the destination operand.
 *
 * The checks are assertions only; the operand is returned unchanged so that
 * the call can be chained.
 */
static inline struct toy_dst
tdst_validate(struct toy_dst dst)
{
   /* per-file restrictions */
   switch (dst.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
      assert(!dst.indirect);
      break;
   case TOY_FILE_MRF:
      assert(!dst.indirect);
      assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_GRF:
      /* the offset of an indirect operand is not bounded by the file */
      if (!dst.indirect)
         assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_IMM:
      /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
      assert(!dst.indirect);
      assert(dst.type == TOY_TYPE_W);
      break;
   default:
      assert(!"invalid dst file");
      break;
   }

   /* the vector type is for immediate sources only */
   if (dst.type == TOY_TYPE_V)
      assert(!"invalid dst type");

   assert(dst.rect == TOY_RECT_LINEAR);

   /* register offsets must be aligned to the type */
   if (dst.file != TOY_FILE_IMM)
      assert(dst.val32 % toy_type_size(dst.type) == 0);

   assert(dst.writemask <= TOY_WRITEMASK_XYZW);

   return dst;
}
/**
 * Change the type of the destination operand.
 *
 * Only the type field is replaced; the result is validated before it is
 * returned.
 */
static inline struct toy_dst
tdst_type(struct toy_dst dst, enum toy_type type)
{
dst.type = type;
return tdst_validate(dst);
}
/**
 * Change the type of the destination operand to TOY_TYPE_D.
 *
 * Shorthand for tdst_type(dst, TOY_TYPE_D).
 */
static inline struct toy_dst
tdst_d(struct toy_dst dst)
{
return tdst_type(dst, TOY_TYPE_D);
}
/**
 * Change the type of the destination operand to TOY_TYPE_UD.
 *
 * Shorthand for tdst_type(dst, TOY_TYPE_UD).
 */
static inline struct toy_dst
tdst_ud(struct toy_dst dst)
{
return tdst_type(dst, TOY_TYPE_UD);
}
/**
 * Change the type of the destination operand to TOY_TYPE_W.
 *
 * Shorthand for tdst_type(dst, TOY_TYPE_W).
 */
static inline struct toy_dst
tdst_w(struct toy_dst dst)
{
return tdst_type(dst, TOY_TYPE_W);
}
/**
 * Change the type of the destination operand to TOY_TYPE_UW.
 *
 * Shorthand for tdst_type(dst, TOY_TYPE_UW).
 */
static inline struct toy_dst
tdst_uw(struct toy_dst dst)
{
return tdst_type(dst, TOY_TYPE_UW);
}
/**
 * Change the rectangle of the destination operand.
 *
 * The result is validated, and tdst_validate() asserts that the rectangle
 * of a destination is TOY_RECT_LINEAR.
 */
static inline struct toy_dst
tdst_rect(struct toy_dst dst, enum toy_rect rect)
{
dst.rect = rect;
return tdst_validate(dst);
}
/**
 * Apply writemask to the destination operand. Note that the current
 * writemask is honored.
 *
 * The new writemask is AND'ed with the current one, so channels can only be
 * disabled by this call, never enabled.
 */
static inline struct toy_dst
tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
{
dst.writemask &= writemask;
return tdst_validate(dst);
}
/**
 * Offset the destination operand.
 *
 * \param reg     the offset in whole registers (TOY_REG_WIDTH bytes each)
 * \param subreg  the offset in elements of the operand's current type
 *
 * Both offsets may be negative.  The result is validated.
 */
static inline struct toy_dst
tdst_offset(struct toy_dst dst, int reg, int subreg)
{
dst.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);
return tdst_validate(dst);
}
/**
* Construct a destination operand.
*/
static inline struct toy_dst
tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect,
bool indirect, unsigned indirect_subreg,
enum toy_writemask writemask, uint32_t val32)
{
struct toy_dst dst;
dst.file = file;
dst.type = type;
dst.rect = rect;
dst.indirect = indirect;
dst.indirect_subreg = indirect_subreg;
dst.writemask = writemask;
dst.pad = 0;
dst.val32 = val32;
return tdst_validate(dst);
}
/**
* Construct a null destination operand.
*/
static inline struct toy_dst
tdst_null(void)
{
static const struct toy_dst null_dst = {
.file = TOY_FILE_ARF,
.type = TOY_TYPE_F,
.rect = TOY_RECT_LINEAR,
.indirect = false,
.indirect_subreg = 0,
.writemask = TOY_WRITEMASK_XYZW,
.pad = 0,
.val32 = 0,
};
return null_dst;
}
/**
* Construct a destination operand from a source operand.
*/
static inline struct toy_dst
tdst_from(struct toy_src src)
{
const enum toy_writemask writemask =
(1 << src.swizzle_x) |
(1 << src.swizzle_y) |
(1 << src.swizzle_z) |
(1 << src.swizzle_w);
return tdst_full(src.file, src.type, src.rect,
src.indirect, src.indirect_subreg, writemask, src.val32);
}
/**
 * Construct a destination operand, assuming the type is TOY_TYPE_F, the
 * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
 *
 * \param file             the register file
 * \param reg              the register number
 * \param subreg_in_bytes  the byte offset within the register
 */
static inline struct toy_dst
tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
{
   const uint32_t byte_offset = reg * TOY_REG_WIDTH + subreg_in_bytes;

   return tdst_full(file, TOY_TYPE_F, TOY_RECT_LINEAR,
                    false, 0, TOY_WRITEMASK_XYZW, byte_offset);
}
/**
 * Construct an immediate destination operand of type TOY_TYPE_W.
 *
 * tdst_validate() notes that such operands are used by
 * IF/ELSE/ENDIF/WHILE.
 */
static inline struct toy_dst
tdst_imm_w(int16_t w)
{
/* store through the .i member and read the same bits back through .ui */
const union fi fi = { .i = w };
return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
false, 0, TOY_WRITEMASK_XYZW, fi.ui);
}
/**
 * Return true if the source operand is null, that is, if it is in the ARF
 * at offset 0.
 */
static inline bool
tsrc_is_null(struct toy_src src)
{
/* BRW_ARF_NULL happens to be 0 */
return (src.file == TOY_FILE_ARF && src.val32 == 0);
}
/**
 * Return true if the source operand is swizzled, that is, if any channel
 * reads from a channel other than itself.
 */
static inline bool
tsrc_is_swizzled(struct toy_src src)
{
   const bool identity = (src.swizzle_x == TOY_SWIZZLE_X &&
                          src.swizzle_y == TOY_SWIZZLE_Y &&
                          src.swizzle_z == TOY_SWIZZLE_Z &&
                          src.swizzle_w == TOY_SWIZZLE_W);

   return !identity;
}
/**
 * Return true if the source operand is swizzled to the same channel, that
 * is, if all four channels read from one channel.
 */
static inline bool
tsrc_is_swizzle1(struct toy_src src)
{
   const unsigned chan = src.swizzle_x;

   if (src.swizzle_y != chan)
      return false;
   if (src.swizzle_z != chan)
      return false;

   return (src.swizzle_w == chan);
}
/**
 * Validate the source operand.
 *
 * The checks are assertions only; the operand is returned unchanged so that
 * the call can be chained.
 */
static inline struct toy_src
tsrc_validate(struct toy_src src)
{
   /* per-file restrictions */
   switch (src.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
      assert(!src.indirect);
      break;
   case TOY_FILE_MRF:
      assert(!src.indirect);
      assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_GRF:
      /* the offset of an indirect operand is not bounded by the file */
      if (!src.indirect)
         assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_IMM:
      assert(!src.indirect);
      break;
   default:
      assert(!"invalid src file");
      break;
   }

   /* the vector type is for immediates only */
   if (src.type == TOY_TYPE_V)
      assert(src.file == TOY_FILE_IMM);

   /* register offsets must be aligned to the type */
   if (src.file != TOY_FILE_IMM)
      assert(src.val32 % toy_type_size(src.type) == 0);

   assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
          src.swizzle_z < 4 && src.swizzle_w < 4);

   return src;
}
/**
 * Change the type of the source operand.
 *
 * Only the type field is replaced; the result is validated before it is
 * returned.
 */
static inline struct toy_src
tsrc_type(struct toy_src src, enum toy_type type)
{
src.type = type;
return tsrc_validate(src);
}
/**
 * Change the type of the source operand to TOY_TYPE_D.
 *
 * Shorthand for tsrc_type(src, TOY_TYPE_D).
 */
static inline struct toy_src
tsrc_d(struct toy_src src)
{
return tsrc_type(src, TOY_TYPE_D);
}
/**
 * Change the type of the source operand to TOY_TYPE_UD.
 *
 * Shorthand for tsrc_type(src, TOY_TYPE_UD).
 */
static inline struct toy_src
tsrc_ud(struct toy_src src)
{
return tsrc_type(src, TOY_TYPE_UD);
}
/**
 * Change the type of the source operand to TOY_TYPE_W.
 *
 * Shorthand for tsrc_type(src, TOY_TYPE_W).
 */
static inline struct toy_src
tsrc_w(struct toy_src src)
{
return tsrc_type(src, TOY_TYPE_W);
}
/**
 * Change the type of the source operand to TOY_TYPE_UW.
 *
 * Shorthand for tsrc_type(src, TOY_TYPE_UW).
 */
static inline struct toy_src
tsrc_uw(struct toy_src src)
{
return tsrc_type(src, TOY_TYPE_UW);
}
/**
 * Change the rectangle of the source operand.
 *
 * Only the rect field is replaced; the result is validated before it is
 * returned.
 */
static inline struct toy_src
tsrc_rect(struct toy_src src, enum toy_rect rect)
{
src.rect = rect;
return tsrc_validate(src);
}
/**
* Swizzle the source operand. Note that the current swizzles are honored.
*/
static inline struct toy_src
tsrc_swizzle(struct toy_src src,
enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
{
const enum toy_swizzle current[4] = {
src.swizzle_x, src.swizzle_y,
src.swizzle_z, src.swizzle_w,
};
src.swizzle_x = current[swizzle_x];
src.swizzle_y = current[swizzle_y];
src.swizzle_z = current[swizzle_z];
src.swizzle_w = current[swizzle_w];
return tsrc_validate(src);
}
/**
 * Swizzle the source operand to the same channel. Note that the current
 * swizzles are honored.
 *
 * Shorthand for tsrc_swizzle() with the same swizzle for all four channels.
 */
static inline struct toy_src
tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
{
return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle);
}
/**
 * Set absolute and unset negate of the source operand.
 *
 * Any negation already on the operand is dropped, so the result is the
 * plain absolute value.
 */
static inline struct toy_src
tsrc_absolute(struct toy_src src)
{
src.absolute = true;
src.negate = false;
return tsrc_validate(src);
}
/**
 * Negate the source operand.
 *
 * The negate flag is toggled, so negating twice restores the operand.  The
 * absolute flag is left untouched.
 */
static inline struct toy_src
tsrc_negate(struct toy_src src)
{
src.negate = !src.negate;
return tsrc_validate(src);
}
/**
 * Offset the source operand.
 *
 * \param reg     the offset in whole registers (TOY_REG_WIDTH bytes each)
 * \param subreg  the offset in elements of the operand's current type
 *
 * Both offsets may be negative.  The result is validated.
 */
static inline struct toy_src
tsrc_offset(struct toy_src src, int reg, int subreg)
{
src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
return tsrc_validate(src);
}
/**
* Construct a source operand.
*/
static inline struct toy_src
tsrc_full(enum toy_file file, enum toy_type type,
enum toy_rect rect, bool indirect, unsigned indirect_subreg,
enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w,
bool absolute, bool negate,
uint32_t val32)
{
struct toy_src src;
src.file = file;
src.type = type;
src.rect = rect;
src.indirect = indirect;
src.indirect_subreg = indirect_subreg;
src.swizzle_x = swizzle_x;
src.swizzle_y = swizzle_y;
src.swizzle_z = swizzle_z;
src.swizzle_w = swizzle_w;
src.absolute = absolute;
src.negate = negate;
src.pad = 0;
src.val32 = val32;
return tsrc_validate(src);
}
/**
* Construct a null source operand.
*/
static inline struct toy_src
tsrc_null(void)
{
static const struct toy_src null_src = {
.file = TOY_FILE_ARF,
.type = TOY_TYPE_F,
.rect = TOY_RECT_LINEAR,
.indirect = false,
.indirect_subreg = 0,
.swizzle_x = TOY_SWIZZLE_X,
.swizzle_y = TOY_SWIZZLE_Y,
.swizzle_z = TOY_SWIZZLE_Z,
.swizzle_w = TOY_SWIZZLE_W,
.absolute = false,
.negate = false,
.pad = 0,
.val32 = 0,
};
return null_src;
}
/**
* Construct a source operand from a destination operand.
*/
static inline struct toy_src
tsrc_from(struct toy_dst dst)
{
enum toy_swizzle swizzle[4];
if (dst.writemask == TOY_WRITEMASK_XYZW) {
swizzle[0] = TOY_SWIZZLE_X;
swizzle[1] = TOY_SWIZZLE_Y;
swizzle[2] = TOY_SWIZZLE_Z;
swizzle[3] = TOY_SWIZZLE_W;
}
else {
const enum toy_swizzle first =
(dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X :
(dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y :
(dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z :
(dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W :
TOY_SWIZZLE_X;
swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first;
swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first;
swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : first;
swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first;
}
return tsrc_full(dst.file, dst.type, dst.rect,
dst.indirect, dst.indirect_subreg,
swizzle[0], swizzle[1], swizzle[2], swizzle[3],
false, false, dst.val32);
}
/**
 * Construct a source operand, assuming the type is TOY_TYPE_F, the
 * rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate.
 *
 * \param file             the register file
 * \param reg              the register number
 * \param subreg_in_bytes  the byte offset within the register
 */
static inline struct toy_src
tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
{
   const uint32_t byte_offset = reg * TOY_REG_WIDTH + subreg_in_bytes;

   return tsrc_full(file, TOY_TYPE_F, TOY_RECT_LINEAR, false, 0,
                    TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
                    TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
                    false, false, byte_offset);
}
/**
 * Construct an immediate source operand.
 *
 * \param type   the type of the immediate
 * \param val32  the raw 32-bit value, stored as-is
 *
 * The operand is unswizzled, with neither absolute nor negate set.
 */
static inline struct toy_src
tsrc_imm(enum toy_type type, uint32_t val32)
{
return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0,
TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
false, false, val32);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_F.
 *
 * The float is stored as its raw bits, not converted to an integer.
 */
static inline struct toy_src
tsrc_imm_f(float f)
{
/* reinterpret the float through the union to get at its bits */
const union fi fi = { .f = f };
return tsrc_imm(TOY_TYPE_F, fi.ui);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_D.
 *
 * The signed value is stored as its raw 32 bits.
 */
static inline struct toy_src
tsrc_imm_d(int32_t d)
{
const union fi fi = { .i = d };
return tsrc_imm(TOY_TYPE_D, fi.ui);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_UD.
 *
 * The value is stored unchanged.
 */
static inline struct toy_src
tsrc_imm_ud(uint32_t ud)
{
const union fi fi = { .ui = ud };
return tsrc_imm(TOY_TYPE_UD, fi.ui);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_W.
 *
 * The value is stored through the .i member of union fi and read back
 * through .ui; no replication of the word into the high half is done here.
 */
static inline struct toy_src
tsrc_imm_w(int16_t w)
{
const union fi fi = { .i = w };
return tsrc_imm(TOY_TYPE_W, fi.ui);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_UW.
 *
 * The value is stored through the .ui member of union fi; no replication of
 * the word into the high half is done here.
 */
static inline struct toy_src
tsrc_imm_uw(uint16_t uw)
{
const union fi fi = { .ui = uw };
return tsrc_imm(TOY_TYPE_UW, fi.ui);
}
/**
 * Construct an immediate source operand of type TOY_TYPE_V.
 *
 * The 32-bit value is stored as-is.
 */
static inline struct toy_src
tsrc_imm_v(uint32_t v)
{
return tsrc_imm(TOY_TYPE_V, v);
}
#endif /* TOY_REG_H */

View file

@ -0,0 +1,289 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_HELPERS_H
#define TOY_HELPERS_H
#include "toy_compiler.h"
/**
 * Transpose a dst operand.
 *
 * Instead of processing a single vertex with each of its attributes in one
 * register, such as
 *
 *   r0 = [x0, y0, z0, w0]
 *
 * we want to process four vertices at a time
 *
 *   r0 = [x0, y0, z0, w0]
 *   r1 = [x1, y1, z1, w1]
 *   r2 = [x2, y2, z2, w2]
 *   r3 = [x3, y3, z3, w3]
 *
 * but with the attribute data "transposed"
 *
 *   r0 = [x0, x1, x2, x3]
 *   r1 = [y0, y1, y2, y3]
 *   r2 = [z0, z1, z2, z3]
 *   r3 = [w0, w1, w2, w3]
 *
 * This is also known as the SoA form.
 *
 * \param dst    the operand to transpose; it must be in the VRF or be null
 * \param trans  receives four operands, one per channel.  For a VRF
 *               operand, channel N maps to the Nth register after \p dst
 *               when it is enabled in the writemask, and to the null
 *               operand otherwise.
 */
static inline void
tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
{
   int ch;

   if (dst.file == TOY_FILE_VRF) {
      assert(!dst.indirect);

      for (ch = 0; ch < 4; ch++) {
         if (!(dst.writemask & (1 << ch))) {
            trans[ch] = tdst_null();
            continue;
         }

         /* each channel gets a register of its own, fully writemasked */
         trans[ch] = tdst_offset(dst, ch, 0);
         trans[ch].writemask = TOY_WRITEMASK_XYZW;
      }
      return;
   }

   if (dst.file == TOY_FILE_ARF) {
      /* the null operand stays null for every channel */
      assert(tdst_is_null(dst));

      for (ch = 0; ch < 4; ch++)
         trans[ch] = dst;
      return;
   }

   /* TOY_FILE_GRF, TOY_FILE_MRF, TOY_FILE_IMM, and anything else */
   assert(!"unexpected file in dst transposition");

   for (ch = 0; ch < 4; ch++)
      trans[ch] = tdst_null();
}
/**
 * Transpose a src operand.
 *
 * \param src    the operand to transpose; it must be in the VRF, be an
 *               immediate, or be null
 * \param trans  receives four operands, one per channel.  For a VRF
 *               operand, channel N maps to the register selected by the Nth
 *               swizzle of \p src, with the swizzles reset.  Immediates and
 *               the null operand are copied unchanged to all channels.
 */
static inline void
tsrc_transpose(struct toy_src src, struct toy_src *trans)
{
   const enum toy_swizzle swizzle[4] = {
      src.swizzle_x,
      src.swizzle_y,
      src.swizzle_z,
      src.swizzle_w,
   };
   int ch;

   if (src.file == TOY_FILE_VRF) {
      assert(!src.indirect);

      for (ch = 0; ch < 4; ch++) {
         /* the swizzle now picks the register rather than the channel */
         trans[ch] = tsrc_offset(src, swizzle[ch], 0);
         trans[ch].swizzle_x = TOY_SWIZZLE_X;
         trans[ch].swizzle_y = TOY_SWIZZLE_Y;
         trans[ch].swizzle_z = TOY_SWIZZLE_Z;
         trans[ch].swizzle_w = TOY_SWIZZLE_W;
      }
      return;
   }

   if (src.file == TOY_FILE_ARF || src.file == TOY_FILE_IMM) {
      /* the only ARF operand expected here is the null operand */
      if (src.file == TOY_FILE_ARF)
         assert(tsrc_is_null(src));

      for (ch = 0; ch < 4; ch++)
         trans[ch] = src;
      return;
   }

   /* TOY_FILE_GRF, TOY_FILE_MRF, and anything else */
   assert(!"unexpected file in src transposition");

   for (ch = 0; ch < 4; ch++)
      trans[ch] = tsrc_null();
}
/**
 * Construct the message descriptor of a SEND instruction as an immediate
 * source operand of type TOY_TYPE_UD.
 *
 * \param tc                the compiler (unused here)
 * \param eot               end-of-thread flag, placed in bit 31
 * \param message_length    message length, 1 to 15, placed from bit 25
 * \param response_length   response length, 0 to 16, placed from bit 20
 * \param header_present    header-present flag, placed in bit 19
 * \param function_control  function-specific bits, occupying bits 18:0
 */
static inline struct toy_src
tsrc_imm_mdesc(const struct toy_compiler *tc,
               bool eot,
               unsigned message_length,
               unsigned response_length,
               bool header_present,
               uint32_t function_control)
{
   uint32_t desc;

   assert(message_length >= 1 && message_length <= 15);
   /* response_length is unsigned, so only the upper bound needs checking */
   assert(response_length <= 16);
   assert(function_control < 1 << 19);

   /*
    * The flags are promoted to signed int.  Cast them first so that eot is
    * not shifted into the sign bit, which would be undefined behavior.
    */
   desc = (uint32_t) eot << 31 |
          message_length << 25 |
          response_length << 20 |
          (uint32_t) header_present << 19 |
          function_control;

   return tsrc_imm_ud(desc);
}
/**
 * Construct the message descriptor of a sampler message.
 *
 * The message is never an end-of-thread one.  The function control bits
 * hold the binding table index in the low bits, the sampler index from
 * bit 8, the message type from bit 12, and the SIMD mode from bit 17 on
 * GEN7+ or from bit 16 before that.
 */
static inline struct toy_src
tsrc_imm_mdesc_sampler(const struct toy_compiler *tc,
                       unsigned message_length,
                       unsigned response_length,
                       bool header_present,
                       unsigned simd_mode,
                       unsigned message_type,
                       unsigned sampler_index,
                       unsigned binding_table_index)
{
   /* only the position of the SIMD mode differs between generations */
   const unsigned simd_mode_shift = (tc->gen >= ILO_GEN(7)) ? 17 : 16;
   uint32_t ctrl;

   assert(simd_mode < 4);
   assert(sampler_index < 16);
   assert(binding_table_index < 256);

   ctrl = simd_mode << simd_mode_shift |
          message_type << 12 |
          sampler_index << 8 |
          binding_table_index;

   return tsrc_imm_mdesc(tc, false, message_length,
                         response_length, header_present, ctrl);
}
/**
 * Construct the message descriptor of a data port message.
 *
 * \p message_specific_control must already be in position: it may only have
 * bits within 0x3f00 set on GEN7+, or within 0x1f00 before that.  The
 * write-commit flag is not available on GEN7+, and is only accepted with
 * the streamed VB write message before that.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
                         bool eot,
                         unsigned message_length,
                         unsigned response_length,
                         bool header_present,
                         bool send_write_commit_message,
                         unsigned message_type,
                         unsigned message_specific_control,
                         unsigned binding_table_index)
{
   uint32_t ctrl;

   if (tc->gen < ILO_GEN(7)) {
      assert(!send_write_commit_message ||
             message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
      assert((message_specific_control & 0x1f00) == message_specific_control);

      ctrl = send_write_commit_message << 17 |
             message_type << 13 |
             (message_specific_control & 0x1f00) |
             binding_table_index;
   }
   else {
      assert(!send_write_commit_message);
      assert((message_specific_control & 0x3f00) == message_specific_control);

      ctrl = message_type << 14 |
             (message_specific_control & 0x3f00) |
             binding_table_index;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
                         response_length, header_present, ctrl);
}
/**
 * Build the message descriptor of a data port scratch message.
 *
 * Only valid when tc->gen >= ILO_GEN(7).  Bit 18 is always set; the write
 * type, dword mode and invalidate-after-read flags follow at bits 17, 16 and
 * 15; (num_registers - 1) is placed at bit 12 and the hword offset fills the
 * low bits.  num_registers must be 1, 2 or 4.  A header is always present
 * and the end-of-thread bit is never set.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
                                 unsigned message_length,
                                 unsigned response_length,
                                 bool write_type,
                                 bool dword_mode,
                                 bool invalidate_after_read,
                                 int num_registers,
                                 int hword_offset)
{
   uint32_t function_control;

   assert(tc->gen >= ILO_GEN(7));
   assert(num_registers == 1 || num_registers == 2 || num_registers == 4);

   function_control = 1 << 18;
   function_control |= write_type << 17;
   function_control |= dword_mode << 16;
   function_control |= invalidate_after_read << 15;
   function_control |= (num_registers - 1) << 12;
   function_control |= hword_offset;

   return tsrc_imm_mdesc(tc, false, message_length,
                         response_length, true, function_control);
}
/**
 * Build the message descriptor of a URB message.
 *
 * When tc->gen >= ILO_GEN(7) the function control is made of a (always
 * cleared) per-slot-offset flag at bit 16, the complete flag at bit 15, the
 * swizzle control at bit 14 and the global offset at bit 3; \p used and
 * \p allocate are ignored.  Otherwise it is made of complete (bit 15), used
 * (bit 14), allocate (bit 13), the swizzle control at bit 10 and the global
 * offset at bit 4.  The URB opcode fills the low bits in both layouts, and a
 * header is always present.
 */
static inline struct toy_src
tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
                   bool eot,
                   unsigned message_length,
                   unsigned response_length,
                   bool complete,
                   bool used,
                   bool allocate,
                   unsigned swizzle_control,
                   unsigned global_offset,
                   unsigned urb_opcode)
{
   uint32_t function_control = urb_opcode;

   if (tc->gen >= ILO_GEN(7)) {
      const bool per_slot_offset = false;

      function_control |= global_offset << 3;
      function_control |= swizzle_control << 14;
      function_control |= complete << 15;
      function_control |= per_slot_offset << 16;
   }
   else {
      function_control |= global_offset << 4;
      function_control |= swizzle_control << 10;
      function_control |= allocate << 13;
      function_control |= used << 14;
      function_control |= complete << 15;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
                         response_length, true, function_control);
}
#endif /* TOY_HELPERS_H */

View file

@ -0,0 +1,632 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "pipe/p_shader_tokens.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_helpers.h"
#include "toy_legalize.h"
/**
 * Turn an instruction into BRW_OPCODE_SEND, or BRW_OPCODE_SENDC when
 * \p sendc is set.
 *
 * The instruction must carry an opcode of 128 or above and must not have a
 * thread control or condition modifier set yet; the condition modifier field
 * is then used to store \p sfid.
 */
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
                           bool sendc, unsigned sfid)
{
   assert(inst->opcode >= 128);

   if (sendc)
      inst->opcode = BRW_OPCODE_SENDC;
   else
      inst->opcode = BRW_OPCODE_SEND;

   /* thread control is reserved */
   assert(inst->thread_ctrl == 0);

   /* the field must be free because it is about to hold the SFID */
   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
   inst->cond_modifier = sfid;
}
/**
 * Translate a virtual math opcode to the matching BRW_MATH_FUNCTION_x value.
 *
 * Asserts and returns -1 when \p opcode is not a known math opcode.
 */
static int
math_op_to_func(unsigned opcode)
{
   static const struct {
      unsigned opcode;
      int func;
   } math_funcs[] = {
      { TOY_OPCODE_INV,               BRW_MATH_FUNCTION_INV },
      { TOY_OPCODE_LOG,               BRW_MATH_FUNCTION_LOG },
      { TOY_OPCODE_EXP,               BRW_MATH_FUNCTION_EXP },
      { TOY_OPCODE_SQRT,              BRW_MATH_FUNCTION_SQRT },
      { TOY_OPCODE_RSQ,               BRW_MATH_FUNCTION_RSQ },
      { TOY_OPCODE_SIN,               BRW_MATH_FUNCTION_SIN },
      { TOY_OPCODE_COS,               BRW_MATH_FUNCTION_COS },
      { TOY_OPCODE_FDIV,              BRW_MATH_FUNCTION_FDIV },
      { TOY_OPCODE_POW,               BRW_MATH_FUNCTION_POW },
      { TOY_OPCODE_INT_DIV_QUOTIENT,  BRW_MATH_FUNCTION_INT_DIV_QUOTIENT },
      { TOY_OPCODE_INT_DIV_REMAINDER, BRW_MATH_FUNCTION_INT_DIV_REMAINDER },
   };
   unsigned n;

   for (n = 0; n < sizeof(math_funcs) / sizeof(math_funcs[0]); n++) {
      if (math_funcs[n].opcode == opcode)
         return math_funcs[n].func;
   }

   assert(!"unknown math opcode");
   return -1;
}
/**
* Lower virtual math opcodes to BRW_OPCODE_MATH.
*/
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_dst tmp;
int i;
/* see commit 250770b74d33bb8625c780a74a89477af033d13a */
for (i = 0; i < Elements(inst->src); i++) {
if (tsrc_is_null(inst->src[i]))
break;
/* no swizzling in align1 */
/* XXX how about source modifiers? */
if (toy_file_is_virtual(inst->src[i].file) &&
!tsrc_is_swizzled(inst->src[i]) &&
!inst->src[i].absolute &&
!inst->src[i].negate)
continue;
tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
tc_MOV(tc, tmp, inst->src[i]);
inst->src[i] = tsrc_from(tmp);
}
/* FC[0:3] */
assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
inst->cond_modifier = math_op_to_func(inst->opcode);
/* FC[4:5] */
assert(inst->thread_ctrl == 0);
inst->thread_ctrl = 0;
inst->opcode = BRW_OPCODE_MATH;
tc_move_inst(tc, inst);
/* no writemask in align1 */
if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
struct toy_dst dst = inst->dst;
struct toy_inst *inst2;
tmp = tc_alloc_tmp(tc);
tmp.type = inst->dst.type;
inst->dst = tmp;
inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
inst2->pred_ctrl = inst->pred_ctrl;
}
}
/**
 * Return the bits of the absolute value of a 32-bit immediate.
 *
 * \param imm32 raw bits of the immediate
 * \param type  how the bits are to be interpreted
 *
 * TOY_TYPE_F, TOY_TYPE_D and TOY_TYPE_W are handled; TOY_TYPE_V asserts.
 * Immediates of any other type are returned unchanged.
 */
static uint32_t
absolute_imm(uint32_t imm32, enum toy_type type)
{
   union fi val = { .ui = imm32 };

   switch (type) {
   case TOY_TYPE_F:
      val.f = fabs(val.f);
      break;
   case TOY_TYPE_D:
      /*
       * Negate in unsigned arithmetic: "-val.i" overflows, and is thus
       * undefined, for the most negative value.  That value has no positive
       * counterpart and is left as is by the wrap-around.
       */
      if (val.i < 0)
         val.ui = 0u - val.ui;
      break;
   case TOY_TYPE_W:
      /* only the low word is significant; it is widened to int before the
       * negation so no overflow is possible here */
      if ((int16_t) (val.ui & 0xffff) < 0)
         val.i = -((int16_t) (val.ui & 0xffff));
      break;
   case TOY_TYPE_V:
      assert(!"cannot take absoulte of immediates of type V");
      break;
   default:
      break;
   }

   return val.ui;
}
/**
 * Return the bits of the negation of a 32-bit immediate.
 *
 * \param imm32 raw bits of the immediate
 * \param type  how the bits are to be interpreted
 *
 * TOY_TYPE_F, TOY_TYPE_D, TOY_TYPE_UD, TOY_TYPE_W and TOY_TYPE_UW are
 * handled.  Any other type asserts and the bits are returned unchanged.
 */
static uint32_t
negate_imm(uint32_t imm32, enum toy_type type)
{
   union fi val = { .ui = imm32 };

   switch (type) {
   case TOY_TYPE_F:
      val.f = -val.f;
      break;
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      /*
       * Two's complement negation done in unsigned arithmetic: "-val.i"
       * overflows, and is thus undefined, for the most negative value.
       */
      val.ui = 0u - val.ui;
      break;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      /* the low word is widened to int before the negation */
      val.i = -((int16_t) (val.ui & 0xffff));
      break;
   default:
      assert(!"negate immediate of unknown type");
      break;
   }

   return val.ui;
}
static void
validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
{
bool move_inst = false;
int i;
for (i = 0; i < Elements(inst->src); i++) {
struct toy_dst tmp;
if (tsrc_is_null(inst->src[i]))
break;
if (inst->src[i].file != TOY_FILE_IMM)
continue;
if (inst->src[i].absolute) {
inst->src[i].val32 =
absolute_imm(inst->src[i].val32, inst->src[i].type);
inst->src[i].absolute = false;
}
if (inst->src[i].negate) {
inst->src[i].val32 =
negate_imm(inst->src[i].val32, inst->src[i].type);
inst->src[i].negate = false;
}
/* this is the last operand */
if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1]))
break;
/* need to use a temp if this imm is not the last operand */
/* TODO we should simply swap the operands if the op is commutative */
tmp = tc_alloc_tmp(tc);
tmp = tdst_type(tmp, inst->src[i].type);
tc_MOV(tc, tmp, inst->src[i]);
inst->src[i] = tsrc_from(tmp);
move_inst = true;
}
if (move_inst)
tc_move_inst(tc, inst);
}
/**
* Lower an integer multiplication to a MUL/MACH/MOV sequence that goes
* through the accumulator, then discard the original instruction.
*
* Instructions whose destination type is neither TOY_TYPE_UD nor TOY_TYPE_D
* are left untouched.
*/
static void
lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
{
const enum toy_type inst_type = inst->dst.type;
/* the accumulator, typed like the destination of the multiplication */
const struct toy_dst acc0 =
tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type);
struct toy_inst *inst2;
/* only need to take care of integer multiplications */
if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
return;
/* acc0 = (src0 & 0x0000ffff) * src1 */
tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
/* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
inst->src[0], inst->src[1]);
/* the MACH has a null destination; its result is kept in the accumulator */
inst2->acc_wr_ctrl = true;
/* dst = acc0 & 0xffffffff */
tc_MOV(tc, inst->dst, tsrc_from(acc0));
/* the sequence above replaces the original instruction */
tc_discard_inst(tc, inst);
}
/**
* Lower BRW_OPCODE_MAC.
*
* When the destination type is neither TOY_TYPE_UD nor TOY_TYPE_D, src[2] is
* first moved to the accumulator and then cleared from the instruction,
* which is kept.  For integer types the instruction is replaced by a
* (lowered) multiplication into a temporary followed by an ADD of src[2].
*/
static void
lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
{
const enum toy_type inst_type = inst->dst.type;
if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);
/* load the addend into the accumulator ahead of the MAC */
tc_MOV(tc, acc0, inst->src[2]);
inst->src[2] = tsrc_null();
/* presumably re-queues the MAC after the MOV just emitted -- confirm
* against tc_move_inst() */
tc_move_inst(tc, inst);
}
else {
struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
struct toy_inst *inst2;
/* tmp = src0 * src1, with the new MUL lowered right away */
inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
lower_opcode_mul(tc, inst2);
/* dst = tmp + src2 */
tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
tc_discard_inst(tc, inst);
}
}
/**
 * Legalize the instructions for register allocation.
 *
 * A first walk lowers MAC and MUL and fails the compilation when an opcode
 * above TOY_OPCODE_LAST_HW is still present.  A second walk then legalizes
 * the immediate operands of every instruction, including the ones added by
 * the first walk.
 */
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc)
{
   struct toy_inst *cur;

   tc_head(tc);
   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc)) {
      if (cur->opcode == BRW_OPCODE_MAC) {
         lower_opcode_mac(tc, cur);
      }
      else if (cur->opcode == BRW_OPCODE_MAD) {
         /* TODO operands must be floats */
      }
      else if (cur->opcode == BRW_OPCODE_MUL) {
         lower_opcode_mul(tc, cur);
      }
      else if (cur->opcode > TOY_OPCODE_LAST_HW) {
         tc_fail(tc, "internal opcodes not lowered");
      }
   }

   /* loop again as the previous pass may add new instructions */
   tc_head(tc);
   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc))
      validate_imm(tc, cur);
}
/**
* Patch the jump distance of a BRW_OPCODE_WHILE so that it targets the
* instruction following the matching BRW_OPCODE_DO marker.
*
* The distance is counted in instructions, ignoring markers, and is doubled
* before being stored as a W immediate: in src[1] when tc->gen >=
* ILO_GEN(7), in dst otherwise.
*/
static void
patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_inst *inst2;
int nest_level, dist;
nest_level = 0;
/* distance from the WHILE to the instruction right before it */
dist = -1;
/* search backward */
LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
&tc->instructions, list) {
if (inst2->marker) {
if (inst2->opcode == BRW_OPCODE_DO) {
if (nest_level) {
/* this DO opens a nested loop whose WHILE was already passed */
nest_level--;
}
else {
/* the following instruction */
dist++;
break;
}
}
/* markers do not count toward the distance */
continue;
}
/* walking backward, a WHILE means a nested loop is being entered */
if (inst2->opcode == BRW_OPCODE_WHILE)
nest_level++;
dist--;
}
if (tc->gen >= ILO_GEN(7))
inst->src[1] = tsrc_imm_w(dist * 2);
else
inst->dst = tdst_imm_w(dist * 2);
}
/**
* Patch the jump targets of a BRW_OPCODE_IF or BRW_OPCODE_ELSE.
*
* The instructions following \p inst are scanned, ignoring markers, for the
* matching ENDIF; nested IF/ENDIF pairs are skipped by tracking the nesting
* level.  UIP is the doubled distance to the matching ENDIF.  JIP is the
* doubled distance to the instruction following the ELSE of the same level
* when \p inst is an IF and such an ELSE exists, and equals UIP otherwise.
*
* When tc->gen >= ILO_GEN(7), both values are packed into a D immediate in
* src[1] (UIP in the upper 16 bits); otherwise only JIP is stored, as a W
* immediate in dst.
*/
static void
patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_inst *inst2;
int nest_level, dist;
int jip, uip;
nest_level = 0;
/* distance from inst to the instruction being examined */
dist = 1;
jip = 0;
uip = 0;
/* search forward */
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
if (inst2->marker)
continue;
if (inst2->opcode == BRW_OPCODE_ENDIF) {
if (nest_level) {
nest_level--;
}
else {
uip = dist * 2;
/* no ELSE was seen: JIP also points at the ENDIF */
if (!jip)
jip = uip;
break;
}
}
else if (inst2->opcode == BRW_OPCODE_ELSE &&
inst->opcode == BRW_OPCODE_IF) {
if (!nest_level) {
/* the following instruction */
jip = (dist + 1) * 2;
/* on ILO_GEN(6) the search stops at the ELSE */
if (tc->gen == ILO_GEN(6)) {
uip = jip;
break;
}
}
}
else if (inst2->opcode == BRW_OPCODE_IF) {
nest_level++;
}
dist++;
}
if (tc->gen >= ILO_GEN(7)) {
/* what should the type be? */
inst->dst.type = TOY_TYPE_D;
inst->src[0].type = TOY_TYPE_D;
inst->src[1] = tsrc_imm_d(uip << 16 | jip);
}
else {
inst->dst = tdst_imm_w(jip);
}
inst->thread_ctrl = BRW_THREAD_SWITCH;
}
static void
patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_inst *inst2;
bool found = false;
int dist = 1;
/* search forward for instructions that may enable channels */
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
if (inst2->marker)
continue;
switch (inst2->opcode) {
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
found = true;
break;
default:
break;
}
if (found)
break;
dist++;
}
/* should we set dist to (dist - 1) or 1? */
if (!found)
dist = 1;
if (tc->gen >= ILO_GEN(7))
inst->src[1] = tsrc_imm_w(dist * 2);
else
inst->dst = tdst_imm_w(dist * 2);
inst->thread_ctrl = BRW_THREAD_SWITCH;
}
/**
* Patch the jump targets of a BRW_OPCODE_BREAK or BRW_OPCODE_CONTINUE.
*
* JIP is the doubled distance to the first ELSE, ENDIF or WHILE following
* \p inst.  UIP is the doubled distance to the WHILE that closes the loop
* \p inst is in; on ILO_GEN(6) a BREAK targets the instruction after that
* WHILE instead.  Markers are not counted.  Both values are packed into a D
* immediate stored in src[1], with UIP in the upper 16 bits.
*/
static void
patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_inst *inst2, *inst3;
int nest_level, dist, jip, uip;
nest_level = 0;
dist = 1;
/* both default to the next instruction */
jip = 1 * 2;
uip = 1 * 2;
/* search forward */
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
if (inst2->marker) {
if (inst2->opcode == BRW_OPCODE_DO)
nest_level++;
continue;
}
if (inst2->opcode == BRW_OPCODE_ELSE ||
inst2->opcode == BRW_OPCODE_ENDIF ||
inst2->opcode == BRW_OPCODE_WHILE) {
jip = dist * 2;
break;
}
dist++;
}
/* go on to determine uip */
/*
* The second search resumes at the instruction the first one stopped at,
* so that instruction is examined again and dist keeps its value.
* NOTE(review): this presumably relies on the first loop always ending
* with a break; if it ran to the end of the list, inst2 would not refer to
* an instruction -- confirm against LIST_FOR_EACH_ENTRY_FROM.
*/
inst3 = inst2;
LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
if (inst2->marker) {
if (inst2->opcode == BRW_OPCODE_DO)
nest_level++;
continue;
}
if (inst2->opcode == BRW_OPCODE_WHILE) {
if (nest_level) {
/* this WHILE closes a nested loop */
nest_level--;
}
else {
/* the following instruction */
if (tc->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
dist++;
uip = dist * 2;
break;
}
}
dist++;
}
/* should the type be D or W? */
inst->dst.type = TOY_TYPE_D;
inst->src[0].type = TOY_TYPE_D;
inst->src[1] = tsrc_imm_d(uip << 16 | jip);
}
/**
* Legalize the instructions for assembling.
*
* A first walk over the instructions
*
* - gives a null src[1] the type of src[0],
* - forces MATH to align1 and splits SIMD16 MATH into two SIMD8 halves
* where required,
* - splits an IF with a condition modifier into a CMP and a predicated IF
* when tc->gen >= ILO_GEN(7),
* - retargets MRF operands to GRF when tc->gen >= ILO_GEN(7),
*
* and records the resulting instruction count in tc->num_instructions.  A
* second walk patches the jump targets of the flow control instructions.
*/
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc)
{
struct toy_inst *inst;
/* number of instructions, including the ones added by this pass */
int pc = 0;
tc_head(tc);
while ((inst = tc_next(tc)) != NULL) {
int i;
pc++;
/*
* From the Sandy Bridge PRM, volume 4 part 2, page 112:
*
* "Specifically, for instructions with a single source, it only
* uses the first source operand <src0>. In this case, the second
* source operand <src1> must be set to null and also with the same
* type as the first source operand <src0>. It is a special case
* when <src0> is an immediate, as an immediate <src0> uses DW3 of
* the instruction word, which is normally used by <src1>. In this
* case, <src1> must be programmed with register file ARF and the
* same data type as <src0>."
*
* Since we already fill unused operands with null, we only need to take
* care of the type.
*/
if (tsrc_is_null(inst->src[1]))
inst->src[1].type = inst->src[0].type;
switch (inst->opcode) {
case BRW_OPCODE_MATH:
/* math does not support align16 nor exec_size > 8 */
inst->access_mode = BRW_ALIGN_1;
if (inst->exec_size == BRW_EXECUTE_16) {
/*
* From the Ivy Bridge PRM, volume 4 part 3, page 192:
*
* "INT DIV function does not support SIMD16."
*/
if (tc->gen < ILO_GEN(7) ||
inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
struct toy_inst *inst2;
/* the original instruction becomes the first SIMD8 half */
inst->exec_size = BRW_EXECUTE_8;
inst->qtr_ctrl = GEN6_COMPRESSION_1Q;
/* the duplicate handles the second half, one register further */
inst2 = tc_duplicate_inst(tc, inst);
inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
inst2->dst = tdst_offset(inst2->dst, 1, 0);
inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
if (!tsrc_is_null(inst2->src[1]))
inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
/* account for the duplicate */
pc++;
}
}
break;
case BRW_OPCODE_IF:
if (tc->gen >= ILO_GEN(7) &&
inst->cond_modifier != BRW_CONDITIONAL_NONE) {
struct toy_inst *inst2;
inst2 = tc_duplicate_inst(tc, inst);
/* replace the original IF by CMP */
inst->opcode = BRW_OPCODE_CMP;
/* predicate control instead of condition modifier */
inst2->dst = tdst_null();
inst2->src[0] = tsrc_null();
inst2->src[1] = tsrc_null();
inst2->cond_modifier = BRW_CONDITIONAL_NONE;
inst2->pred_ctrl = BRW_PREDICATE_NORMAL;
/* account for the duplicate */
pc++;
}
break;
default:
break;
}
/* MRF to GRF */
if (tc->gen >= ILO_GEN(7)) {
for (i = 0; i < Elements(inst->src); i++) {
/* operands in any other file are left alone */
if (inst->src[i].file != TOY_FILE_MRF)
continue;
else if (tsrc_is_null(inst->src[i]))
break;
inst->src[i].file = TOY_FILE_GRF;
}
if (inst->dst.file == TOY_FILE_MRF)
inst->dst.file = TOY_FILE_GRF;
}
}
tc->num_instructions = pc;
/* set JIP/UIP */
tc_head(tc);
while ((inst = tc_next(tc)) != NULL) {
switch (inst->opcode) {
case BRW_OPCODE_IF:
case BRW_OPCODE_ELSE:
patch_if_else_jip(tc, inst);
break;
case BRW_OPCODE_ENDIF:
patch_endif_jip(tc, inst);
break;
case BRW_OPCODE_WHILE:
patch_while_jip(tc, inst);
break;
case BRW_OPCODE_BREAK:
case BRW_OPCODE_CONTINUE:
patch_break_continue_jip(tc, inst);
break;
default:
break;
}
}
}

View file

@ -0,0 +1,52 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_LEGALIZE_H
#define TOY_LEGALIZE_H
#include "toy_compiler.h"
#include "toy_tgsi.h"
/**
* Lower an instruction to BRW_OPCODE_SEND(C).
*/
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
bool sendc, unsigned sfid);
/**
* Lower virtual math opcodes to BRW_OPCODE_MATH.
*/
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);
/**
* Allocate GRF registers to VRF registers.
*/
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
int start_grf, int end_grf,
int num_grf_per_vrf);
/**
* Legalize the instructions for register allocation.
*/
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc);
/**
* Legalize the instructions for assembling.
*/
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc);
#endif /* TOY_LEGALIZE_H */

View file

@ -0,0 +1,628 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include <stdlib.h> /* for qsort() */
#include "toy_compiler.h"
#include "toy_legalize.h"
/**
* Live interval of a VRF register.
*/
struct linear_scan_live_interval {
/* VRF register number; 0 marks an entry that is not in use */
int vrf;
/* first and last program counters at which the register is live */
int startpoint;
int endpoint;
/*
* should this be assigned a consecutive register of the previous
* interval's?
*/
bool consecutive;
/* register assigned to this interval by the linear scan */
int reg;
/* link in the active list of struct linear_scan */
struct list_head list;
};
/**
* Linear scan.
*/
struct linear_scan {
/* array of intervals; indexed by VRF number until it gets sorted */
struct linear_scan_live_interval *intervals;
/* highest VRF number seen, and number of intervals in use */
int max_vrf, num_vrfs;
/* total number of registers available for allocation */
int num_regs;
/* intervals that currently hold a register, sorted by endpoint */
struct list_head active_list;
/* stack of free registers; registers are taken from the tail */
int *free_regs;
int num_free_regs;
/* maps a VRF number to the register assigned to it */
int *vrf_mapping;
};
/**
 * Push the registers [reg, reg + count) back onto the free register pool.
 *
 * The highest register is pushed first, which leaves \p reg itself at the
 * tail of the pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int cur;

   for (cur = reg + count - 1; cur >= reg; cur--) {
      ls->free_regs[ls->num_free_regs] = cur;
      ls->num_free_regs++;
   }
}
/**
 * qsort() callback that orders register numbers from the largest to the
 * smallest.
 */
static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int lhs = *(const int *) elem1;
   const int rhs = *(const int *) elem2;

   /* a larger register number sorts first */
   return rhs - lhs;
}
/**
* Allocate a chunk of registers from the free register pool.
*
* Returns the first register of a chunk of \p count registers with
* consecutive numbers, or -1 when there is no such chunk.  The registers of
* the chunk are removed from the pool.
*/
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
bool sorted = false;
int reg;
/* simple cases */
if (count > ls->num_free_regs)
return -1;
else if (count == 1)
return ls->free_regs[--ls->num_free_regs];
/* TODO a free register pool */
/* TODO reserve some regs for spilling */
while (true) {
bool found = false;
int start;
/*
* find a chunk of registers that have consecutive register
* numbers
*/
for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
int i;
/* the chunk is stored in decreasing order, ending at index start */
for (i = 1; i < count; i++) {
if (ls->free_regs[start - i] != ls->free_regs[start] + i)
break;
}
if (i >= count) {
found = true;
break;
}
}
if (found) {
reg = ls->free_regs[start];
/* close the gap unless the chunk sits at the tail of the pool */
if (start != ls->num_free_regs - 1) {
start++;
memmove(&ls->free_regs[start - count],
&ls->free_regs[start],
sizeof(*ls->free_regs) * (ls->num_free_regs - start));
}
ls->num_free_regs -= count;
break;
}
else if (!sorted) {
/* sort and retry */
qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
linear_scan_compare_regs);
sorted = true;
}
else {
/* failed */
reg = -1;
break;
}
}
return reg;
}
/**
 * Insert an interval into the active list, in front of the first active
 * interval whose endpoint is not smaller, or at the tail when there is none.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct list_head *insert_before = &ls->active_list;
   struct linear_scan_live_interval *cur;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(cur, &ls->active_list, list) {
      if (cur->endpoint >= interval->endpoint) {
         insert_before = &cur->list;
         break;
      }
   }

   list_addtail(&interval->list, insert_before);
}
/**
* Remove an interval from the active list.
*
* The interval is only unlinked; its register is not returned to the pool
* here.
*/
static void
linear_scan_remove_active(struct linear_scan *ls,
struct linear_scan_live_interval *interval)
{
list_del(&interval->list);
}
/**
 * Drop from the active list every interval that ends before \p pc and
 * return its register to the free register pool.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *cur, *saved_next;

   LIST_FOR_EACH_ENTRY_SAFE(cur, saved_next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      const bool still_active = (cur->endpoint >= pc);

      if (still_active)
         break;

      linear_scan_remove_active(ls, cur);

      /* recycle the reg */
      linear_scan_free_regs(ls, cur->reg, 1);
   }
}
/**
* Spill an interval.
*
* Spilling is not implemented: this only asserts, and does nothing at all
* when assertions are compiled out.
*/
static void
linear_scan_spill(struct linear_scan *ls,
struct linear_scan_live_interval *interval,
bool is_active)
{
assert(!"no spilling support");
}
/**
 * Spill \p count intervals, starting with the interval at index \p first.
 * None of them is treated as being on the active list.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int idx;

   for (idx = first; idx - first < count; idx++)
      linear_scan_spill(ls, &ls->intervals[idx], false);
}
/**
* Perform linear scan to allocate registers for the intervals.
*
* The intervals are visited in array order.  Intervals that share a
* startpoint and are marked consecutive are allocated together as one chunk
* of registers.  Returns false when no register could be found even after
* spilling.
*/
static bool
linear_scan_run(struct linear_scan *ls)
{
int i;
i = 0;
while (i < ls->num_vrfs) {
struct linear_scan_live_interval *first = &ls->intervals[i];
int reg, count;
/*
* BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
* support that
*/
for (count = 1; i + count < ls->num_vrfs; count++) {
const struct linear_scan_live_interval *interval =
&ls->intervals[i + count];
if (interval->startpoint != first->startpoint ||
!interval->consecutive)
break;
}
reg = linear_scan_allocate_regs(ls, count);
/* expire intervals that are no longer active and try again */
if (reg < 0) {
linear_scan_expire_active(ls, first->startpoint);
reg = linear_scan_allocate_regs(ls, count);
}
/* have to spill some intervals */
if (reg < 0) {
/* the active list is sorted by endpoint, so its tail ends last */
struct linear_scan_live_interval *last_active =
container_of(ls->active_list.prev,
(struct linear_scan_live_interval *) NULL, list);
/* heuristically spill the interval that ends last */
if (count > 1 || last_active->endpoint < first->endpoint) {
/* spill the new intervals themselves and move on */
linear_scan_spill_range(ls, i, count);
i += count;
continue;
}
/* make some room for the new interval */
linear_scan_spill(ls, last_active, true);
reg = linear_scan_allocate_regs(ls, count);
if (reg < 0) {
assert(!"failed to spill any register");
return false;
}
}
/* hand out the registers of the chunk one interval at a time */
while (count--) {
struct linear_scan_live_interval *interval = &ls->intervals[i++];
interval->reg = reg++;
linear_scan_add_active(ls, interval);
ls->vrf_mapping[interval->vrf] = interval->reg;
/*
* this should and must be the case because of how we initialized the
* intervals
*/
assert(interval->vrf - first->vrf == interval->reg - first->reg);
}
}
return true;
}
/**
* Add a new interval.
*/
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
if (ls->intervals[vrf].vrf)
return;
ls->intervals[vrf].vrf = vrf;
ls->intervals[vrf].startpoint = pc;
ls->num_vrfs++;
if (vrf > ls->max_vrf)
ls->max_vrf = vrf;
}
/**
 * Perform (oversimplified?) live variable analysis.
 *
 * The instructions are visited in order, markers included, and the interval
 * of every VRF register written or read is started at its first use and
 * extended to its last use.  Registers used inside an outermost loop are
 * considered live from the BRW_OPCODE_DO to the matching BRW_OPCODE_WHILE.
 * Markers do not advance the program counter.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   /* program counters delimiting the current outermost loop, if any */
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live through out
       * the whole loop
       */
      if (inst->marker) {
         /* markers of nested loops are passed over by the search below */
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == BRW_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching BRW_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                                     &tc->instructions, list) {
               if (inst2->marker) {
                  /*
                   * check the marker just found, not the outer
                   * BRW_OPCODE_DO that was validated above
                   */
                  assert(inst2->opcode == BRW_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == BRW_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == BRW_OPCODE_SEND ||
             inst->opcode == BRW_OPCODE_SENDC) {
            /* the response length is read from the descriptor in src[1] */
            const uint32_t mdesc = inst->src[1].val32;
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            /* all but the first destination must follow their predecessor */
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < Elements(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}
/**
 * Release the arrays owned by a linear scan context: the free register
 * pool, the intervals and the VRF mapping.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->free_regs);
   FREE(ls->intervals);
   FREE(ls->vrf_mapping);
}
/**
 * qsort() callback that orders live intervals by startpoint, then by VRF
 * number.  Unused intervals (those with a zero vrf) are placed after all
 * used ones and compare equal to each other.
 */
static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;
   const int unused1 = !interval1->vrf;
   const int unused2 = !interval2->vrf;

   /*
    * make unused elements appear at the end
    *
    * Two unused elements must compare equal: reporting "greater" for both
    * argument orders would give qsort() an inconsistent ordering.
    */
   if (unused1 || unused2)
      return unused1 - unused2;

   /* sort by startpoints first, and then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}
/**
 * Prepare for linear scan.
 *
 * The context is cleared, the live intervals are computed from the
 * instructions of \p tc and sorted, the free register pool is filled with
 * registers 0 to num_regs - 1, and the VRF mapping is allocated.  Returns
 * false, with everything allocated so far released, when an allocation
 * fails.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   /* this may be much larger than ls->num_vrfs... */
   const int num_intervals = tc->next_vrf;
   int slot, reg;

   memset(ls, 0, sizeof(*ls));

   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);

   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs)
      goto err_free_intervals;

   /* add in reverse order as we will allocate from the tail */
   reg = num_regs - 1;
   for (slot = 0; slot < ls->num_regs; slot++)
      ls->free_regs[slot] = reg--;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping)
      goto err_free_regs;

   return true;

err_free_regs:
   FREE(ls->free_regs);
err_free_intervals:
   FREE(ls->intervals);
   return false;
}
/**
 * Allocate registers with linear scan.
 *
 * The GRF registers in [start_grf, end_grf] are handed out in units of
 * \p num_grf_per_vrf registers.  On success every VRF operand of every
 * instruction is rewritten to the GRF register assigned to it.  When the
 * scan itself fails, the compilation is marked as failed.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   /*
    * NOTE(review): a failed initialization returns without reporting
    * anything to the compiler; the operands stay in the VRF file -- confirm
    * that callers cope with that.
    */
   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      /* the arrays set up by linear_scan_init() must not be leaked */
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}
/**
* Trivially allocate registers.
*/
static void
trivial_allocation(struct toy_compiler *tc,
int start_grf, int end_grf,
int num_grf_per_vrf)
{
struct toy_inst *inst;
int max_grf = -1;
tc_head(tc);
while ((inst = tc_next(tc)) != NULL) {
int i;
if (inst->dst.file == TOY_FILE_VRF) {
const uint32_t val32 = inst->dst.val32;
int reg = val32 / TOY_REG_WIDTH;
int subreg = val32 % TOY_REG_WIDTH;
reg = reg * num_grf_per_vrf + start_grf - 1;
inst->dst.file = TOY_FILE_GRF;
inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
if (reg > max_grf)
max_grf = reg;
}
for (i = 0; i < Elements(inst->src); i++) {
const uint32_t val32 = inst->src[i].val32;
int reg, subreg;
if (inst->src[i].file != TOY_FILE_VRF)
continue;
reg = val32 / TOY_REG_WIDTH;
subreg = val32 % TOY_REG_WIDTH;
reg = reg * num_grf_per_vrf + start_grf - 1;
inst->src[i].file = TOY_FILE_GRF;
inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
if (reg > max_grf)
max_grf = reg;
}
}
if (max_grf + num_grf_per_vrf - 1 > end_grf)
tc_fail(tc, "failed to allocate registers");
}
/**
 * Allocate GRF registers to VRF registers.
 *
 * The linear scan allocator is always used; the trivial allocator is only
 * kept as an alternative that is never selected.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   const bool use_linear_scan = true;

   if (!use_linear_scan) {
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
      return;
   }

   linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}

View file

@ -0,0 +1,71 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_optimize.h"
/**
 * This just eliminates instructions with null dst so far.
 *
 * An instruction is discarded when its destination is null or has an empty
 * writemask, it does not write the accumulator, and it is either a MATH or
 * has no condition modifier.  Flow control, SEND(C) and NOP are always kept.
 */
static void
eliminate_dead_code(struct toy_compiler *tc)
{
   struct toy_inst *cur;

   tc_head(tc);
   while ((cur = tc_next(tc)) != NULL) {
      switch (cur->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_SEND:
      case BRW_OPCODE_SENDC:
      case BRW_OPCODE_NOP:
         /* never eliminated */
         continue;
      default:
         break;
      }

      /* the result is written somewhere */
      if (!tdst_is_null(cur->dst) && cur->dst.writemask)
         continue;

      /* math is always BRW_CONDITIONAL_NONE */
      if (cur->opcode != BRW_OPCODE_MATH &&
          cur->cond_modifier != BRW_CONDITIONAL_NONE)
         continue;

      if (cur->acc_wr_ctrl)
         continue;

      tc_discard_inst(tc, cur);
   }
}
/**
 * Run the optimization passes on the instructions of \p tc.
 *
 * Dead code elimination is the only pass at the moment.
 */
void
toy_compiler_optimize(struct toy_compiler *tc)
{
eliminate_dead_code(tc);
}

View file

@ -0,0 +1,36 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_OPTIMIZE_H
#define TOY_OPTIMIZE_H
#include "toy_compiler.h"
/**
 * Run the optimization passes on the instructions of \p tc.
 */
void
toy_compiler_optimize(struct toy_compiler *tc);
#endif /* TOY_OPTIMIZE_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,253 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#ifndef TOY_TGSI_H
#define TOY_TGSI_H
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "toy_compiler.h"
struct tgsi_token;
struct tgsi_full_instruction;
struct util_hash_table;
/**
 * Signature of a callback that handles one TGSI instruction.  The
 * translate_table member of struct toy_tgsi points to callbacks of this
 * type.
 *
 * NOTE(review): dst and src are presumably the destination and source
 * operands of tgsi_inst, already converted to toy operands -- confirm
 * against the callers.
 */
typedef void (*toy_tgsi_translate)(struct toy_compiler *tc,
const struct tgsi_full_instruction *tgsi_inst,
struct toy_dst *dst,
struct toy_src *src);
/**
 * Information gathered about a TGSI shader: its properties, immediates,
 * inputs, outputs, and system values.
 *
 * NOTE(review): presumably filled by toy_compiler_translate_tgsi() and
 * released by toy_tgsi_cleanup() -- confirm against their definitions.
 */
struct toy_tgsi {
struct toy_compiler *tc;
/* NOTE(review): presumably the aos argument of toy_compiler_translate_tgsi() */
bool aos;
/* table of per-instruction translation callbacks */
const toy_tgsi_translate *translate_table;
struct util_hash_table *reg_mapping;
/*
 * Shader properties.  NOTE(review): the vs_, fs_, and gs_ prefixes
 * presumably name the shader stage a property applies to.
 */
struct {
bool vs_prohibit_ucps;
int fs_coord_origin;
int fs_coord_pixel_center;
bool fs_color0_writes_all_cbufs;
int fs_depth_layout;
int gs_input_prim;
int gs_output_prim;
int gs_max_output_vertices;
} props;
/*
 * Immediates.  Each one has four 32-bit words in buf and one entry in
 * types.  toy_tgsi_get_imm() treats the first cur entries as valid.
 * NOTE(review): size is presumably the allocated capacity -- confirm.
 */
struct {
enum toy_type *types;
uint32_t (*buf)[4];
int cur, size;
} imm_data;
/*
 * Inputs.  toy_tgsi_find_input() searches the first num_inputs entries for
 * a matching index.
 */
struct {
int index:16;
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
unsigned interp:4; /* TGSI_INTERPOLATE_x */
unsigned centroid:1;
} inputs[PIPE_MAX_SHADER_INPUTS];
int num_inputs;
/* Outputs.  NOTE(review): num_outputs is presumably the number of valid entries. */
struct {
int index:16;
unsigned undefined_mask:4;
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
} outputs[PIPE_MAX_SHADER_OUTPUTS];
int num_outputs;
/*
 * System values.  toy_tgsi_find_system_value() searches the first
 * num_system_values entries for a matching index.
 */
struct {
int index:16;
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
} system_values[8];
int num_system_values;
/* NOTE(review): presumably set when the shader has a kill instruction -- confirm */
bool uses_kill;
};
/**
 * Look up a TGSI input by its index.
 *
 * \return the position in tgsi->inputs of the first entry, among the first
 *         tgsi->num_inputs ones, whose index equals \p index; -1 when there
 *         is no such entry.
 */
static inline int
toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index)
{
   int i = 0;

   while (i < tgsi->num_inputs) {
      if (tgsi->inputs[i].index == index)
         return i;
      i++;
   }

   return -1;
}
/**
 * Look up a TGSI system value by its index.
 *
 * \return the position in tgsi->system_values of the first entry, among the
 *         first tgsi->num_system_values ones, whose index equals \p index;
 *         -1 when there is no such entry.
 */
static inline int
toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index)
{
   int found = -1;
   int i;

   for (i = 0; i < tgsi->num_system_values; i++) {
      if (tgsi->system_values[i].index == index) {
         found = i;
         break;
      }
   }

   return found;
}
/**
 * Fetch the data of a TGSI immediate.
 *
 * \param tgsi  the shader information holding the immediates
 * \param index the index of the immediate
 * \param type  when non-NULL, receives the type of the immediate
 *
 * \return the four 32-bit words of the immediate, or NULL when \p index is
 *         not below tgsi->imm_data.cur (in which case \p type is left
 *         untouched).
 */
static inline const uint32_t *
toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index,
                 enum toy_type *type)
{
   if (index < tgsi->imm_data.cur) {
      if (type)
         *type = tgsi->imm_data.types[index];

      return tgsi->imm_data.buf[index];
   }

   return NULL;
}
/**
 * Describe how the coordinates of a texture target are laid out.
 *
 * \param tgsi_tex         the texture target, one of TGSI_TEXTURE_x
 * \param shadow_or_sample when non-NULL, receives the position of the shadow
 *                         reference value or of the sample index, or -1 when
 *                         the target has neither
 *
 * \return the number of coordinates, array layer included; 0 for an unknown
 *         target when assertions are compiled out.
 */
static inline int
toy_tgsi_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
{
   /* position of the shadow reference value or the sample index, if any */
   int extra = -1;
   int dim;

   /*
    * The five components of (src0, src1.x) mean different things for
    * different texture targets:
    *
    *   1D                  (s,     *,      *,      *,      *)
    *   2D, RECT            (s,     t,      *,      *,      *)
    *   3D, CUBE            (s,     t,      r,      *,      *)
    *
    *   1D_ARRAY            (s,     layer,  *,      *,      *)
    *   2D_ARRAY            (s,     t,      layer,  *,      *)
    *   CUBE_ARRAY          (s,     t,      r,      layer,  *)
    *
    *   SHADOW1D            (s,     *,      shadow, *,      *)
    *   SHADOW2D,
    *   SHADOWRECT          (s,     t,      shadow, *,      *)
    *   SHADOWCUBE          (s,     t,      r,      shadow, *)
    *
    *   SHADOW1D_ARRAY      (s,     layer,  shadow, *,      *)
    *   SHADOW2D_ARRAY      (s,     t,      layer,  shadow, *)
    *   SHADOWCUBE_ARRAY    (s,     t,      r,      layer,  shadow)
    *
    *   2D_MSAA             (s,     t,      sample, *,      *)
    *   2D_ARRAY_MSAA       (s,     t,      layer,  sample, *)
    */
   switch (tgsi_tex) {
   case TGSI_TEXTURE_1D:
      dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      dim = 1;
      /* there is a gap between the coordinate and the reference value */
      extra = 2;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_1D_ARRAY:
      dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
   case TGSI_TEXTURE_2D_MSAA:
      dim = 2;
      extra = 2;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_2D_ARRAY:
      dim = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      dim = 3;
      extra = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      dim = 4;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      dim = 4;
      extra = 4;
      break;
   default:
      assert(!"unknown texture target");
      dim = 0;
      break;
   }

   if (shadow_or_sample)
      *shadow_or_sample = extra;

   return dim;
}
/*
 * The functions below are only declared in this header; the notes on them
 * are inferred from their names and signatures.
 */

/**
 * NOTE(review): presumably translates the TGSI tokens into instructions of
 * \p tc and records what was found about the shader in \p tgsi -- confirm
 * against the definition.
 */
void
toy_compiler_translate_tgsi(struct toy_compiler *tc,
const struct tgsi_token *tokens, bool aos,
struct toy_tgsi *tgsi);
/**
 * NOTE(review): presumably releases what toy_compiler_translate_tgsi()
 * allocated in \p tgsi -- confirm against the definition.
 */
void
toy_tgsi_cleanup(struct toy_tgsi *tgsi);
/**
 * NOTE(review): presumably returns the VRF register that the TGSI register
 * (file, dimension, index) is mapped to -- confirm against the definition,
 * including the value returned when there is no mapping.
 */
int
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
enum tgsi_file_type file, int dimension, int index);
/**
 * NOTE(review): presumably prints the contents of \p tgsi for debugging --
 * confirm against the definition.
 */
void
toy_tgsi_dump(const struct toy_tgsi *tgsi);
#endif /* TOY_TGSI_H */