mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 17:50:11 +01:00
ilo: add a toy shader compiler
This is a simple shader compiler that performs almost no optimizations. The generated code is usually much larger than that generated by i965, and it also requires many more registers. Function-wise, it lacks register spilling and does not support most TGSI indirections. Other than that, it works all right.
This commit is contained in:
parent
0fa2d0e98a
commit
7118ff8bb0
14 changed files with 8669 additions and 1 deletions
|
|
@ -16,4 +16,11 @@ C_SOURCES := \
|
|||
ilo_screen.c \
|
||||
ilo_shader.c \
|
||||
ilo_state.c \
|
||||
ilo_video.c
|
||||
ilo_video.c \
|
||||
shader/toy_compiler.c \
|
||||
shader/toy_compiler_asm.c \
|
||||
shader/toy_compiler_disasm.c \
|
||||
shader/toy_legalize.c \
|
||||
shader/toy_legalize_ra.c \
|
||||
shader/toy_optimize.c \
|
||||
shader/toy_tgsi.c
|
||||
|
|
|
|||
556
src/gallium/drivers/ilo/shader/toy_compiler.c
Normal file
556
src/gallium/drivers/ilo/shader/toy_compiler.c
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#include "toy_compiler.h"
|
||||
|
||||
/**
|
||||
* Dump an operand.
|
||||
*/
|
||||
static void
|
||||
tc_dump_operand(struct toy_compiler *tc,
|
||||
enum toy_file file, enum toy_type type, enum toy_rect rect,
|
||||
bool indirect, unsigned indirect_subreg, uint32_t val32,
|
||||
bool is_dst)
|
||||
{
|
||||
static const char *toy_file_names[TOY_FILE_COUNT] = {
|
||||
[TOY_FILE_VRF] = "v",
|
||||
[TOY_FILE_ARF] = "NOT USED",
|
||||
[TOY_FILE_GRF] = "r",
|
||||
[TOY_FILE_MRF] = "m",
|
||||
[TOY_FILE_IMM] = "NOT USED",
|
||||
};
|
||||
const char *name = toy_file_names[file];
|
||||
int reg, subreg;
|
||||
|
||||
if (file != TOY_FILE_IMM) {
|
||||
reg = val32 / TOY_REG_WIDTH;
|
||||
subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
|
||||
}
|
||||
|
||||
switch (file) {
|
||||
case TOY_FILE_GRF:
|
||||
if (indirect) {
|
||||
const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);
|
||||
|
||||
ilo_printf("%s[a0.%d", name, addr_subreg);
|
||||
if (val32)
|
||||
ilo_printf("%+d", (int) val32);
|
||||
ilo_printf("]");
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
case TOY_FILE_VRF:
|
||||
case TOY_FILE_MRF:
|
||||
ilo_printf("%s%d", name, reg);
|
||||
if (subreg)
|
||||
ilo_printf(".%d", subreg);
|
||||
break;
|
||||
case TOY_FILE_ARF:
|
||||
switch (reg) {
|
||||
case BRW_ARF_NULL:
|
||||
ilo_printf("null");
|
||||
break;
|
||||
case BRW_ARF_ADDRESS:
|
||||
ilo_printf("a0.%d", subreg);
|
||||
break;
|
||||
case BRW_ARF_ACCUMULATOR:
|
||||
case BRW_ARF_ACCUMULATOR + 1:
|
||||
ilo_printf("acc%d.%d", (reg & 1), subreg);
|
||||
break;
|
||||
case BRW_ARF_FLAG:
|
||||
ilo_printf("f0.%d", subreg);
|
||||
break;
|
||||
case BRW_ARF_STATE:
|
||||
ilo_printf("sr0.%d", subreg);
|
||||
break;
|
||||
case BRW_ARF_CONTROL:
|
||||
ilo_printf("cr0.%d", subreg);
|
||||
break;
|
||||
case BRW_ARF_NOTIFICATION_COUNT:
|
||||
case BRW_ARF_NOTIFICATION_COUNT + 1:
|
||||
ilo_printf("n%d.%d", (reg & 1), subreg);
|
||||
break;
|
||||
case BRW_ARF_IP:
|
||||
ilo_printf("ip");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TOY_FILE_IMM:
|
||||
switch (type) {
|
||||
case TOY_TYPE_F:
|
||||
{
|
||||
union fi fi = { .ui = val32 };
|
||||
ilo_printf("%f", fi.f);
|
||||
}
|
||||
break;
|
||||
case TOY_TYPE_D:
|
||||
ilo_printf("%d", (int32_t) val32);
|
||||
break;
|
||||
case TOY_TYPE_UD:
|
||||
ilo_printf("%u", val32);
|
||||
break;
|
||||
case TOY_TYPE_W:
|
||||
ilo_printf("%d", (int16_t) (val32 & 0xffff));
|
||||
break;
|
||||
case TOY_TYPE_UW:
|
||||
ilo_printf("%u", val32 & 0xffff);
|
||||
break;
|
||||
case TOY_TYPE_V:
|
||||
ilo_printf("0x%08x", val32);
|
||||
break;
|
||||
default:
|
||||
assert(!"unknown imm type");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"unexpected file");
|
||||
break;
|
||||
}
|
||||
|
||||
/* dump the region parameter */
|
||||
if (file != TOY_FILE_IMM) {
|
||||
int vert_stride, width, horz_stride;
|
||||
|
||||
switch (rect) {
|
||||
case TOY_RECT_LINEAR:
|
||||
vert_stride = tc->rect_linear_width;
|
||||
width = tc->rect_linear_width;
|
||||
horz_stride = 1;
|
||||
break;
|
||||
case TOY_RECT_041:
|
||||
vert_stride = 0;
|
||||
width = 4;
|
||||
horz_stride = 1;
|
||||
break;
|
||||
case TOY_RECT_010:
|
||||
vert_stride = 0;
|
||||
width = 1;
|
||||
horz_stride = 0;
|
||||
break;
|
||||
case TOY_RECT_220:
|
||||
vert_stride = 2;
|
||||
width = 2;
|
||||
horz_stride = 0;
|
||||
break;
|
||||
case TOY_RECT_440:
|
||||
vert_stride = 4;
|
||||
width = 4;
|
||||
horz_stride = 0;
|
||||
break;
|
||||
case TOY_RECT_240:
|
||||
vert_stride = 2;
|
||||
width = 4;
|
||||
horz_stride = 0;
|
||||
break;
|
||||
default:
|
||||
assert(!"unknown rect parameter");
|
||||
vert_stride = 0;
|
||||
width = 0;
|
||||
horz_stride = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_dst)
|
||||
ilo_printf("<%d>", horz_stride);
|
||||
else
|
||||
ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case TOY_TYPE_F:
|
||||
ilo_printf(":f");
|
||||
break;
|
||||
case TOY_TYPE_D:
|
||||
ilo_printf(":d");
|
||||
break;
|
||||
case TOY_TYPE_UD:
|
||||
ilo_printf(":ud");
|
||||
break;
|
||||
case TOY_TYPE_W:
|
||||
ilo_printf(":w");
|
||||
break;
|
||||
case TOY_TYPE_UW:
|
||||
ilo_printf(":uw");
|
||||
break;
|
||||
case TOY_TYPE_V:
|
||||
ilo_printf(":v");
|
||||
break;
|
||||
default:
|
||||
assert(!"unexpected type");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump a source operand.
|
||||
*/
|
||||
static void
|
||||
tc_dump_src(struct toy_compiler *tc, struct toy_src src)
|
||||
{
|
||||
if (src.negate)
|
||||
ilo_printf("-");
|
||||
if (src.absolute)
|
||||
ilo_printf("|");
|
||||
|
||||
tc_dump_operand(tc, src.file, src.type, src.rect,
|
||||
src.indirect, src.indirect_subreg, src.val32, false);
|
||||
|
||||
if (tsrc_is_swizzled(src)) {
|
||||
const char xyzw[] = "xyzw";
|
||||
ilo_printf(".%c%c%c%c",
|
||||
xyzw[src.swizzle_x],
|
||||
xyzw[src.swizzle_y],
|
||||
xyzw[src.swizzle_z],
|
||||
xyzw[src.swizzle_w]);
|
||||
}
|
||||
|
||||
if (src.absolute)
|
||||
ilo_printf("|");
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump a destination operand.
|
||||
*/
|
||||
static void
|
||||
tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst)
|
||||
{
|
||||
tc_dump_operand(tc, dst.file, dst.type, dst.rect,
|
||||
dst.indirect, dst.indirect_subreg, dst.val32, true);
|
||||
|
||||
if (dst.writemask != TOY_WRITEMASK_XYZW) {
|
||||
ilo_printf(".");
|
||||
if (dst.writemask & TOY_WRITEMASK_X)
|
||||
ilo_printf("x");
|
||||
if (dst.writemask & TOY_WRITEMASK_Y)
|
||||
ilo_printf("y");
|
||||
if (dst.writemask & TOY_WRITEMASK_Z)
|
||||
ilo_printf("z");
|
||||
if (dst.writemask & TOY_WRITEMASK_W)
|
||||
ilo_printf("w");
|
||||
}
|
||||
}
|
||||
|
||||
static const char *
|
||||
get_opcode_name(unsigned opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case BRW_OPCODE_MOV: return "mov";
|
||||
case BRW_OPCODE_SEL: return "sel";
|
||||
case BRW_OPCODE_NOT: return "not";
|
||||
case BRW_OPCODE_AND: return "and";
|
||||
case BRW_OPCODE_OR: return "or";
|
||||
case BRW_OPCODE_XOR: return "xor";
|
||||
case BRW_OPCODE_SHR: return "shr";
|
||||
case BRW_OPCODE_SHL: return "shl";
|
||||
case BRW_OPCODE_RSR: return "rsr";
|
||||
case BRW_OPCODE_RSL: return "rsl";
|
||||
case BRW_OPCODE_ASR: return "asr";
|
||||
case BRW_OPCODE_CMP: return "cmp";
|
||||
case BRW_OPCODE_CMPN: return "cmpn";
|
||||
case BRW_OPCODE_JMPI: return "jmpi";
|
||||
case BRW_OPCODE_IF: return "if";
|
||||
case BRW_OPCODE_IFF: return "iff";
|
||||
case BRW_OPCODE_ELSE: return "else";
|
||||
case BRW_OPCODE_ENDIF: return "endif";
|
||||
case BRW_OPCODE_DO: return "do";
|
||||
case BRW_OPCODE_WHILE: return "while";
|
||||
case BRW_OPCODE_BREAK: return "break";
|
||||
case BRW_OPCODE_CONTINUE: return "continue";
|
||||
case BRW_OPCODE_HALT: return "halt";
|
||||
case BRW_OPCODE_MSAVE: return "msave";
|
||||
case BRW_OPCODE_MRESTORE: return "mrestore";
|
||||
case BRW_OPCODE_PUSH: return "push";
|
||||
case BRW_OPCODE_POP: return "pop";
|
||||
case BRW_OPCODE_WAIT: return "wait";
|
||||
case BRW_OPCODE_SEND: return "send";
|
||||
case BRW_OPCODE_SENDC: return "sendc";
|
||||
case BRW_OPCODE_MATH: return "math";
|
||||
case BRW_OPCODE_ADD: return "add";
|
||||
case BRW_OPCODE_MUL: return "mul";
|
||||
case BRW_OPCODE_AVG: return "avg";
|
||||
case BRW_OPCODE_FRC: return "frc";
|
||||
case BRW_OPCODE_RNDU: return "rndu";
|
||||
case BRW_OPCODE_RNDD: return "rndd";
|
||||
case BRW_OPCODE_RNDE: return "rnde";
|
||||
case BRW_OPCODE_RNDZ: return "rndz";
|
||||
case BRW_OPCODE_MAC: return "mac";
|
||||
case BRW_OPCODE_MACH: return "mach";
|
||||
case BRW_OPCODE_LZD: return "lzd";
|
||||
case BRW_OPCODE_SAD2: return "sad2";
|
||||
case BRW_OPCODE_SADA2: return "sada2";
|
||||
case BRW_OPCODE_DP4: return "dp4";
|
||||
case BRW_OPCODE_DPH: return "dph";
|
||||
case BRW_OPCODE_DP3: return "dp3";
|
||||
case BRW_OPCODE_DP2: return "dp2";
|
||||
case BRW_OPCODE_DPA2: return "dpa2";
|
||||
case BRW_OPCODE_LINE: return "line";
|
||||
case BRW_OPCODE_PLN: return "pln";
|
||||
case BRW_OPCODE_MAD: return "mad";
|
||||
case BRW_OPCODE_NOP: return "nop";
|
||||
/* TGSI */
|
||||
case TOY_OPCODE_TGSI_IN: return "tgsi.in";
|
||||
case TOY_OPCODE_TGSI_CONST: return "tgsi.const";
|
||||
case TOY_OPCODE_TGSI_SV: return "tgsi.sv";
|
||||
case TOY_OPCODE_TGSI_IMM: return "tgsi.imm";
|
||||
case TOY_OPCODE_TGSI_INDIRECT_FETCH: return "tgsi.indirect_fetch";
|
||||
case TOY_OPCODE_TGSI_INDIRECT_STORE: return "tgsi.indirect_store";
|
||||
case TOY_OPCODE_TGSI_TEX: return "tgsi.tex";
|
||||
case TOY_OPCODE_TGSI_TXB: return "tgsi.txb";
|
||||
case TOY_OPCODE_TGSI_TXD: return "tgsi.txd";
|
||||
case TOY_OPCODE_TGSI_TXL: return "tgsi.txl";
|
||||
case TOY_OPCODE_TGSI_TXP: return "tgsi.txp";
|
||||
case TOY_OPCODE_TGSI_TXF: return "tgsi.txf";
|
||||
case TOY_OPCODE_TGSI_TXQ: return "tgsi.txq";
|
||||
case TOY_OPCODE_TGSI_TXQ_LZ: return "tgsi.txq_lz";
|
||||
case TOY_OPCODE_TGSI_TEX2: return "tgsi.tex2";
|
||||
case TOY_OPCODE_TGSI_TXB2: return "tgsi.txb2";
|
||||
case TOY_OPCODE_TGSI_TXL2: return "tgsi.txl2";
|
||||
case TOY_OPCODE_TGSI_SAMPLE: return "tgsi.sample";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_I: return "tgsi.sample_i";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_I_MS: return "tgsi.sample_i_ms";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_B: return "tgsi.sample_b";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_C: return "tgsi.sample_c";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_C_LZ: return "tgsi.sample_c_lz";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_D: return "tgsi.sample_d";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_L: return "tgsi.sample_l";
|
||||
case TOY_OPCODE_TGSI_GATHER4: return "tgsi.gather4";
|
||||
case TOY_OPCODE_TGSI_SVIEWINFO: return "tgsi.sviewinfo";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_POS: return "tgsi.sample_pos";
|
||||
case TOY_OPCODE_TGSI_SAMPLE_INFO: return "tgsi.sample_info";
|
||||
/* math */
|
||||
case TOY_OPCODE_INV: return "math.inv";
|
||||
case TOY_OPCODE_LOG: return "math.log";
|
||||
case TOY_OPCODE_EXP: return "math.exp";
|
||||
case TOY_OPCODE_SQRT: return "math.sqrt";
|
||||
case TOY_OPCODE_RSQ: return "math.rsq";
|
||||
case TOY_OPCODE_SIN: return "math.sin";
|
||||
case TOY_OPCODE_COS: return "math.cos";
|
||||
case TOY_OPCODE_FDIV: return "math.fdiv";
|
||||
case TOY_OPCODE_POW: return "math.pow";
|
||||
case TOY_OPCODE_INT_DIV_QUOTIENT: return "math.int_div_quotient";
|
||||
case TOY_OPCODE_INT_DIV_REMAINDER: return "math.int_div_remainer";
|
||||
/* urb */
|
||||
case TOY_OPCODE_URB_WRITE: return "urb.urb_write";
|
||||
/* gs */
|
||||
case TOY_OPCODE_EMIT: return "gs.emit";
|
||||
case TOY_OPCODE_ENDPRIM: return "gs.endprim";
|
||||
/* fs */
|
||||
case TOY_OPCODE_DDX: return "fs.ddx";
|
||||
case TOY_OPCODE_DDY: return "fs.ddy";
|
||||
case TOY_OPCODE_FB_WRITE: return "fs.fb_write";
|
||||
case TOY_OPCODE_KIL: return "fs.kil";
|
||||
default: return "unk";
|
||||
}
|
||||
}
|
||||
|
||||
static const char *
|
||||
get_cond_modifier_name(unsigned opcode, unsigned cond_modifier)
|
||||
{
|
||||
switch (opcode) {
|
||||
case BRW_OPCODE_SEND:
|
||||
case BRW_OPCODE_SENDC:
|
||||
/* SFID */
|
||||
switch (cond_modifier) {
|
||||
case BRW_SFID_NULL: return "Null";
|
||||
case BRW_SFID_SAMPLER: return "Sampling Engine";
|
||||
case BRW_SFID_MESSAGE_GATEWAY: return "Message Gateway";
|
||||
case GEN6_SFID_DATAPORT_SAMPLER_CACHE: return "Data Port Sampler Cache";
|
||||
case GEN6_SFID_DATAPORT_RENDER_CACHE: return "Data Port Render Cache";
|
||||
case BRW_SFID_URB: return "URB";
|
||||
case BRW_SFID_THREAD_SPAWNER: return "Thread Spawner";
|
||||
case GEN6_SFID_DATAPORT_CONSTANT_CACHE: return "Constant Cache";
|
||||
default: return "Unknown";
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_MATH:
|
||||
/* FC */
|
||||
switch (cond_modifier) {
|
||||
case BRW_MATH_FUNCTION_INV: return "INV";
|
||||
case BRW_MATH_FUNCTION_LOG: return "LOG";
|
||||
case BRW_MATH_FUNCTION_EXP: return "EXP";
|
||||
case BRW_MATH_FUNCTION_SQRT: return "SQRT";
|
||||
case BRW_MATH_FUNCTION_RSQ: return "RSQ";
|
||||
case BRW_MATH_FUNCTION_SIN: return "SIN";
|
||||
case BRW_MATH_FUNCTION_COS: return "COS";
|
||||
case BRW_MATH_FUNCTION_FDIV: return "FDIV";
|
||||
case BRW_MATH_FUNCTION_POW: return "POW";
|
||||
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: return "INT DIV (quotient)";
|
||||
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)";
|
||||
default: return "UNK";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
switch (cond_modifier) {
|
||||
case BRW_CONDITIONAL_NONE: return NULL;
|
||||
case BRW_CONDITIONAL_Z: return "z";
|
||||
case BRW_CONDITIONAL_NZ: return "nz";
|
||||
case BRW_CONDITIONAL_G: return "g";
|
||||
case BRW_CONDITIONAL_GE: return "ge";
|
||||
case BRW_CONDITIONAL_L: return "l";
|
||||
case BRW_CONDITIONAL_LE: return "le";
|
||||
default: return "unk";
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump an instruction.
|
||||
*/
|
||||
static void
|
||||
tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst)
|
||||
{
|
||||
const char *name;
|
||||
int i;
|
||||
|
||||
name = get_opcode_name(inst->opcode);
|
||||
|
||||
ilo_printf(" %s", name);
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_NOP) {
|
||||
ilo_printf("\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (inst->saturate)
|
||||
ilo_printf(".sat");
|
||||
|
||||
name = get_cond_modifier_name(inst->opcode, inst->cond_modifier);
|
||||
if (name)
|
||||
ilo_printf(".%s", name);
|
||||
|
||||
ilo_printf(" ");
|
||||
|
||||
tc_dump_dst(tc, inst->dst);
|
||||
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
if (tsrc_is_null(inst->src[i]))
|
||||
break;
|
||||
|
||||
ilo_printf(", ");
|
||||
tc_dump_src(tc, inst->src[i]);
|
||||
}
|
||||
|
||||
ilo_printf("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump the instructions added to the compiler.
|
||||
*/
|
||||
void
|
||||
toy_compiler_dump(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
int pc;
|
||||
|
||||
pc = 0;
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next_no_skip(tc)) != NULL) {
|
||||
/* we do not generate code for markers */
|
||||
if (inst->marker)
|
||||
ilo_printf("marker:");
|
||||
else
|
||||
ilo_printf("%6d:", pc++);
|
||||
|
||||
tc_dump_inst(tc, inst);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up the toy compiler.
|
||||
*/
|
||||
void
|
||||
toy_compiler_cleanup(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst, *next;
|
||||
|
||||
LIST_FOR_EACH_ENTRY_SAFE(inst, next, &tc->instructions, list)
|
||||
util_slab_free(&tc->mempool, inst);
|
||||
|
||||
util_slab_destroy(&tc->mempool);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the instruction template, from which tc_add() initializes the
|
||||
* newly added instructions.
|
||||
*/
|
||||
static void
|
||||
tc_init_inst_templ(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *templ = &tc->templ;
|
||||
int i;
|
||||
|
||||
templ->opcode = BRW_OPCODE_NOP;
|
||||
templ->access_mode = BRW_ALIGN_1;
|
||||
templ->mask_ctrl = BRW_MASK_ENABLE;
|
||||
templ->dep_ctrl = BRW_DEPENDENCY_NORMAL;
|
||||
templ->qtr_ctrl = GEN6_COMPRESSION_1Q;
|
||||
templ->thread_ctrl = BRW_THREAD_NORMAL;
|
||||
templ->pred_ctrl = BRW_PREDICATE_NONE;
|
||||
templ->pred_inv = false;
|
||||
templ->exec_size = BRW_EXECUTE_1;
|
||||
templ->cond_modifier = BRW_CONDITIONAL_NONE;
|
||||
templ->acc_wr_ctrl = false;
|
||||
templ->saturate = false;
|
||||
|
||||
templ->marker = false;
|
||||
|
||||
templ->dst = tdst_null();
|
||||
for (i = 0; i < Elements(templ->src); i++)
|
||||
templ->src[i] = tsrc_null();
|
||||
|
||||
for (i = 0; i < Elements(templ->tex.offsets); i++)
|
||||
templ->tex.offsets[i] = tsrc_null();
|
||||
|
||||
list_inithead(&templ->list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the toy compiler.
|
||||
*/
|
||||
void
|
||||
toy_compiler_init(struct toy_compiler *tc, int gen)
|
||||
{
|
||||
memset(tc, 0, sizeof(*tc));
|
||||
|
||||
tc->gen = gen;
|
||||
|
||||
tc_init_inst_templ(tc);
|
||||
|
||||
util_slab_create(&tc->mempool, sizeof(struct toy_inst),
|
||||
64, UTIL_SLAB_SINGLETHREADED);
|
||||
|
||||
list_inithead(&tc->instructions);
|
||||
/* instructions are added to the tail */
|
||||
tc_tail(tc);
|
||||
|
||||
tc->rect_linear_width = 1;
|
||||
|
||||
/* skip 0 so that util_hash_table_get() never returns NULL */
|
||||
tc->next_vrf = 1;
|
||||
}
|
||||
473
src/gallium/drivers/ilo/shader/toy_compiler.h
Normal file
473
src/gallium/drivers/ilo/shader/toy_compiler.h
Normal file
|
|
@ -0,0 +1,473 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_COMPILER_H
|
||||
#define TOY_COMPILER_H
|
||||
|
||||
#include "brw_defines.h"
|
||||
|
||||
#include "util/u_slab.h"
|
||||
#include "ilo_common.h"
|
||||
#include "toy_compiler_reg.h"
|
||||
|
||||
/**
 * Toy opcodes.
 *
 * Values up to TOY_OPCODE_LAST_HW are reserved for hardware opcodes
 * (BRW_OPCODE_x).  The virtual opcodes follow directly after it.
 */
enum toy_opcode {
   /* 0..127 are reserved for BRW_OPCODE_x */
   TOY_OPCODE_LAST_HW = 127,

   /* TGSI register functions */
   TOY_OPCODE_TGSI_IN = TOY_OPCODE_LAST_HW + 1,
   TOY_OPCODE_TGSI_CONST,
   TOY_OPCODE_TGSI_SV,
   TOY_OPCODE_TGSI_IMM,
   TOY_OPCODE_TGSI_INDIRECT_FETCH,
   TOY_OPCODE_TGSI_INDIRECT_STORE,

   /* TGSI sampling functions */
   TOY_OPCODE_TGSI_TEX,
   TOY_OPCODE_TGSI_TXB,
   TOY_OPCODE_TGSI_TXD,
   TOY_OPCODE_TGSI_TXL,
   TOY_OPCODE_TGSI_TXP,
   TOY_OPCODE_TGSI_TXF,
   TOY_OPCODE_TGSI_TXQ,
   TOY_OPCODE_TGSI_TXQ_LZ,
   TOY_OPCODE_TGSI_TEX2,
   TOY_OPCODE_TGSI_TXB2,
   TOY_OPCODE_TGSI_TXL2,
   TOY_OPCODE_TGSI_SAMPLE,
   TOY_OPCODE_TGSI_SAMPLE_I,
   TOY_OPCODE_TGSI_SAMPLE_I_MS,
   TOY_OPCODE_TGSI_SAMPLE_B,
   TOY_OPCODE_TGSI_SAMPLE_C,
   TOY_OPCODE_TGSI_SAMPLE_C_LZ,
   TOY_OPCODE_TGSI_SAMPLE_D,
   TOY_OPCODE_TGSI_SAMPLE_L,
   TOY_OPCODE_TGSI_GATHER4,
   TOY_OPCODE_TGSI_SVIEWINFO,
   TOY_OPCODE_TGSI_SAMPLE_POS,
   TOY_OPCODE_TGSI_SAMPLE_INFO,

   /* math functions */
   TOY_OPCODE_INV,
   TOY_OPCODE_LOG,
   TOY_OPCODE_EXP,
   TOY_OPCODE_SQRT,
   TOY_OPCODE_RSQ,
   TOY_OPCODE_SIN,
   TOY_OPCODE_COS,
   TOY_OPCODE_FDIV,
   TOY_OPCODE_POW,
   TOY_OPCODE_INT_DIV_QUOTIENT,
   TOY_OPCODE_INT_DIV_REMAINDER,

   /* URB functions */
   TOY_OPCODE_URB_WRITE,

   /* GS-specific functions */
   TOY_OPCODE_EMIT,
   TOY_OPCODE_ENDPRIM,

   /* FS-specific functions */
   TOY_OPCODE_DDX,
   TOY_OPCODE_DDY,
   TOY_OPCODE_FB_WRITE,
   TOY_OPCODE_KIL,
};
|
||||
|
||||
/**
|
||||
* Toy instruction.
|
||||
*/
|
||||
struct toy_inst {
|
||||
unsigned opcode:8; /* enum toy_opcode */
|
||||
unsigned access_mode:1; /* BRW_ALIGN_x */
|
||||
unsigned mask_ctrl:1; /* BRW_MASK_x */
|
||||
unsigned dep_ctrl:2; /* BRW_DEPENDENCY_x */
|
||||
unsigned qtr_ctrl:2; /* GEN6_COMPRESSION_x */
|
||||
unsigned thread_ctrl:2; /* BRW_THREAD_x */
|
||||
unsigned pred_ctrl:4; /* BRW_PREDICATE_x */
|
||||
unsigned pred_inv:1; /* true or false */
|
||||
unsigned exec_size:3; /* BRW_EXECUTE_x */
|
||||
unsigned cond_modifier:4; /* BRW_CONDITIONAL_x */
|
||||
unsigned acc_wr_ctrl:1; /* true or false */
|
||||
unsigned saturate:1; /* true or false */
|
||||
|
||||
/* true if the instruction should be ignored for instruction iteration */
|
||||
unsigned marker:1;
|
||||
|
||||
unsigned pad:1;
|
||||
|
||||
struct toy_dst dst;
|
||||
struct toy_src src[5]; /* match TGSI_FULL_MAX_SRC_REGISTERS */
|
||||
|
||||
struct {
|
||||
int target; /* TGSI_TEXTURE_x */
|
||||
struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
|
||||
} tex;
|
||||
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/**
|
||||
* Toy compiler.
|
||||
*/
|
||||
struct toy_compiler {
|
||||
int gen;
|
||||
|
||||
struct toy_inst templ;
|
||||
struct util_slab_mempool mempool;
|
||||
struct list_head instructions;
|
||||
struct list_head *iter, *iter_next;
|
||||
|
||||
/* this is not set until toy_compiler_legalize_for_asm() */
|
||||
int num_instructions;
|
||||
|
||||
int rect_linear_width;
|
||||
int next_vrf;
|
||||
|
||||
bool fail;
|
||||
const char *reason;
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocate the given number of VRF registers.
|
||||
*/
|
||||
static inline int
|
||||
tc_alloc_vrf(struct toy_compiler *tc, int count)
|
||||
{
|
||||
const int vrf = tc->next_vrf;
|
||||
|
||||
tc->next_vrf += count;
|
||||
|
||||
return vrf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a temporary register.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tc_alloc_tmp(struct toy_compiler *tc)
|
||||
{
|
||||
return tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, 1), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate four temporary registers.
|
||||
*/
|
||||
static inline void
|
||||
tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp)
|
||||
{
|
||||
tmp[0] = tc_alloc_tmp(tc);
|
||||
tmp[1] = tc_alloc_tmp(tc);
|
||||
tmp[2] = tc_alloc_tmp(tc);
|
||||
tmp[3] = tc_alloc_tmp(tc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Duplicate an instruction at the current location.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst)
|
||||
{
|
||||
struct toy_inst *new_inst;
|
||||
|
||||
new_inst = util_slab_alloc(&tc->mempool);
|
||||
if (!new_inst)
|
||||
return NULL;
|
||||
|
||||
*new_inst = *inst;
|
||||
list_addtail(&new_inst->list, tc->iter_next);
|
||||
|
||||
return new_inst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move an instruction to the current location.
|
||||
*/
|
||||
static inline void
|
||||
tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
list_del(&inst->list);
|
||||
list_addtail(&inst->list, tc->iter_next);
|
||||
}
|
||||
|
||||
/**
|
||||
* Discard an instruction.
|
||||
*/
|
||||
static inline void
|
||||
tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
list_del(&inst->list);
|
||||
util_slab_free(&tc->mempool, inst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new instruction at the current location, using tc->templ as the
|
||||
* template.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_add(struct toy_compiler *tc)
|
||||
{
|
||||
return tc_duplicate_inst(tc, &tc->templ);
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenient version of tc_add() for instructions with 3 source operands.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_add3(struct toy_compiler *tc, unsigned opcode,
|
||||
struct toy_dst dst,
|
||||
struct toy_src src0,
|
||||
struct toy_src src1,
|
||||
struct toy_src src2)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
|
||||
inst = tc_add(tc);
|
||||
if (!inst)
|
||||
return NULL;
|
||||
|
||||
inst->opcode = opcode;
|
||||
inst->dst = dst;
|
||||
inst->src[0] = src0;
|
||||
inst->src[1] = src1;
|
||||
inst->src[2] = src2;
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenient version of tc_add() for instructions with 2 source operands.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_add2(struct toy_compiler *tc, int opcode,
|
||||
struct toy_dst dst,
|
||||
struct toy_src src0,
|
||||
struct toy_src src1)
|
||||
{
|
||||
return tc_add3(tc, opcode, dst, src0, src1, tsrc_null());
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenient version of tc_add() for instructions with 1 source operand.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_add1(struct toy_compiler *tc, unsigned opcode,
|
||||
struct toy_dst dst,
|
||||
struct toy_src src0)
|
||||
{
|
||||
return tc_add2(tc, opcode, dst, src0, tsrc_null());
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenient version of tc_add() for instructions without source or
|
||||
* destination operands.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_add0(struct toy_compiler *tc, unsigned opcode)
|
||||
{
|
||||
return tc_add1(tc, opcode, tdst_null(), tsrc_null());
|
||||
}
|
||||
|
||||
/*
 * TC_ALUn(func, opcode) defines an inline function named func that adds an
 * instruction with the given opcode and n source operands via tc_addn().
 */

/* no operands at all */
#define TC_ALU0(func, opcode)                                        \
static inline struct toy_inst *                                      \
func(struct toy_compiler *tc)                                        \
{                                                                    \
   return tc_add0(tc, opcode);                                       \
}

/* destination and one source */
#define TC_ALU1(func, opcode)                                        \
static inline struct toy_inst *                                      \
func(struct toy_compiler *tc,                                        \
     struct toy_dst dst,                                             \
     struct toy_src src)                                             \
{                                                                    \
   return tc_add1(tc, opcode, dst, src);                             \
}

/* destination and two sources */
#define TC_ALU2(func, opcode)                                        \
static inline struct toy_inst *                                      \
func(struct toy_compiler *tc,                                        \
     struct toy_dst dst,                                             \
     struct toy_src src0,                                            \
     struct toy_src src1)                                            \
{                                                                    \
   return tc_add2(tc, opcode, dst, src0, src1);                      \
}

/* destination and three sources */
#define TC_ALU3(func, opcode)                                        \
static inline struct toy_inst *                                      \
func(struct toy_compiler *tc,                                        \
     struct toy_dst dst,                                             \
     struct toy_src src0,                                            \
     struct toy_src src1,                                            \
     struct toy_src src2)                                            \
{                                                                    \
   return tc_add3(tc, opcode, dst, src0, src1, src2);                \
}
|
||||
|
||||
/*
 * TC_CND2(func, opcode) defines an inline function named func that adds a
 * two-source instruction and sets its cond_modifier field.
 *
 * tc_add2() returns NULL when the instruction cannot be allocated, so the
 * field is only written when an instruction was actually added; the NULL is
 * passed on to the caller.
 */
#define TC_CND2(func, opcode)                   \
static inline struct toy_inst *                 \
func(struct toy_compiler *tc,                   \
     struct toy_dst dst,                        \
     struct toy_src src0,                       \
     struct toy_src src1,                       \
     unsigned cond_modifier)                    \
{                                               \
   struct toy_inst *inst;                       \
   inst = tc_add2(tc, opcode,                   \
                  dst, src0, src1);             \
   if (inst)                                    \
      inst->cond_modifier = cond_modifier;      \
   return inst;                                 \
}
|
||||
|
||||
TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
|
||||
TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
|
||||
TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
|
||||
TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
|
||||
TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
|
||||
TC_ALU1(tc_INV, TOY_OPCODE_INV)
|
||||
TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
|
||||
TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
|
||||
TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
|
||||
TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
|
||||
TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
|
||||
TC_ALU2(tc_AND, BRW_OPCODE_AND)
|
||||
TC_ALU2(tc_OR, BRW_OPCODE_OR)
|
||||
TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
|
||||
TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
|
||||
TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
|
||||
TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
|
||||
TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
|
||||
TC_ALU2(tc_POW, TOY_OPCODE_POW)
|
||||
TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
|
||||
TC_CND2(tc_SEL, BRW_OPCODE_SEL)
|
||||
TC_CND2(tc_CMP, BRW_OPCODE_CMP)
|
||||
TC_CND2(tc_IF, BRW_OPCODE_IF)
|
||||
TC_CND2(tc_SEND, BRW_OPCODE_SEND)
|
||||
|
||||
/**
|
||||
* Upcast a list_head to an instruction.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
|
||||
{
|
||||
return container_of(item, (struct toy_inst *) NULL, list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the instruction at the current location.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_current(struct toy_compiler *tc)
|
||||
{
|
||||
return (tc->iter != &tc->instructions) ?
|
||||
tc_list_to_inst(tc, tc->iter) : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the current location to the head.
|
||||
*/
|
||||
static inline void
|
||||
tc_head(struct toy_compiler *tc)
|
||||
{
|
||||
tc->iter = &tc->instructions;
|
||||
tc->iter_next = tc->iter->next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the current location to the tail.
|
||||
*/
|
||||
static inline void
|
||||
tc_tail(struct toy_compiler *tc)
|
||||
{
|
||||
tc->iter = &tc->instructions;
|
||||
tc->iter_next = tc->iter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance the current location.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_next_no_skip(struct toy_compiler *tc)
|
||||
{
|
||||
/* stay at the tail so that new instructions are added there */
|
||||
if (tc->iter_next == &tc->instructions) {
|
||||
tc_tail(tc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tc->iter = tc->iter_next;
|
||||
tc->iter_next = tc->iter_next->next;
|
||||
|
||||
return tc_list_to_inst(tc, tc->iter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance the current location, skipping markers.
|
||||
*/
|
||||
static inline struct toy_inst *
|
||||
tc_next(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
|
||||
do {
|
||||
inst = tc_next_no_skip(tc);
|
||||
} while (inst && inst->marker);
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
static inline void
|
||||
tc_fail(struct toy_compiler *tc, const char *reason)
|
||||
{
|
||||
if (!tc->fail) {
|
||||
tc->fail = true;
|
||||
tc->reason = reason;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Entry points implemented outside of this header.
 */

void toy_compiler_init(struct toy_compiler *tc, int gen);

void toy_compiler_cleanup(struct toy_compiler *tc);

void toy_compiler_dump(struct toy_compiler *tc);

/* generate HW shader code; the size of the code is stored in *size */
void *toy_compiler_assemble(struct toy_compiler *tc, int *size);

void toy_compiler_disassemble(struct toy_compiler *tc,
                              const void *kernel, int size);
|
||||
|
||||
#endif /* TOY_COMPILER_H */
|
||||
750
src/gallium/drivers/ilo/shader/toy_compiler_asm.c
Normal file
750
src/gallium/drivers/ilo/shader/toy_compiler_asm.c
Normal file
|
|
@ -0,0 +1,750 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#include "toy_compiler.h"
|
||||
|
||||
/*
 * An operand origin is (RegNum << CG_REG_SHIFT | SubRegNumInBytes);
 * CG_REG_NUM() extracts the register number from it.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
|
||||
|
||||
/**
 * Code generation state for a single instruction: the instruction itself
 * plus its operands translated to hardware values.
 */
struct codegen {
   const struct toy_inst *inst;  /* the instruction being encoded */
   int pc;                       /* index of the instruction in the kernel */

   unsigned flag_sub_reg_num;

   /* destination operand */
   struct codegen_dst {
      unsigned file;
      unsigned type;
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin;           /* (RegNum << 5 | SubRegNumInBytes) */

      unsigned horz_stride;

      unsigned writemask;
   } dst;

   /* source operands */
   struct codegen_src {
      unsigned file;
      unsigned type;
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin;           /* (RegNum << 5 | SubRegNumInBytes) */

      /* region description */
      unsigned vert_stride;
      unsigned width;
      unsigned horz_stride;

      unsigned swizzle[4];       /* indexed x, y, z, w */
      bool absolute;
      bool negate;
   } src[3];
};
|
||||
|
||||
/**
|
||||
* Return true if the source operand is null.
|
||||
*/
|
||||
static bool
|
||||
src_is_null(const struct codegen *cg, int idx)
|
||||
{
|
||||
const struct codegen_src *src = &cg->src[idx];
|
||||
|
||||
return (src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||
src->origin == BRW_ARF_NULL << CG_REG_SHIFT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
|
||||
*/
|
||||
static uint32_t
|
||||
translate_src(const struct codegen *cg, int idx)
|
||||
{
|
||||
const struct codegen_src *src = &cg->src[idx];
|
||||
uint32_t dw;
|
||||
|
||||
/* special treatment may be needed if any of the operand is immediate */
|
||||
if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
|
||||
assert(!cg->src[0].absolute && !cg->src[0].negate);
|
||||
/* only the last src operand can be an immediate */
|
||||
assert(src_is_null(cg, 1));
|
||||
|
||||
if (idx == 0)
|
||||
return cg->flag_sub_reg_num << 25;
|
||||
else
|
||||
return cg->src[0].origin;
|
||||
}
|
||||
else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
|
||||
assert(!cg->src[1].absolute && !cg->src[1].negate);
|
||||
return cg->src[1].origin;
|
||||
}
|
||||
|
||||
assert(src->file != BRW_IMMEDIATE_VALUE);
|
||||
|
||||
if (src->indirect) {
|
||||
const int offset = (int) src->origin;
|
||||
|
||||
assert(src->file == BRW_GENERAL_REGISTER_FILE);
|
||||
assert(offset < 512 && offset >= -512);
|
||||
|
||||
if (cg->inst->access_mode == BRW_ALIGN_16) {
|
||||
assert(src->width == BRW_WIDTH_4);
|
||||
assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
|
||||
|
||||
/* the lower 4 bits are reserved for the swizzle_[xy] */
|
||||
assert(!(src->origin & 0xf));
|
||||
|
||||
dw = src->vert_stride << 21 |
|
||||
src->swizzle[3] << 18 |
|
||||
src->swizzle[2] << 16 |
|
||||
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
|
||||
src->negate << 14 |
|
||||
src->absolute << 13 |
|
||||
src->indirect_subreg << 10 |
|
||||
(src->origin & 0x3f0) |
|
||||
src->swizzle[1] << 2 |
|
||||
src->swizzle[0];
|
||||
}
|
||||
else {
|
||||
assert(src->swizzle[0] == TOY_SWIZZLE_X &&
|
||||
src->swizzle[1] == TOY_SWIZZLE_Y &&
|
||||
src->swizzle[2] == TOY_SWIZZLE_Z &&
|
||||
src->swizzle[3] == TOY_SWIZZLE_W);
|
||||
|
||||
dw = src->vert_stride << 21 |
|
||||
src->width << 18 |
|
||||
src->horz_stride << 16 |
|
||||
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
|
||||
src->negate << 14 |
|
||||
src->absolute << 13 |
|
||||
src->indirect_subreg << 10 |
|
||||
(src->origin & 0x3ff);
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (src->file) {
|
||||
case BRW_ARCHITECTURE_REGISTER_FILE:
|
||||
break;
|
||||
case BRW_GENERAL_REGISTER_FILE:
|
||||
assert(CG_REG_NUM(src->origin) < 128);
|
||||
break;
|
||||
case BRW_MESSAGE_REGISTER_FILE:
|
||||
assert(cg->inst->opcode == BRW_OPCODE_SEND ||
|
||||
cg->inst->opcode == BRW_OPCODE_SENDC);
|
||||
assert(CG_REG_NUM(src->origin) < 16);
|
||||
break;
|
||||
case BRW_IMMEDIATE_VALUE:
|
||||
default:
|
||||
assert(!"invalid src file");
|
||||
break;
|
||||
}
|
||||
|
||||
if (cg->inst->access_mode == BRW_ALIGN_16) {
|
||||
assert(src->width == BRW_WIDTH_4);
|
||||
assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
|
||||
|
||||
/* the lower 4 bits are reserved for the swizzle_[xy] */
|
||||
assert(!(src->origin & 0xf));
|
||||
|
||||
dw = src->vert_stride << 21 |
|
||||
src->swizzle[3] << 18 |
|
||||
src->swizzle[2] << 16 |
|
||||
BRW_ADDRESS_DIRECT << 15 |
|
||||
src->negate << 14 |
|
||||
src->absolute << 13 |
|
||||
src->origin |
|
||||
src->swizzle[1] << 2 |
|
||||
src->swizzle[0];
|
||||
}
|
||||
else {
|
||||
assert(src->swizzle[0] == TOY_SWIZZLE_X &&
|
||||
src->swizzle[1] == TOY_SWIZZLE_Y &&
|
||||
src->swizzle[2] == TOY_SWIZZLE_Z &&
|
||||
src->swizzle[3] == TOY_SWIZZLE_W);
|
||||
|
||||
dw = src->vert_stride << 21 |
|
||||
src->width << 18 |
|
||||
src->horz_stride << 16 |
|
||||
BRW_ADDRESS_DIRECT << 15 |
|
||||
src->negate << 14 |
|
||||
src->absolute << 13 |
|
||||
src->origin;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx == 0)
|
||||
dw |= cg->flag_sub_reg_num << 25;
|
||||
|
||||
return dw;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate the destination operand to the higher 16 bits of DW1 of the
|
||||
* 1-src/2-src format.
|
||||
*/
|
||||
static uint16_t
|
||||
translate_dst_region(const struct codegen *cg)
|
||||
{
|
||||
const struct codegen_dst *dst = &cg->dst;
|
||||
uint16_t dw1_region;
|
||||
|
||||
if (dst->file == BRW_IMMEDIATE_VALUE) {
|
||||
/* dst is immediate (JIP) when the opcode is a conditional branch */
|
||||
switch (cg->inst->opcode) {
|
||||
case BRW_OPCODE_IF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_ENDIF:
|
||||
case BRW_OPCODE_WHILE:
|
||||
assert(dst->type == BRW_REGISTER_TYPE_W);
|
||||
dw1_region = (dst->origin & 0xffff);
|
||||
break;
|
||||
default:
|
||||
assert(!"dst cannot be immediate");
|
||||
dw1_region = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return dw1_region;
|
||||
}
|
||||
|
||||
if (dst->indirect) {
|
||||
const int offset = (int) dst->origin;
|
||||
|
||||
assert(dst->file == BRW_GENERAL_REGISTER_FILE);
|
||||
assert(offset < 512 && offset >= -512);
|
||||
|
||||
if (cg->inst->access_mode == BRW_ALIGN_16) {
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 4 part 2, page 144:
|
||||
*
|
||||
* "Allthough Dst.HorzStride is a don't care for Align16, HW
|
||||
* needs this to be programmed as 01."
|
||||
*/
|
||||
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
|
||||
/* the lower 4 bits are reserved for the writemask */
|
||||
assert(!(dst->origin & 0xf));
|
||||
|
||||
dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
|
||||
dst->horz_stride << 13 |
|
||||
dst->indirect_subreg << 10 |
|
||||
(dst->origin & 0x3f0) |
|
||||
dst->writemask;
|
||||
}
|
||||
else {
|
||||
assert(dst->writemask == TOY_WRITEMASK_XYZW);
|
||||
|
||||
dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
|
||||
dst->horz_stride << 13 |
|
||||
dst->indirect_subreg << 10 |
|
||||
(dst->origin & 0x3ff);
|
||||
}
|
||||
}
|
||||
else {
|
||||
assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
|
||||
CG_REG_NUM(dst->origin) < 128) ||
|
||||
(dst->file == BRW_MESSAGE_REGISTER_FILE &&
|
||||
CG_REG_NUM(dst->origin) < 16) ||
|
||||
(dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
|
||||
|
||||
if (cg->inst->access_mode == BRW_ALIGN_16) {
|
||||
/* similar to the indirect case */
|
||||
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
|
||||
assert(!(dst->origin & 0xf));
|
||||
|
||||
dw1_region = BRW_ADDRESS_DIRECT << 15 |
|
||||
dst->horz_stride << 13 |
|
||||
dst->origin |
|
||||
dst->writemask;
|
||||
}
|
||||
else {
|
||||
assert(dst->writemask == TOY_WRITEMASK_XYZW);
|
||||
|
||||
dw1_region = BRW_ADDRESS_DIRECT << 15 |
|
||||
dst->horz_stride << 13 |
|
||||
dst->origin;
|
||||
}
|
||||
}
|
||||
|
||||
return dw1_region;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate the destination operand to DW1 of the 1-src/2-src format.
|
||||
*/
|
||||
static uint32_t
|
||||
translate_dst(const struct codegen *cg)
|
||||
{
|
||||
return translate_dst_region(cg) << 16 |
|
||||
cg->src[1].type << 12 |
|
||||
cg->src[1].file << 10 |
|
||||
cg->src[0].type << 7 |
|
||||
cg->src[0].file << 5 |
|
||||
cg->dst.type << 2 |
|
||||
cg->dst.file;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate the instruction to DW0 of the 1-src/2-src format.
|
||||
*/
|
||||
static uint32_t
|
||||
translate_inst(const struct codegen *cg)
|
||||
{
|
||||
const bool debug_ctrl = false;
|
||||
const bool cmpt_ctrl = false;
|
||||
|
||||
assert(cg->inst->opcode < 128);
|
||||
|
||||
return cg->inst->saturate << 31 |
|
||||
debug_ctrl << 30 |
|
||||
cmpt_ctrl << 29 |
|
||||
cg->inst->acc_wr_ctrl << 28 |
|
||||
cg->inst->cond_modifier << 24 |
|
||||
cg->inst->exec_size << 21 |
|
||||
cg->inst->pred_inv << 20 |
|
||||
cg->inst->pred_ctrl << 16 |
|
||||
cg->inst->thread_ctrl << 14 |
|
||||
cg->inst->qtr_ctrl << 12 |
|
||||
cg->inst->dep_ctrl << 10 |
|
||||
cg->inst->mask_ctrl << 9 |
|
||||
cg->inst->access_mode << 8 |
|
||||
cg->inst->opcode;
|
||||
}
|
||||
|
||||
/**
 * Emit the four dwords of an instruction in 1-src/2-src format into
 * \p code.  The third source operand must be null.
 */
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   const uint32_t dw0 = translate_inst(cg);
   const uint32_t dw1 = translate_dst(cg);
   const uint32_t dw2 = translate_src(cg, 0);
   const uint32_t dw3 = translate_src(cg, 1);

   code[0] = dw0;
   code[1] = dw1;
   code[2] = dw2;
   code[3] = dw3;

   assert(src_is_null(cg, 2));
}
|
||||
|
||||
/**
|
||||
* Codegen an instruction in 3-src format.
|
||||
*/
|
||||
static void
|
||||
codegen_inst_3src(const struct codegen *cg, uint32_t *code)
|
||||
{
|
||||
const struct codegen_dst *dst = &cg->dst;
|
||||
uint32_t dw0, dw1, dw_src[3];
|
||||
int i;
|
||||
|
||||
dw0 = translate_inst(cg);
|
||||
|
||||
/*
|
||||
* 3-src instruction restrictions
|
||||
*
|
||||
* - align16 with direct addressing
|
||||
* - GRF or MRF dst
|
||||
* - GRF src
|
||||
* - sub_reg_num is DWORD aligned
|
||||
* - no regioning except replication control
|
||||
* (vert_stride == 0 && horz_stride == 0)
|
||||
*/
|
||||
assert(cg->inst->access_mode == BRW_ALIGN_16);
|
||||
|
||||
assert(!dst->indirect);
|
||||
assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
|
||||
CG_REG_NUM(dst->origin) < 128) ||
|
||||
(dst->file == BRW_MESSAGE_REGISTER_FILE &&
|
||||
CG_REG_NUM(dst->origin) < 16));
|
||||
assert(!(dst->origin & 0x3));
|
||||
assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
|
||||
|
||||
dw1 = dst->origin << 19 |
|
||||
dst->writemask << 17 |
|
||||
cg->src[2].negate << 9 |
|
||||
cg->src[2].absolute << 8 |
|
||||
cg->src[1].negate << 7 |
|
||||
cg->src[1].absolute << 6 |
|
||||
cg->src[0].negate << 5 |
|
||||
cg->src[0].absolute << 4 |
|
||||
cg->flag_sub_reg_num << 1 |
|
||||
(dst->file == BRW_MESSAGE_REGISTER_FILE);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
const struct codegen_src *src = &cg->src[i];
|
||||
|
||||
assert(!src->indirect);
|
||||
assert(src->file == BRW_GENERAL_REGISTER_FILE &&
|
||||
CG_REG_NUM(src->origin) < 128);
|
||||
assert(!(src->origin & 0x3));
|
||||
|
||||
assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
|
||||
src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
|
||||
(src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
|
||||
src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
|
||||
assert(src->width == BRW_WIDTH_4);
|
||||
|
||||
dw_src[i] = src->origin << 7 |
|
||||
src->swizzle[3] << 7 |
|
||||
src->swizzle[2] << 5 |
|
||||
src->swizzle[1] << 3 |
|
||||
src->swizzle[0] << 1 |
|
||||
(src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
|
||||
src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
|
||||
|
||||
/* only the lower 20 bits are used */
|
||||
assert((dw_src[i] & 0xfffff) == dw_src[i]);
|
||||
}
|
||||
|
||||
code[0] = dw0;
|
||||
code[1] = dw1;
|
||||
/* concatenate the bits of dw_src */
|
||||
code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
|
||||
code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanity check the region parameters of the operands.
|
||||
*/
|
||||
static void
|
||||
codegen_validate_region_restrictions(const struct codegen *cg)
|
||||
{
|
||||
const int exec_size_map[] = {
|
||||
[BRW_EXECUTE_1] = 1,
|
||||
[BRW_EXECUTE_2] = 2,
|
||||
[BRW_EXECUTE_4] = 4,
|
||||
[BRW_EXECUTE_8] = 8,
|
||||
[BRW_EXECUTE_16] = 16,
|
||||
[BRW_EXECUTE_32] = 32,
|
||||
};
|
||||
const int width_map[] = {
|
||||
[BRW_WIDTH_1] = 1,
|
||||
[BRW_WIDTH_2] = 2,
|
||||
[BRW_WIDTH_4] = 4,
|
||||
[BRW_WIDTH_8] = 8,
|
||||
[BRW_WIDTH_16] = 16,
|
||||
};
|
||||
const int horz_stride_map[] = {
|
||||
[BRW_HORIZONTAL_STRIDE_0] = 0,
|
||||
[BRW_HORIZONTAL_STRIDE_1] = 1,
|
||||
[BRW_HORIZONTAL_STRIDE_2] = 2,
|
||||
[BRW_HORIZONTAL_STRIDE_4] = 4,
|
||||
};
|
||||
const int vert_stride_map[] = {
|
||||
[BRW_VERTICAL_STRIDE_0] = 0,
|
||||
[BRW_VERTICAL_STRIDE_1] = 1,
|
||||
[BRW_VERTICAL_STRIDE_2] = 2,
|
||||
[BRW_VERTICAL_STRIDE_4] = 4,
|
||||
[BRW_VERTICAL_STRIDE_8] = 8,
|
||||
[BRW_VERTICAL_STRIDE_16] = 16,
|
||||
[BRW_VERTICAL_STRIDE_32] = 32,
|
||||
[BRW_VERTICAL_STRIDE_64] = 64,
|
||||
[BRW_VERTICAL_STRIDE_128] = 128,
|
||||
[BRW_VERTICAL_STRIDE_256] = 256,
|
||||
[BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
|
||||
};
|
||||
const int exec_size = exec_size_map[cg->inst->exec_size];
|
||||
int i;
|
||||
|
||||
/* Sandy Bridge PRM, volume 4 part 2, page 94 */
|
||||
|
||||
/* 1. (we don't do 32 anyway) */
|
||||
assert(exec_size <= 16);
|
||||
|
||||
for (i = 0; i < Elements(cg->src); i++) {
|
||||
const int width = width_map[cg->src[i].width];
|
||||
const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
|
||||
const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
|
||||
|
||||
if (src_is_null(cg, i))
|
||||
break;
|
||||
|
||||
/* 3. */
|
||||
assert(exec_size >= width);
|
||||
|
||||
if (exec_size == width) {
|
||||
/* 4. & 5. */
|
||||
if (horz_stride)
|
||||
assert(vert_stride == width * horz_stride);
|
||||
}
|
||||
|
||||
if (width == 1) {
|
||||
/* 6. */
|
||||
assert(horz_stride == 0);
|
||||
|
||||
/* 7. */
|
||||
if (exec_size == 1)
|
||||
assert(vert_stride == 0);
|
||||
}
|
||||
|
||||
/* 8. */
|
||||
if (!vert_stride && !horz_stride)
|
||||
assert(width == 1);
|
||||
}
|
||||
|
||||
/* derived from 10.1.2. & 10.2. */
|
||||
assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
translate_vfile(enum toy_file file)
|
||||
{
|
||||
switch (file) {
|
||||
case TOY_FILE_ARF: return BRW_ARCHITECTURE_REGISTER_FILE;
|
||||
case TOY_FILE_GRF: return BRW_GENERAL_REGISTER_FILE;
|
||||
case TOY_FILE_MRF: return BRW_MESSAGE_REGISTER_FILE;
|
||||
case TOY_FILE_IMM: return BRW_IMMEDIATE_VALUE;
|
||||
default:
|
||||
assert(!"unhandled toy file");
|
||||
return BRW_GENERAL_REGISTER_FILE;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
translate_vtype(enum toy_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case TOY_TYPE_F: return BRW_REGISTER_TYPE_F;
|
||||
case TOY_TYPE_D: return BRW_REGISTER_TYPE_D;
|
||||
case TOY_TYPE_UD: return BRW_REGISTER_TYPE_UD;
|
||||
case TOY_TYPE_W: return BRW_REGISTER_TYPE_W;
|
||||
case TOY_TYPE_UW: return BRW_REGISTER_TYPE_UW;
|
||||
case TOY_TYPE_V: return BRW_REGISTER_TYPE_V;
|
||||
default:
|
||||
assert(!"unhandled toy type");
|
||||
return BRW_REGISTER_TYPE_F;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
translate_writemask(enum toy_writemask writemask)
|
||||
{
|
||||
/* TOY_WRITEMASK_* are compatible with the hardware definitions */
|
||||
assert(writemask <= 0xf);
|
||||
return writemask;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
translate_swizzle(enum toy_swizzle swizzle)
|
||||
{
|
||||
/* TOY_SWIZZLE_* are compatible with the hardware definitions */
|
||||
assert(swizzle <= 3);
|
||||
return swizzle;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare for generating an instruction.
|
||||
*/
|
||||
static void
|
||||
codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
|
||||
int pc, int rect_linear_width)
|
||||
{
|
||||
int i;
|
||||
|
||||
cg->inst = inst;
|
||||
cg->pc = pc;
|
||||
|
||||
cg->flag_sub_reg_num = 0;
|
||||
|
||||
cg->dst.file = translate_vfile(inst->dst.file);
|
||||
cg->dst.type = translate_vtype(inst->dst.type);
|
||||
cg->dst.indirect = inst->dst.indirect;
|
||||
cg->dst.indirect_subreg = inst->dst.indirect_subreg;
|
||||
cg->dst.origin = inst->dst.val32;
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 4 part 2, page 81:
|
||||
*
|
||||
* "For a word or an unsigned word immediate data, software must
|
||||
* replicate the same 16-bit immediate value to both the lower word
|
||||
* and the high word of the 32-bit immediate field in an instruction."
|
||||
*/
|
||||
if (inst->dst.file == TOY_FILE_IMM) {
|
||||
switch (inst->dst.type) {
|
||||
case TOY_TYPE_W:
|
||||
case TOY_TYPE_UW:
|
||||
cg->dst.origin &= 0xffff;
|
||||
cg->dst.origin |= cg->dst.origin << 16;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cg->dst.writemask = translate_writemask(inst->dst.writemask);
|
||||
|
||||
switch (inst->dst.rect) {
|
||||
case TOY_RECT_LINEAR:
|
||||
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
|
||||
break;
|
||||
default:
|
||||
assert(!"unsupported dst region");
|
||||
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < Elements(cg->src); i++) {
|
||||
struct codegen_src *src = &cg->src[i];
|
||||
|
||||
src->file = translate_vfile(inst->src[i].file);
|
||||
src->type = translate_vtype(inst->src[i].type);
|
||||
src->indirect = inst->src[i].indirect;
|
||||
src->indirect_subreg = inst->src[i].indirect_subreg;
|
||||
src->origin = inst->src[i].val32;
|
||||
|
||||
/* do the same for src */
|
||||
if (inst->dst.file == TOY_FILE_IMM) {
|
||||
switch (inst->src[i].type) {
|
||||
case TOY_TYPE_W:
|
||||
case TOY_TYPE_UW:
|
||||
src->origin &= 0xffff;
|
||||
src->origin |= src->origin << 16;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
|
||||
src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
|
||||
src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
|
||||
src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
|
||||
src->absolute = inst->src[i].absolute;
|
||||
src->negate = inst->src[i].negate;
|
||||
|
||||
switch (inst->src[i].rect) {
|
||||
case TOY_RECT_LINEAR:
|
||||
switch (rect_linear_width) {
|
||||
case 1:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_1;
|
||||
src->width = BRW_WIDTH_1;
|
||||
break;
|
||||
case 2:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_2;
|
||||
src->width = BRW_WIDTH_2;
|
||||
break;
|
||||
case 4:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_4;
|
||||
src->width = BRW_WIDTH_4;
|
||||
break;
|
||||
case 8:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_8;
|
||||
src->width = BRW_WIDTH_8;
|
||||
break;
|
||||
case 16:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_16;
|
||||
src->width = BRW_WIDTH_16;
|
||||
break;
|
||||
default:
|
||||
assert(!"unsupported TOY_RECT_LINEAR width");
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_1;
|
||||
src->width = BRW_WIDTH_1;
|
||||
break;
|
||||
}
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
|
||||
break;
|
||||
case TOY_RECT_041:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_0;
|
||||
src->width = BRW_WIDTH_4;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
|
||||
break;
|
||||
case TOY_RECT_010:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_0;
|
||||
src->width = BRW_WIDTH_1;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
|
||||
break;
|
||||
case TOY_RECT_220:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_2;
|
||||
src->width = BRW_WIDTH_2;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
|
||||
break;
|
||||
case TOY_RECT_440:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_4;
|
||||
src->width = BRW_WIDTH_4;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
|
||||
break;
|
||||
case TOY_RECT_240:
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_2;
|
||||
src->width = BRW_WIDTH_4;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
|
||||
break;
|
||||
default:
|
||||
assert(!"unsupported src region");
|
||||
src->vert_stride = BRW_VERTICAL_STRIDE_1;
|
||||
src->width = BRW_WIDTH_1;
|
||||
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate HW shader code. The instructions should have been legalized.
|
||||
*/
|
||||
void *
|
||||
toy_compiler_assemble(struct toy_compiler *tc, int *size)
|
||||
{
|
||||
const struct toy_inst *inst;
|
||||
uint32_t *code;
|
||||
int pc;
|
||||
|
||||
code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
|
||||
if (!code)
|
||||
return NULL;
|
||||
|
||||
pc = 0;
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
uint32_t *dw = &code[pc * 4];
|
||||
struct codegen cg;
|
||||
|
||||
if (pc >= tc->num_instructions) {
|
||||
tc_fail(tc, "wrong instructoun count");
|
||||
break;
|
||||
}
|
||||
|
||||
codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
|
||||
codegen_validate_region_restrictions(&cg);
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MAD:
|
||||
codegen_inst_3src(&cg, dw);
|
||||
break;
|
||||
default:
|
||||
codegen_inst(&cg, dw);
|
||||
break;
|
||||
}
|
||||
|
||||
pc++;
|
||||
}
|
||||
|
||||
/* never return an invalid kernel */
|
||||
if (tc->fail) {
|
||||
FREE(code);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size)
|
||||
*size = pc * 4 * sizeof(uint32_t);
|
||||
|
||||
return code;
|
||||
}
|
||||
1385
src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
Normal file
1385
src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
Normal file
File diff suppressed because it is too large
Load diff
800
src/gallium/drivers/ilo/shader/toy_compiler_reg.h
Normal file
800
src/gallium/drivers/ilo/shader/toy_compiler_reg.h
Normal file
|
|
@ -0,0 +1,800 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_REG_H
|
||||
#define TOY_REG_H
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "util/u_debug.h" /* for assert() */
|
||||
#include "util/u_math.h" /* for union fi */
|
||||
|
||||
/* a toy reg is 256 bits wide; TOY_REG_WIDTH is that width in bytes */
|
||||
#define TOY_REG_WIDTH 32
|
||||
|
||||
/**
 * Register files.  The values must fit in the 3-bit file field of the
 * operands.
 */
enum toy_file {
   TOY_FILE_VRF = 0,    /* virtual register file */

   TOY_FILE_ARF = 1,    /* architecture register file */
   TOY_FILE_GRF = 2,    /* general register file */
   TOY_FILE_MRF = 3,    /* message register file */
   TOY_FILE_IMM = 4,    /* immediate value */

   TOY_FILE_COUNT,
};
|
||||
|
||||
/**
 * Register types.  The values must fit in the 3-bit type field of the
 * operands.
 */
enum toy_type {
   TOY_TYPE_F  = 0,
   TOY_TYPE_D  = 1,
   TOY_TYPE_UD = 2,
   TOY_TYPE_W  = 3,
   TOY_TYPE_UW = 4,
   TOY_TYPE_V  = 5,     /* only valid for immediates */

   TOY_TYPE_COUNT,
};
|
||||
|
||||
/**
 * Register rectangles (regions).  The three digits of a name stand for the
 * vertical stride, the width, and the horizontal stride respectively.
 */
enum toy_rect {
   TOY_RECT_LINEAR = 0,
   TOY_RECT_041    = 1,    /* <0;4,1> */
   TOY_RECT_010    = 2,    /* <0;1,0> */
   TOY_RECT_220    = 3,    /* <2;2,0> */
   TOY_RECT_440    = 4,    /* <4;4,0> */
   TOY_RECT_240    = 5,    /* <2;4,0> */

   TOY_RECT_COUNT,
};
|
||||
|
||||
/**
 * Source swizzles, i.e. the channel a source component is read from.  The
 * values are compatible with TGSI_SWIZZLE_x and with the hardware values.
 */
enum toy_swizzle {
   TOY_SWIZZLE_X = 0,
   TOY_SWIZZLE_Y = 1,
   TOY_SWIZZLE_Z = 2,
   TOY_SWIZZLE_W = 3,
};
|
||||
|
||||
/**
|
||||
* Destination writemasks. They are compatible with TGSI_WRITEMASK_x and
|
||||
* hardware values.
|
||||
*/
|
||||
enum toy_writemask {
|
||||
TOY_WRITEMASK_X = (1 << TOY_SWIZZLE_X),
|
||||
TOY_WRITEMASK_Y = (1 << TOY_SWIZZLE_Y),
|
||||
TOY_WRITEMASK_Z = (1 << TOY_SWIZZLE_Z),
|
||||
TOY_WRITEMASK_W = (1 << TOY_SWIZZLE_W),
|
||||
TOY_WRITEMASK_XY = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y),
|
||||
TOY_WRITEMASK_XZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z),
|
||||
TOY_WRITEMASK_XW = (TOY_WRITEMASK_X | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_YZ = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
|
||||
TOY_WRITEMASK_YW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_ZW = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_XYZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
|
||||
TOY_WRITEMASK_XYW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_XZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_YZW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
|
||||
TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y |
|
||||
TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
|
||||
};
|
||||
|
||||
/**
 * Destination operand.  The bit-fields add up to 32 bits.
 */
struct toy_dst {
   unsigned file:3;              /* TOY_FILE_x */
   unsigned type:3;              /* TOY_TYPE_x */
   unsigned rect:3;              /* TOY_RECT_x */
   unsigned indirect:1;          /* true or false */
   unsigned indirect_subreg:6;   /* which subreg of a0? */

   unsigned writemask:4;         /* TOY_WRITEMASK_x */
   unsigned pad:12;

   /* offset of the operand, or the value when the file is TOY_FILE_IMM */
   uint32_t val32;
};
|
||||
|
||||
/**
 * Source operand.  The bit-fields add up to 32 bits.
 */
struct toy_src {
   unsigned file:3;              /* TOY_FILE_x */
   unsigned type:3;              /* TOY_TYPE_x */
   unsigned rect:3;              /* TOY_RECT_x */
   unsigned indirect:1;          /* true or false */
   unsigned indirect_subreg:6;   /* which subreg of a0? */

   /* the channel each component is read from */
   unsigned swizzle_x:2;         /* TOY_SWIZZLE_x */
   unsigned swizzle_y:2;         /* TOY_SWIZZLE_x */
   unsigned swizzle_z:2;         /* TOY_SWIZZLE_x */
   unsigned swizzle_w:2;         /* TOY_SWIZZLE_x */

   /* source modifiers */
   unsigned absolute:1;          /* true or false */
   unsigned negate:1;            /* true or false */
   unsigned pad:6;

   /* offset of the operand, or the value when the file is TOY_FILE_IMM */
   uint32_t val32;
};
|
||||
|
||||
/**
|
||||
* Return true if the file is virtual.
|
||||
*/
|
||||
static inline bool
|
||||
toy_file_is_virtual(enum toy_file file)
|
||||
{
|
||||
return (file == TOY_FILE_VRF);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the file is a hardware one.
|
||||
*/
|
||||
static inline bool
|
||||
toy_file_is_hw(enum toy_file file)
|
||||
{
|
||||
return !toy_file_is_virtual(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the size of the file.
|
||||
*/
|
||||
static inline uint32_t
|
||||
toy_file_size(enum toy_file file)
|
||||
{
|
||||
switch (file) {
|
||||
case TOY_FILE_GRF:
|
||||
return 256 * TOY_REG_WIDTH;
|
||||
case TOY_FILE_MRF:
|
||||
/* there is no MRF on GEN7+ */
|
||||
return 256 * TOY_REG_WIDTH;
|
||||
default:
|
||||
assert(!"invalid toy file");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the size of the type.
|
||||
*/
|
||||
static inline int
|
||||
toy_type_size(enum toy_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case TOY_TYPE_F:
|
||||
case TOY_TYPE_D:
|
||||
case TOY_TYPE_UD:
|
||||
return 4;
|
||||
case TOY_TYPE_W:
|
||||
case TOY_TYPE_UW:
|
||||
return 2;
|
||||
case TOY_TYPE_V:
|
||||
default:
|
||||
assert(!"invalid toy type");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the destination operand is null.
|
||||
*/
|
||||
static inline bool
|
||||
tdst_is_null(struct toy_dst dst)
|
||||
{
|
||||
/* BRW_ARF_NULL happens to be 0 */
|
||||
return (dst.file == TOY_FILE_ARF && dst.val32 == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate the destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_validate(struct toy_dst dst)
|
||||
{
|
||||
switch (dst.file) {
|
||||
case TOY_FILE_VRF:
|
||||
case TOY_FILE_ARF:
|
||||
case TOY_FILE_MRF:
|
||||
assert(!dst.indirect);
|
||||
if (dst.file == TOY_FILE_MRF)
|
||||
assert(dst.val32 < toy_file_size(dst.file));
|
||||
break;
|
||||
case TOY_FILE_GRF:
|
||||
if (!dst.indirect)
|
||||
assert(dst.val32 < toy_file_size(dst.file));
|
||||
break;
|
||||
case TOY_FILE_IMM:
|
||||
/* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
|
||||
assert(!dst.indirect);
|
||||
assert(dst.type == TOY_TYPE_W);
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid dst file");
|
||||
break;
|
||||
}
|
||||
|
||||
switch (dst.type) {
|
||||
case TOY_TYPE_V:
|
||||
assert(!"invalid dst type");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
assert(dst.rect == TOY_RECT_LINEAR);
|
||||
if (dst.file != TOY_FILE_IMM)
|
||||
assert(dst.val32 % toy_type_size(dst.type) == 0);
|
||||
|
||||
assert(dst.writemask <= TOY_WRITEMASK_XYZW);
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_type(struct toy_dst dst, enum toy_type type)
|
||||
{
|
||||
dst.type = type;
|
||||
return tdst_validate(dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the destination operand to TOY_TYPE_D.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_d(struct toy_dst dst)
|
||||
{
|
||||
return tdst_type(dst, TOY_TYPE_D);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the destination operand to TOY_TYPE_UD.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_ud(struct toy_dst dst)
|
||||
{
|
||||
return tdst_type(dst, TOY_TYPE_UD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the destination operand to TOY_TYPE_W.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_w(struct toy_dst dst)
|
||||
{
|
||||
return tdst_type(dst, TOY_TYPE_W);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the destination operand to TOY_TYPE_UW.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_uw(struct toy_dst dst)
|
||||
{
|
||||
return tdst_type(dst, TOY_TYPE_UW);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the rectangle of the destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_rect(struct toy_dst dst, enum toy_rect rect)
|
||||
{
|
||||
dst.rect = rect;
|
||||
return tdst_validate(dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply writemask to the destination operand. Note that the current
|
||||
* writemask is honored.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
|
||||
{
|
||||
dst.writemask &= writemask;
|
||||
return tdst_validate(dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Offset the destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_offset(struct toy_dst dst, int reg, int subreg)
|
||||
{
|
||||
dst.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);
|
||||
return tdst_validate(dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect,
|
||||
bool indirect, unsigned indirect_subreg,
|
||||
enum toy_writemask writemask, uint32_t val32)
|
||||
{
|
||||
struct toy_dst dst;
|
||||
|
||||
dst.file = file;
|
||||
dst.type = type;
|
||||
dst.rect = rect;
|
||||
dst.indirect = indirect;
|
||||
dst.indirect_subreg = indirect_subreg;
|
||||
dst.writemask = writemask;
|
||||
dst.pad = 0;
|
||||
|
||||
dst.val32 = val32;
|
||||
|
||||
return tdst_validate(dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a null destination operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_null(void)
|
||||
{
|
||||
static const struct toy_dst null_dst = {
|
||||
.file = TOY_FILE_ARF,
|
||||
.type = TOY_TYPE_F,
|
||||
.rect = TOY_RECT_LINEAR,
|
||||
.indirect = false,
|
||||
.indirect_subreg = 0,
|
||||
.writemask = TOY_WRITEMASK_XYZW,
|
||||
.pad = 0,
|
||||
.val32 = 0,
|
||||
};
|
||||
|
||||
return null_dst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a destination operand from a source operand.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_from(struct toy_src src)
|
||||
{
|
||||
const enum toy_writemask writemask =
|
||||
(1 << src.swizzle_x) |
|
||||
(1 << src.swizzle_y) |
|
||||
(1 << src.swizzle_z) |
|
||||
(1 << src.swizzle_w);
|
||||
|
||||
return tdst_full(src.file, src.type, src.rect,
|
||||
src.indirect, src.indirect_subreg, writemask, src.val32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a destination operand, assuming the type is TOY_TYPE_F, the
|
||||
* rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
|
||||
{
|
||||
const enum toy_type type = TOY_TYPE_F;
|
||||
const enum toy_rect rect = TOY_RECT_LINEAR;
|
||||
const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;
|
||||
|
||||
return tdst_full(file, type, rect,
|
||||
false, 0, TOY_WRITEMASK_XYZW, val32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate destination operand of type TOY_TYPE_W.
|
||||
*/
|
||||
static inline struct toy_dst
|
||||
tdst_imm_w(int16_t w)
|
||||
{
|
||||
const union fi fi = { .i = w };
|
||||
|
||||
return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
|
||||
false, 0, TOY_WRITEMASK_XYZW, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the source operand is null.
|
||||
*/
|
||||
static inline bool
|
||||
tsrc_is_null(struct toy_src src)
|
||||
{
|
||||
/* BRW_ARF_NULL happens to be 0 */
|
||||
return (src.file == TOY_FILE_ARF && src.val32 == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the source operand is swizzled.
|
||||
*/
|
||||
static inline bool
|
||||
tsrc_is_swizzled(struct toy_src src)
|
||||
{
|
||||
return (src.swizzle_x != TOY_SWIZZLE_X ||
|
||||
src.swizzle_y != TOY_SWIZZLE_Y ||
|
||||
src.swizzle_z != TOY_SWIZZLE_Z ||
|
||||
src.swizzle_w != TOY_SWIZZLE_W);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the source operand is swizzled to the same channel.
|
||||
*/
|
||||
static inline bool
|
||||
tsrc_is_swizzle1(struct toy_src src)
|
||||
{
|
||||
return (src.swizzle_x == src.swizzle_y &&
|
||||
src.swizzle_x == src.swizzle_z &&
|
||||
src.swizzle_x == src.swizzle_w);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_validate(struct toy_src src)
|
||||
{
|
||||
switch (src.file) {
|
||||
case TOY_FILE_VRF:
|
||||
case TOY_FILE_ARF:
|
||||
case TOY_FILE_MRF:
|
||||
assert(!src.indirect);
|
||||
if (src.file == TOY_FILE_MRF)
|
||||
assert(src.val32 < toy_file_size(src.file));
|
||||
break;
|
||||
case TOY_FILE_GRF:
|
||||
if (!src.indirect)
|
||||
assert(src.val32 < toy_file_size(src.file));
|
||||
break;
|
||||
case TOY_FILE_IMM:
|
||||
assert(!src.indirect);
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid src file");
|
||||
break;
|
||||
}
|
||||
|
||||
switch (src.type) {
|
||||
case TOY_TYPE_V:
|
||||
assert(src.file == TOY_FILE_IMM);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (src.file != TOY_FILE_IMM)
|
||||
assert(src.val32 % toy_type_size(src.type) == 0);
|
||||
|
||||
assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
|
||||
src.swizzle_z < 4 && src.swizzle_w < 4);
|
||||
|
||||
return src;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_type(struct toy_src src, enum toy_type type)
|
||||
{
|
||||
src.type = type;
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the source operand to TOY_TYPE_D.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_d(struct toy_src src)
|
||||
{
|
||||
return tsrc_type(src, TOY_TYPE_D);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the source operand to TOY_TYPE_UD.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_ud(struct toy_src src)
|
||||
{
|
||||
return tsrc_type(src, TOY_TYPE_UD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the source operand to TOY_TYPE_W.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_w(struct toy_src src)
|
||||
{
|
||||
return tsrc_type(src, TOY_TYPE_W);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the type of the source operand to TOY_TYPE_UW.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_uw(struct toy_src src)
|
||||
{
|
||||
return tsrc_type(src, TOY_TYPE_UW);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the rectangle of the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_rect(struct toy_src src, enum toy_rect rect)
|
||||
{
|
||||
src.rect = rect;
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swizzle the source operand. Note that the current swizzles are honored.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_swizzle(struct toy_src src,
|
||||
enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
|
||||
enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
|
||||
{
|
||||
const enum toy_swizzle current[4] = {
|
||||
src.swizzle_x, src.swizzle_y,
|
||||
src.swizzle_z, src.swizzle_w,
|
||||
};
|
||||
|
||||
src.swizzle_x = current[swizzle_x];
|
||||
src.swizzle_y = current[swizzle_y];
|
||||
src.swizzle_z = current[swizzle_z];
|
||||
src.swizzle_w = current[swizzle_w];
|
||||
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swizzle the source operand to the same channel. Note that the current
|
||||
* swizzles are honored.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
|
||||
{
|
||||
return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set absolute and unset negate of the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_absolute(struct toy_src src)
|
||||
{
|
||||
src.absolute = true;
|
||||
src.negate = false;
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Negate the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_negate(struct toy_src src)
|
||||
{
|
||||
src.negate = !src.negate;
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Offset the source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_offset(struct toy_src src, int reg, int subreg)
|
||||
{
|
||||
src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_full(enum toy_file file, enum toy_type type,
|
||||
enum toy_rect rect, bool indirect, unsigned indirect_subreg,
|
||||
enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
|
||||
enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w,
|
||||
bool absolute, bool negate,
|
||||
uint32_t val32)
|
||||
{
|
||||
struct toy_src src;
|
||||
|
||||
src.file = file;
|
||||
src.type = type;
|
||||
src.rect = rect;
|
||||
src.indirect = indirect;
|
||||
src.indirect_subreg = indirect_subreg;
|
||||
src.swizzle_x = swizzle_x;
|
||||
src.swizzle_y = swizzle_y;
|
||||
src.swizzle_z = swizzle_z;
|
||||
src.swizzle_w = swizzle_w;
|
||||
src.absolute = absolute;
|
||||
src.negate = negate;
|
||||
src.pad = 0;
|
||||
|
||||
src.val32 = val32;
|
||||
|
||||
return tsrc_validate(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a null source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_null(void)
|
||||
{
|
||||
static const struct toy_src null_src = {
|
||||
.file = TOY_FILE_ARF,
|
||||
.type = TOY_TYPE_F,
|
||||
.rect = TOY_RECT_LINEAR,
|
||||
.indirect = false,
|
||||
.indirect_subreg = 0,
|
||||
.swizzle_x = TOY_SWIZZLE_X,
|
||||
.swizzle_y = TOY_SWIZZLE_Y,
|
||||
.swizzle_z = TOY_SWIZZLE_Z,
|
||||
.swizzle_w = TOY_SWIZZLE_W,
|
||||
.absolute = false,
|
||||
.negate = false,
|
||||
.pad = 0,
|
||||
.val32 = 0,
|
||||
};
|
||||
|
||||
return null_src;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a source operand from a destination operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_from(struct toy_dst dst)
|
||||
{
|
||||
enum toy_swizzle swizzle[4];
|
||||
|
||||
if (dst.writemask == TOY_WRITEMASK_XYZW) {
|
||||
swizzle[0] = TOY_SWIZZLE_X;
|
||||
swizzle[1] = TOY_SWIZZLE_Y;
|
||||
swizzle[2] = TOY_SWIZZLE_Z;
|
||||
swizzle[3] = TOY_SWIZZLE_W;
|
||||
}
|
||||
else {
|
||||
const enum toy_swizzle first =
|
||||
(dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X :
|
||||
(dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y :
|
||||
(dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z :
|
||||
(dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W :
|
||||
TOY_SWIZZLE_X;
|
||||
|
||||
swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first;
|
||||
swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first;
|
||||
swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : first;
|
||||
swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first;
|
||||
}
|
||||
|
||||
return tsrc_full(dst.file, dst.type, dst.rect,
|
||||
dst.indirect, dst.indirect_subreg,
|
||||
swizzle[0], swizzle[1], swizzle[2], swizzle[3],
|
||||
false, false, dst.val32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a source operand, assuming the type is TOY_TYPE_F, the
|
||||
* rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
|
||||
{
|
||||
const enum toy_type type = TOY_TYPE_F;
|
||||
const enum toy_rect rect = TOY_RECT_LINEAR;
|
||||
const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;
|
||||
|
||||
return tsrc_full(file, type, rect, false, 0,
|
||||
TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
|
||||
TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
|
||||
false, false, val32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm(enum toy_type type, uint32_t val32)
|
||||
{
|
||||
return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0,
|
||||
TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
|
||||
TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
|
||||
false, false, val32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_F.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_f(float f)
|
||||
{
|
||||
const union fi fi = { .f = f };
|
||||
return tsrc_imm(TOY_TYPE_F, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_D.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_d(int32_t d)
|
||||
{
|
||||
const union fi fi = { .i = d };
|
||||
return tsrc_imm(TOY_TYPE_D, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_UD.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_ud(uint32_t ud)
|
||||
{
|
||||
const union fi fi = { .ui = ud };
|
||||
return tsrc_imm(TOY_TYPE_UD, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_W.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_w(int16_t w)
|
||||
{
|
||||
const union fi fi = { .i = w };
|
||||
return tsrc_imm(TOY_TYPE_W, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_UW.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_uw(uint16_t uw)
|
||||
{
|
||||
const union fi fi = { .ui = uw };
|
||||
return tsrc_imm(TOY_TYPE_UW, fi.ui);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an immediate source operand of type TOY_TYPE_V.
|
||||
*/
|
||||
static inline struct toy_src
|
||||
tsrc_imm_v(uint32_t v)
|
||||
{
|
||||
return tsrc_imm(TOY_TYPE_V, v);
|
||||
}
|
||||
|
||||
#endif /* TOY_REG_H */
|
||||
289
src/gallium/drivers/ilo/shader/toy_helpers.h
Normal file
289
src/gallium/drivers/ilo/shader/toy_helpers.h
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_HELPERS_H
|
||||
#define TOY_HELPERS_H
|
||||
|
||||
#include "toy_compiler.h"
|
||||
|
||||
/**
|
||||
* Transpose a dst operand.
|
||||
*
|
||||
* Instead of processing a single vertex with each of its attributes in one
|
||||
* register, such as
|
||||
*
|
||||
* r0 = [x0, y0, z0, w0]
|
||||
*
|
||||
* we want to process four vertices at a time
|
||||
*
|
||||
* r0 = [x0, y0, z0, w0]
|
||||
* r1 = [x1, y1, z1, w1]
|
||||
* r2 = [x2, y2, z2, w2]
|
||||
* r3 = [x3, y3, z3, w3]
|
||||
*
|
||||
* but with the attribute data "transposed"
|
||||
*
|
||||
* r0 = [x0, x1, x2, x3]
|
||||
* r1 = [y0, y1, y2, y3]
|
||||
* r2 = [z0, z1, z2, z3]
|
||||
* r3 = [w0, w1, w2, w3]
|
||||
*
|
||||
* This is also known as the SoA form.
|
||||
*/
|
||||
static inline void
|
||||
tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
|
||||
{
|
||||
int i;
|
||||
|
||||
switch (dst.file) {
|
||||
case TOY_FILE_VRF:
|
||||
assert(!dst.indirect);
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (dst.writemask & (1 << i)) {
|
||||
trans[i] = tdst_offset(dst, i, 0);
|
||||
trans[i].writemask = TOY_WRITEMASK_XYZW;
|
||||
}
|
||||
else {
|
||||
trans[i] = tdst_null();
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TOY_FILE_ARF:
|
||||
assert(tdst_is_null(dst));
|
||||
for (i = 0; i < 4; i++)
|
||||
trans[i] = dst;
|
||||
break;
|
||||
case TOY_FILE_GRF:
|
||||
case TOY_FILE_MRF:
|
||||
case TOY_FILE_IMM:
|
||||
default:
|
||||
assert(!"unexpected file in dst transposition");
|
||||
for (i = 0; i < 4; i++)
|
||||
trans[i] = tdst_null();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transpose a src operand.
|
||||
*/
|
||||
static inline void
|
||||
tsrc_transpose(struct toy_src src, struct toy_src *trans)
|
||||
{
|
||||
const enum toy_swizzle swizzle[4] = {
|
||||
src.swizzle_x, src.swizzle_y,
|
||||
src.swizzle_z, src.swizzle_w,
|
||||
};
|
||||
int i;
|
||||
|
||||
switch (src.file) {
|
||||
case TOY_FILE_VRF:
|
||||
assert(!src.indirect);
|
||||
for (i = 0; i < 4; i++) {
|
||||
trans[i] = tsrc_offset(src, swizzle[i], 0);
|
||||
trans[i].swizzle_x = TOY_SWIZZLE_X;
|
||||
trans[i].swizzle_y = TOY_SWIZZLE_Y;
|
||||
trans[i].swizzle_z = TOY_SWIZZLE_Z;
|
||||
trans[i].swizzle_w = TOY_SWIZZLE_W;
|
||||
}
|
||||
break;
|
||||
case TOY_FILE_ARF:
|
||||
assert(tsrc_is_null(src));
|
||||
/* fall through */
|
||||
case TOY_FILE_IMM:
|
||||
for (i = 0; i < 4; i++)
|
||||
trans[i] = src;
|
||||
break;
|
||||
case TOY_FILE_GRF:
|
||||
case TOY_FILE_MRF:
|
||||
default:
|
||||
assert(!"unexpected file in src transposition");
|
||||
for (i = 0; i < 4; i++)
|
||||
trans[i] = tsrc_null();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct toy_src
|
||||
tsrc_imm_mdesc(const struct toy_compiler *tc,
|
||||
bool eot,
|
||||
unsigned message_length,
|
||||
unsigned response_length,
|
||||
bool header_present,
|
||||
uint32_t function_control)
|
||||
{
|
||||
uint32_t desc;
|
||||
|
||||
assert(message_length >= 1 && message_length <= 15);
|
||||
assert(response_length >= 0 && response_length <= 16);
|
||||
assert(function_control < 1 << 19);
|
||||
|
||||
desc = eot << 31 |
|
||||
message_length << 25 |
|
||||
response_length << 20 |
|
||||
header_present << 19 |
|
||||
function_control;
|
||||
|
||||
return tsrc_imm_ud(desc);
|
||||
}
|
||||
|
||||
static inline struct toy_src
|
||||
tsrc_imm_mdesc_sampler(const struct toy_compiler *tc,
|
||||
unsigned message_length,
|
||||
unsigned response_length,
|
||||
bool header_present,
|
||||
unsigned simd_mode,
|
||||
unsigned message_type,
|
||||
unsigned sampler_index,
|
||||
unsigned binding_table_index)
|
||||
{
|
||||
const bool eot = false;
|
||||
uint32_t ctrl;
|
||||
|
||||
assert(simd_mode < 4);
|
||||
assert(sampler_index < 16);
|
||||
assert(binding_table_index < 256);
|
||||
|
||||
if (tc->gen >= ILO_GEN(7)) {
|
||||
ctrl = simd_mode << 17 |
|
||||
message_type << 12 |
|
||||
sampler_index << 8 |
|
||||
binding_table_index;
|
||||
}
|
||||
else {
|
||||
ctrl = simd_mode << 16 |
|
||||
message_type << 12 |
|
||||
sampler_index << 8 |
|
||||
binding_table_index;
|
||||
}
|
||||
|
||||
return tsrc_imm_mdesc(tc, eot, message_length,
|
||||
response_length, header_present, ctrl);
|
||||
}
|
||||
|
||||
static inline struct toy_src
|
||||
tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
|
||||
bool eot,
|
||||
unsigned message_length,
|
||||
unsigned response_length,
|
||||
bool header_present,
|
||||
bool send_write_commit_message,
|
||||
unsigned message_type,
|
||||
unsigned message_specific_control,
|
||||
unsigned binding_table_index)
|
||||
{
|
||||
uint32_t ctrl;
|
||||
|
||||
if (tc->gen >= ILO_GEN(7)) {
|
||||
assert(!send_write_commit_message);
|
||||
assert((message_specific_control & 0x3f00) == message_specific_control);
|
||||
|
||||
ctrl = message_type << 14 |
|
||||
(message_specific_control & 0x3f00) |
|
||||
binding_table_index;
|
||||
}
|
||||
else {
|
||||
assert(!send_write_commit_message ||
|
||||
message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
|
||||
assert((message_specific_control & 0x1f00) == message_specific_control);
|
||||
|
||||
ctrl = send_write_commit_message << 17 |
|
||||
message_type << 13 |
|
||||
(message_specific_control & 0x1f00) |
|
||||
binding_table_index;
|
||||
}
|
||||
|
||||
return tsrc_imm_mdesc(tc, eot, message_length,
|
||||
response_length, header_present, ctrl);
|
||||
}
|
||||
|
||||
static inline struct toy_src
|
||||
tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
|
||||
unsigned message_length,
|
||||
unsigned response_length,
|
||||
bool write_type,
|
||||
bool dword_mode,
|
||||
bool invalidate_after_read,
|
||||
int num_registers,
|
||||
int hword_offset)
|
||||
{
|
||||
const bool eot = false;
|
||||
const bool header_present = true;
|
||||
uint32_t ctrl;
|
||||
|
||||
assert(tc->gen >= ILO_GEN(7));
|
||||
assert(num_registers == 1 || num_registers == 2 || num_registers == 4);
|
||||
|
||||
ctrl = 1 << 18 |
|
||||
write_type << 17 |
|
||||
dword_mode << 16 |
|
||||
invalidate_after_read << 15 |
|
||||
(num_registers - 1) << 12 |
|
||||
hword_offset;
|
||||
|
||||
return tsrc_imm_mdesc(tc, eot, message_length,
|
||||
response_length, header_present, ctrl);
|
||||
}
|
||||
|
||||
static inline struct toy_src
|
||||
tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
|
||||
bool eot,
|
||||
unsigned message_length,
|
||||
unsigned response_length,
|
||||
bool complete,
|
||||
bool used,
|
||||
bool allocate,
|
||||
unsigned swizzle_control,
|
||||
unsigned global_offset,
|
||||
unsigned urb_opcode)
|
||||
{
|
||||
const bool header_present = true;
|
||||
uint32_t ctrl;
|
||||
|
||||
if (tc->gen >= ILO_GEN(7)) {
|
||||
const bool per_slot_offset = false;
|
||||
|
||||
ctrl = per_slot_offset << 16 |
|
||||
complete << 15 |
|
||||
swizzle_control << 14 |
|
||||
global_offset << 3 |
|
||||
urb_opcode;
|
||||
}
|
||||
else {
|
||||
ctrl = complete << 15 |
|
||||
used << 14 |
|
||||
allocate << 13 |
|
||||
swizzle_control << 10 |
|
||||
global_offset << 4 |
|
||||
urb_opcode;
|
||||
}
|
||||
|
||||
return tsrc_imm_mdesc(tc, eot, message_length,
|
||||
response_length, header_present, ctrl);
|
||||
}
|
||||
|
||||
#endif /* TOY_HELPERS_H */
|
||||
632
src/gallium/drivers/ilo/shader/toy_legalize.c
Normal file
632
src/gallium/drivers/ilo/shader/toy_legalize.c
Normal file
|
|
@ -0,0 +1,632 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "toy_compiler.h"
|
||||
#include "toy_tgsi.h"
|
||||
#include "toy_helpers.h"
|
||||
#include "toy_legalize.h"
|
||||
|
||||
/**
|
||||
* Lower an instruction to BRW_OPCODE_SEND(C).
|
||||
*/
|
||||
void
|
||||
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
|
||||
bool sendc, unsigned sfid)
|
||||
{
|
||||
assert(inst->opcode >= 128);
|
||||
|
||||
inst->opcode = (sendc) ? BRW_OPCODE_SENDC : BRW_OPCODE_SEND;
|
||||
|
||||
/* thread control is reserved */
|
||||
assert(inst->thread_ctrl == 0);
|
||||
|
||||
assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
|
||||
inst->cond_modifier = sfid;
|
||||
}
|
||||
|
||||
static int
|
||||
math_op_to_func(unsigned opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case TOY_OPCODE_INV: return BRW_MATH_FUNCTION_INV;
|
||||
case TOY_OPCODE_LOG: return BRW_MATH_FUNCTION_LOG;
|
||||
case TOY_OPCODE_EXP: return BRW_MATH_FUNCTION_EXP;
|
||||
case TOY_OPCODE_SQRT: return BRW_MATH_FUNCTION_SQRT;
|
||||
case TOY_OPCODE_RSQ: return BRW_MATH_FUNCTION_RSQ;
|
||||
case TOY_OPCODE_SIN: return BRW_MATH_FUNCTION_SIN;
|
||||
case TOY_OPCODE_COS: return BRW_MATH_FUNCTION_COS;
|
||||
case TOY_OPCODE_FDIV: return BRW_MATH_FUNCTION_FDIV;
|
||||
case TOY_OPCODE_POW: return BRW_MATH_FUNCTION_POW;
|
||||
case TOY_OPCODE_INT_DIV_QUOTIENT: return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
|
||||
case TOY_OPCODE_INT_DIV_REMAINDER: return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
|
||||
default:
|
||||
assert(!"unknown math opcode");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower virtual math opcodes to BRW_OPCODE_MATH.
|
||||
*/
|
||||
void
|
||||
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
struct toy_dst tmp;
|
||||
int i;
|
||||
|
||||
/* see commit 250770b74d33bb8625c780a74a89477af033d13a */
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
if (tsrc_is_null(inst->src[i]))
|
||||
break;
|
||||
|
||||
/* no swizzling in align1 */
|
||||
/* XXX how about source modifiers? */
|
||||
if (toy_file_is_virtual(inst->src[i].file) &&
|
||||
!tsrc_is_swizzled(inst->src[i]) &&
|
||||
!inst->src[i].absolute &&
|
||||
!inst->src[i].negate)
|
||||
continue;
|
||||
|
||||
tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
|
||||
tc_MOV(tc, tmp, inst->src[i]);
|
||||
inst->src[i] = tsrc_from(tmp);
|
||||
}
|
||||
|
||||
/* FC[0:3] */
|
||||
assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
|
||||
inst->cond_modifier = math_op_to_func(inst->opcode);
|
||||
/* FC[4:5] */
|
||||
assert(inst->thread_ctrl == 0);
|
||||
inst->thread_ctrl = 0;
|
||||
|
||||
inst->opcode = BRW_OPCODE_MATH;
|
||||
tc_move_inst(tc, inst);
|
||||
|
||||
/* no writemask in align1 */
|
||||
if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
|
||||
struct toy_dst dst = inst->dst;
|
||||
struct toy_inst *inst2;
|
||||
|
||||
tmp = tc_alloc_tmp(tc);
|
||||
tmp.type = inst->dst.type;
|
||||
inst->dst = tmp;
|
||||
|
||||
inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
|
||||
inst2->pred_ctrl = inst->pred_ctrl;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
absolute_imm(uint32_t imm32, enum toy_type type)
|
||||
{
|
||||
union fi val = { .ui = imm32 };
|
||||
|
||||
switch (type) {
|
||||
case TOY_TYPE_F:
|
||||
val.f = fabs(val.f);
|
||||
break;
|
||||
case TOY_TYPE_D:
|
||||
if (val.i < 0)
|
||||
val.i = -val.i;
|
||||
break;
|
||||
case TOY_TYPE_W:
|
||||
if ((int16_t) (val.ui & 0xffff) < 0)
|
||||
val.i = -((int16_t) (val.ui & 0xffff));
|
||||
break;
|
||||
case TOY_TYPE_V:
|
||||
assert(!"cannot take absoulte of immediates of type V");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return val.ui;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
negate_imm(uint32_t imm32, enum toy_type type)
|
||||
{
|
||||
union fi val = { .ui = imm32 };
|
||||
|
||||
switch (type) {
|
||||
case TOY_TYPE_F:
|
||||
val.f = -val.f;
|
||||
break;
|
||||
case TOY_TYPE_D:
|
||||
case TOY_TYPE_UD:
|
||||
val.i = -val.i;
|
||||
break;
|
||||
case TOY_TYPE_W:
|
||||
case TOY_TYPE_UW:
|
||||
val.i = -((int16_t) (val.ui & 0xffff));
|
||||
break;
|
||||
default:
|
||||
assert(!"negate immediate of unknown type");
|
||||
break;
|
||||
}
|
||||
|
||||
return val.ui;
|
||||
}
|
||||
|
||||
static void
|
||||
validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
bool move_inst = false;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
struct toy_dst tmp;
|
||||
|
||||
if (tsrc_is_null(inst->src[i]))
|
||||
break;
|
||||
|
||||
if (inst->src[i].file != TOY_FILE_IMM)
|
||||
continue;
|
||||
|
||||
if (inst->src[i].absolute) {
|
||||
inst->src[i].val32 =
|
||||
absolute_imm(inst->src[i].val32, inst->src[i].type);
|
||||
inst->src[i].absolute = false;
|
||||
}
|
||||
|
||||
if (inst->src[i].negate) {
|
||||
inst->src[i].val32 =
|
||||
negate_imm(inst->src[i].val32, inst->src[i].type);
|
||||
inst->src[i].negate = false;
|
||||
}
|
||||
|
||||
/* this is the last operand */
|
||||
if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1]))
|
||||
break;
|
||||
|
||||
/* need to use a temp if this imm is not the last operand */
|
||||
/* TODO we should simply swap the operands if the op is commutative */
|
||||
tmp = tc_alloc_tmp(tc);
|
||||
tmp = tdst_type(tmp, inst->src[i].type);
|
||||
tc_MOV(tc, tmp, inst->src[i]);
|
||||
inst->src[i] = tsrc_from(tmp);
|
||||
|
||||
move_inst = true;
|
||||
}
|
||||
|
||||
if (move_inst)
|
||||
tc_move_inst(tc, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
const enum toy_type inst_type = inst->dst.type;
|
||||
const struct toy_dst acc0 =
|
||||
tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type);
|
||||
struct toy_inst *inst2;
|
||||
|
||||
/* only need to take care of integer multiplications */
|
||||
if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
|
||||
return;
|
||||
|
||||
/* acc0 = (src0 & 0x0000ffff) * src1 */
|
||||
tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
|
||||
|
||||
/* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
|
||||
inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
|
||||
inst->src[0], inst->src[1]);
|
||||
inst2->acc_wr_ctrl = true;
|
||||
|
||||
/* dst = acc0 & 0xffffffff */
|
||||
tc_MOV(tc, inst->dst, tsrc_from(acc0));
|
||||
|
||||
tc_discard_inst(tc, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
const enum toy_type inst_type = inst->dst.type;
|
||||
|
||||
if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
|
||||
const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);
|
||||
|
||||
tc_MOV(tc, acc0, inst->src[2]);
|
||||
inst->src[2] = tsrc_null();
|
||||
tc_move_inst(tc, inst);
|
||||
}
|
||||
else {
|
||||
struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
|
||||
struct toy_inst *inst2;
|
||||
|
||||
inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
|
||||
lower_opcode_mul(tc, inst2);
|
||||
|
||||
tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
|
||||
|
||||
tc_discard_inst(tc, inst);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legalize the instructions for register allocation.
|
||||
*/
|
||||
void
|
||||
toy_compiler_legalize_for_ra(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MAC:
|
||||
lower_opcode_mac(tc, inst);
|
||||
break;
|
||||
case BRW_OPCODE_MAD:
|
||||
/* TODO operands must be floats */
|
||||
break;
|
||||
case BRW_OPCODE_MUL:
|
||||
lower_opcode_mul(tc, inst);
|
||||
break;
|
||||
default:
|
||||
if (inst->opcode > TOY_OPCODE_LAST_HW)
|
||||
tc_fail(tc, "internal opcodes not lowered");
|
||||
}
|
||||
}
|
||||
|
||||
/* loop again as the previous pass may add new instructions */
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
validate_imm(tc, inst);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
struct toy_inst *inst2;
|
||||
int nest_level, dist;
|
||||
|
||||
nest_level = 0;
|
||||
dist = -1;
|
||||
|
||||
/* search backward */
|
||||
LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
|
||||
&tc->instructions, list) {
|
||||
if (inst2->marker) {
|
||||
if (inst2->opcode == BRW_OPCODE_DO) {
|
||||
if (nest_level) {
|
||||
nest_level--;
|
||||
}
|
||||
else {
|
||||
/* the following instruction */
|
||||
dist++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst2->opcode == BRW_OPCODE_WHILE)
|
||||
nest_level++;
|
||||
|
||||
dist--;
|
||||
}
|
||||
|
||||
if (tc->gen >= ILO_GEN(7))
|
||||
inst->src[1] = tsrc_imm_w(dist * 2);
|
||||
else
|
||||
inst->dst = tdst_imm_w(dist * 2);
|
||||
}
|
||||
|
||||
static void
|
||||
patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
struct toy_inst *inst2;
|
||||
int nest_level, dist;
|
||||
int jip, uip;
|
||||
|
||||
nest_level = 0;
|
||||
dist = 1;
|
||||
jip = 0;
|
||||
uip = 0;
|
||||
|
||||
/* search forward */
|
||||
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
|
||||
if (inst2->marker)
|
||||
continue;
|
||||
|
||||
if (inst2->opcode == BRW_OPCODE_ENDIF) {
|
||||
if (nest_level) {
|
||||
nest_level--;
|
||||
}
|
||||
else {
|
||||
uip = dist * 2;
|
||||
if (!jip)
|
||||
jip = uip;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (inst2->opcode == BRW_OPCODE_ELSE &&
|
||||
inst->opcode == BRW_OPCODE_IF) {
|
||||
if (!nest_level) {
|
||||
/* the following instruction */
|
||||
jip = (dist + 1) * 2;
|
||||
|
||||
if (tc->gen == ILO_GEN(6)) {
|
||||
uip = jip;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (inst2->opcode == BRW_OPCODE_IF) {
|
||||
nest_level++;
|
||||
}
|
||||
|
||||
dist++;
|
||||
}
|
||||
|
||||
if (tc->gen >= ILO_GEN(7)) {
|
||||
/* what should the type be? */
|
||||
inst->dst.type = TOY_TYPE_D;
|
||||
inst->src[0].type = TOY_TYPE_D;
|
||||
inst->src[1] = tsrc_imm_d(uip << 16 | jip);
|
||||
}
|
||||
else {
|
||||
inst->dst = tdst_imm_w(jip);
|
||||
}
|
||||
|
||||
inst->thread_ctrl = BRW_THREAD_SWITCH;
|
||||
}
|
||||
|
||||
static void
|
||||
patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
struct toy_inst *inst2;
|
||||
bool found = false;
|
||||
int dist = 1;
|
||||
|
||||
/* search forward for instructions that may enable channels */
|
||||
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
|
||||
if (inst2->marker)
|
||||
continue;
|
||||
|
||||
switch (inst2->opcode) {
|
||||
case BRW_OPCODE_ENDIF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_WHILE:
|
||||
found = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (found)
|
||||
break;
|
||||
|
||||
dist++;
|
||||
}
|
||||
|
||||
/* should we set dist to (dist - 1) or 1? */
|
||||
if (!found)
|
||||
dist = 1;
|
||||
|
||||
if (tc->gen >= ILO_GEN(7))
|
||||
inst->src[1] = tsrc_imm_w(dist * 2);
|
||||
else
|
||||
inst->dst = tdst_imm_w(dist * 2);
|
||||
|
||||
inst->thread_ctrl = BRW_THREAD_SWITCH;
|
||||
}
|
||||
|
||||
static void
|
||||
patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
|
||||
{
|
||||
struct toy_inst *inst2, *inst3;
|
||||
int nest_level, dist, jip, uip;
|
||||
|
||||
nest_level = 0;
|
||||
dist = 1;
|
||||
jip = 1 * 2;
|
||||
uip = 1 * 2;
|
||||
|
||||
/* search forward */
|
||||
LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
|
||||
if (inst2->marker) {
|
||||
if (inst2->opcode == BRW_OPCODE_DO)
|
||||
nest_level++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst2->opcode == BRW_OPCODE_ELSE ||
|
||||
inst2->opcode == BRW_OPCODE_ENDIF ||
|
||||
inst2->opcode == BRW_OPCODE_WHILE) {
|
||||
jip = dist * 2;
|
||||
break;
|
||||
}
|
||||
|
||||
dist++;
|
||||
}
|
||||
|
||||
/* go on to determine uip */
|
||||
inst3 = inst2;
|
||||
LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
|
||||
if (inst2->marker) {
|
||||
if (inst2->opcode == BRW_OPCODE_DO)
|
||||
nest_level++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst2->opcode == BRW_OPCODE_WHILE) {
|
||||
if (nest_level) {
|
||||
nest_level--;
|
||||
}
|
||||
else {
|
||||
/* the following instruction */
|
||||
if (tc->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
|
||||
dist++;
|
||||
|
||||
uip = dist * 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dist++;
|
||||
}
|
||||
|
||||
/* should the type be D or W? */
|
||||
inst->dst.type = TOY_TYPE_D;
|
||||
inst->src[0].type = TOY_TYPE_D;
|
||||
inst->src[1] = tsrc_imm_d(uip << 16 | jip);
|
||||
}
|
||||
|
||||
/**
|
||||
* Legalize the instructions for assembling.
|
||||
*/
|
||||
void
|
||||
toy_compiler_legalize_for_asm(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
int pc = 0;
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
int i;
|
||||
|
||||
pc++;
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 4 part 2, page 112:
|
||||
*
|
||||
* "Specifically, for instructions with a single source, it only
|
||||
* uses the first source operand <src0>. In this case, the second
|
||||
* source operand <src1> must be set to null and also with the same
|
||||
* type as the first source operand <src0>. It is a special case
|
||||
* when <src0> is an immediate, as an immediate <src0> uses DW3 of
|
||||
* the instruction word, which is normally used by <src1>. In this
|
||||
* case, <src1> must be programmed with register file ARF and the
|
||||
* same data type as <src0>."
|
||||
*
|
||||
* Since we already fill unused operands with null, we only need to take
|
||||
* care of the type.
|
||||
*/
|
||||
if (tsrc_is_null(inst->src[1]))
|
||||
inst->src[1].type = inst->src[0].type;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MATH:
|
||||
/* math does not support align16 nor exec_size > 8 */
|
||||
inst->access_mode = BRW_ALIGN_1;
|
||||
|
||||
if (inst->exec_size == BRW_EXECUTE_16) {
|
||||
/*
|
||||
* From the Ivy Bridge PRM, volume 4 part 3, page 192:
|
||||
*
|
||||
* "INT DIV function does not support SIMD16."
|
||||
*/
|
||||
if (tc->gen < ILO_GEN(7) ||
|
||||
inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
|
||||
inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
|
||||
struct toy_inst *inst2;
|
||||
|
||||
inst->exec_size = BRW_EXECUTE_8;
|
||||
inst->qtr_ctrl = GEN6_COMPRESSION_1Q;
|
||||
|
||||
inst2 = tc_duplicate_inst(tc, inst);
|
||||
inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
|
||||
inst2->dst = tdst_offset(inst2->dst, 1, 0);
|
||||
inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
|
||||
if (!tsrc_is_null(inst2->src[1]))
|
||||
inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
|
||||
|
||||
pc++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_IF:
|
||||
if (tc->gen >= ILO_GEN(7) &&
|
||||
inst->cond_modifier != BRW_CONDITIONAL_NONE) {
|
||||
struct toy_inst *inst2;
|
||||
|
||||
inst2 = tc_duplicate_inst(tc, inst);
|
||||
|
||||
/* replace the original IF by CMP */
|
||||
inst->opcode = BRW_OPCODE_CMP;
|
||||
|
||||
/* predicate control instead of condition modifier */
|
||||
inst2->dst = tdst_null();
|
||||
inst2->src[0] = tsrc_null();
|
||||
inst2->src[1] = tsrc_null();
|
||||
inst2->cond_modifier = BRW_CONDITIONAL_NONE;
|
||||
inst2->pred_ctrl = BRW_PREDICATE_NORMAL;
|
||||
|
||||
pc++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* MRF to GRF */
|
||||
if (tc->gen >= ILO_GEN(7)) {
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
if (inst->src[i].file != TOY_FILE_MRF)
|
||||
continue;
|
||||
else if (tsrc_is_null(inst->src[i]))
|
||||
break;
|
||||
|
||||
inst->src[i].file = TOY_FILE_GRF;
|
||||
}
|
||||
|
||||
if (inst->dst.file == TOY_FILE_MRF)
|
||||
inst->dst.file = TOY_FILE_GRF;
|
||||
}
|
||||
}
|
||||
|
||||
tc->num_instructions = pc;
|
||||
|
||||
/* set JIP/UIP */
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_IF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
patch_if_else_jip(tc, inst);
|
||||
break;
|
||||
case BRW_OPCODE_ENDIF:
|
||||
patch_endif_jip(tc, inst);
|
||||
break;
|
||||
case BRW_OPCODE_WHILE:
|
||||
patch_while_jip(tc, inst);
|
||||
break;
|
||||
case BRW_OPCODE_BREAK:
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
patch_break_continue_jip(tc, inst);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
52
src/gallium/drivers/ilo/shader/toy_legalize.h
Normal file
52
src/gallium/drivers/ilo/shader/toy_legalize.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_LEGALIZE_H
#define TOY_LEGALIZE_H

#include "toy_compiler.h"
#include "toy_tgsi.h"

/* Lower an instruction to SEND (or SENDC when sendc is set). */
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
                           bool sendc, unsigned sfid);

/* Lower a math instruction. */
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);

/* Allocate GRF registers, in [start_grf, end_grf], to VRF registers. */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf);

/* Legalize the instructions for register allocation. */
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc);

/* Legalize the instructions for assembling. */
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc);

#endif /* TOY_LEGALIZE_H */
|
||||
628
src/gallium/drivers/ilo/shader/toy_legalize_ra.c
Normal file
628
src/gallium/drivers/ilo/shader/toy_legalize_ra.c
Normal file
|
|
@ -0,0 +1,628 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#include <stdlib.h> /* for qsort() */
|
||||
#include "toy_compiler.h"
|
||||
#include "toy_legalize.h"
|
||||
|
||||
/**
|
||||
* Live interval of a VRF register.
|
||||
*/
|
||||
struct linear_scan_live_interval {
|
||||
int vrf;
|
||||
int startpoint;
|
||||
int endpoint;
|
||||
|
||||
/*
|
||||
* should this be assigned a consecutive register of the previous
|
||||
* interval's?
|
||||
*/
|
||||
bool consecutive;
|
||||
|
||||
int reg;
|
||||
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/**
|
||||
* Linear scan.
|
||||
*/
|
||||
struct linear_scan {
|
||||
struct linear_scan_live_interval *intervals;
|
||||
int max_vrf, num_vrfs;
|
||||
|
||||
int num_regs;
|
||||
|
||||
struct list_head active_list;
|
||||
int *free_regs;
|
||||
int num_free_regs;
|
||||
|
||||
int *vrf_mapping;
|
||||
};
|
||||
|
||||
/**
|
||||
* Return a chunk of registers to the free register pool.
|
||||
*/
|
||||
static void
|
||||
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
|
||||
}
|
||||
|
||||
/**
 * qsort() comparator that orders register numbers in descending order.
 *
 * The result is computed with comparisons rather than a subtraction, so it
 * cannot overflow for any pair of int values.
 *
 * \return a positive value when *elem1 < *elem2, a negative value when
 *         *elem1 > *elem2, and zero when they are equal
 */
static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int reg1 = *(const int *) elem1;
   const int reg2 = *(const int *) elem2;

   /* in reverse order */
   return (reg2 > reg1) - (reg2 < reg1);
}
|
||||
|
||||
/**
|
||||
* Allocate a chunk of registers from the free register pool.
|
||||
*/
|
||||
static int
|
||||
linear_scan_allocate_regs(struct linear_scan *ls, int count)
|
||||
{
|
||||
bool sorted = false;
|
||||
int reg;
|
||||
|
||||
/* simple cases */
|
||||
if (count > ls->num_free_regs)
|
||||
return -1;
|
||||
else if (count == 1)
|
||||
return ls->free_regs[--ls->num_free_regs];
|
||||
|
||||
/* TODO a free register pool */
|
||||
/* TODO reserve some regs for spilling */
|
||||
while (true) {
|
||||
bool found = false;
|
||||
int start;
|
||||
|
||||
/*
|
||||
* find a chunk of registers that have consecutive register
|
||||
* numbers
|
||||
*/
|
||||
for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
|
||||
int i;
|
||||
|
||||
for (i = 1; i < count; i++) {
|
||||
if (ls->free_regs[start - i] != ls->free_regs[start] + i)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i >= count) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
reg = ls->free_regs[start];
|
||||
|
||||
if (start != ls->num_free_regs - 1) {
|
||||
start++;
|
||||
memmove(&ls->free_regs[start - count],
|
||||
&ls->free_regs[start],
|
||||
sizeof(*ls->free_regs) * (ls->num_free_regs - start));
|
||||
}
|
||||
ls->num_free_regs -= count;
|
||||
break;
|
||||
}
|
||||
else if (!sorted) {
|
||||
/* sort and retry */
|
||||
qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
|
||||
linear_scan_compare_regs);
|
||||
sorted = true;
|
||||
}
|
||||
else {
|
||||
/* failed */
|
||||
reg = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an interval to the active list.
|
||||
*/
|
||||
static void
|
||||
linear_scan_add_active(struct linear_scan *ls,
|
||||
struct linear_scan_live_interval *interval)
|
||||
{
|
||||
struct linear_scan_live_interval *pos;
|
||||
|
||||
/* keep the active list sorted by endpoints */
|
||||
LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
|
||||
if (pos->endpoint >= interval->endpoint)
|
||||
break;
|
||||
}
|
||||
|
||||
list_addtail(&interval->list, &pos->list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an interval from the active list.
|
||||
*/
|
||||
static void
|
||||
linear_scan_remove_active(struct linear_scan *ls,
|
||||
struct linear_scan_live_interval *interval)
|
||||
{
|
||||
list_del(&interval->list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove intervals that are no longer active from the active list.
|
||||
*/
|
||||
static void
|
||||
linear_scan_expire_active(struct linear_scan *ls, int pc)
|
||||
{
|
||||
struct linear_scan_live_interval *interval, *next;
|
||||
|
||||
LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
|
||||
/*
|
||||
* since we sort intervals on the active list by their endpoints, we
|
||||
* know that this and the rest of the intervals are still active.
|
||||
*/
|
||||
if (interval->endpoint >= pc)
|
||||
break;
|
||||
|
||||
linear_scan_remove_active(ls, interval);
|
||||
|
||||
/* recycle the reg */
|
||||
linear_scan_free_regs(ls, interval->reg, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Spill an interval.
 *
 * Spilling is not implemented; this only asserts.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}
|
||||
|
||||
/**
|
||||
* Spill a range of intervals.
|
||||
*/
|
||||
static void
|
||||
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct linear_scan_live_interval *interval = &ls->intervals[first + i];
|
||||
|
||||
linear_scan_spill(ls, interval, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform linear scan to allocate registers for the intervals.
|
||||
*/
|
||||
static bool
|
||||
linear_scan_run(struct linear_scan *ls)
|
||||
{
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
while (i < ls->num_vrfs) {
|
||||
struct linear_scan_live_interval *first = &ls->intervals[i];
|
||||
int reg, count;
|
||||
|
||||
/*
|
||||
* BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
|
||||
* support that
|
||||
*/
|
||||
for (count = 1; i + count < ls->num_vrfs; count++) {
|
||||
const struct linear_scan_live_interval *interval =
|
||||
&ls->intervals[i + count];
|
||||
|
||||
if (interval->startpoint != first->startpoint ||
|
||||
!interval->consecutive)
|
||||
break;
|
||||
}
|
||||
|
||||
reg = linear_scan_allocate_regs(ls, count);
|
||||
|
||||
/* expire intervals that are no longer active and try again */
|
||||
if (reg < 0) {
|
||||
linear_scan_expire_active(ls, first->startpoint);
|
||||
reg = linear_scan_allocate_regs(ls, count);
|
||||
}
|
||||
|
||||
/* have to spill some intervals */
|
||||
if (reg < 0) {
|
||||
struct linear_scan_live_interval *last_active =
|
||||
container_of(ls->active_list.prev,
|
||||
(struct linear_scan_live_interval *) NULL, list);
|
||||
|
||||
/* heuristically spill the interval that ends last */
|
||||
if (count > 1 || last_active->endpoint < first->endpoint) {
|
||||
linear_scan_spill_range(ls, i, count);
|
||||
i += count;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* make some room for the new interval */
|
||||
linear_scan_spill(ls, last_active, true);
|
||||
reg = linear_scan_allocate_regs(ls, count);
|
||||
if (reg < 0) {
|
||||
assert(!"failed to spill any register");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
while (count--) {
|
||||
struct linear_scan_live_interval *interval = &ls->intervals[i++];
|
||||
|
||||
interval->reg = reg++;
|
||||
linear_scan_add_active(ls, interval);
|
||||
|
||||
ls->vrf_mapping[interval->vrf] = interval->reg;
|
||||
|
||||
/*
|
||||
* this should and must be the case because of how we initialized the
|
||||
* intervals
|
||||
*/
|
||||
assert(interval->vrf - first->vrf == interval->reg - first->reg);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new interval.
|
||||
*/
|
||||
static void
|
||||
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
|
||||
{
|
||||
if (ls->intervals[vrf].vrf)
|
||||
return;
|
||||
|
||||
ls->intervals[vrf].vrf = vrf;
|
||||
ls->intervals[vrf].startpoint = pc;
|
||||
|
||||
ls->num_vrfs++;
|
||||
if (vrf > ls->max_vrf)
|
||||
ls->max_vrf = vrf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform (oversimplified?) live variable analysis.
|
||||
*/
|
||||
static void
|
||||
linear_scan_init_live_intervals(struct linear_scan *ls,
|
||||
struct toy_compiler *tc)
|
||||
{
|
||||
const struct toy_inst *inst;
|
||||
int pc, do_pc, while_pc;
|
||||
|
||||
pc = 0;
|
||||
do_pc = -1;
|
||||
while_pc = -1;
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next_no_skip(tc)) != NULL) {
|
||||
const int startpoint = (pc <= while_pc) ? do_pc : pc;
|
||||
const int endpoint = (pc <= while_pc) ? while_pc : pc;
|
||||
int vrf, i;
|
||||
|
||||
/*
|
||||
* assume all registers used in this outermost loop are live through out
|
||||
* the whole loop
|
||||
*/
|
||||
if (inst->marker) {
|
||||
if (pc > while_pc) {
|
||||
struct toy_inst *inst2;
|
||||
int loop_level = 1;
|
||||
|
||||
assert(inst->opcode == BRW_OPCODE_DO);
|
||||
do_pc = pc;
|
||||
while_pc = pc + 1;
|
||||
|
||||
/* find the matching BRW_OPCODE_WHILE */
|
||||
LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
|
||||
&tc->instructions, list) {
|
||||
if (inst2->marker) {
|
||||
assert(inst->opcode == BRW_OPCODE_DO);
|
||||
loop_level++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst2->opcode == BRW_OPCODE_WHILE) {
|
||||
loop_level--;
|
||||
if (!loop_level)
|
||||
break;
|
||||
}
|
||||
while_pc++;
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst->dst.file == TOY_FILE_VRF) {
|
||||
int num_dst;
|
||||
|
||||
/* TODO this is a hack */
|
||||
if (inst->opcode == BRW_OPCODE_SEND ||
|
||||
inst->opcode == BRW_OPCODE_SENDC) {
|
||||
const uint32_t mdesc = inst->src[1].val32;
|
||||
int response_length = (mdesc >> 20) & 0x1f;
|
||||
|
||||
num_dst = response_length;
|
||||
if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
|
||||
num_dst /= 2;
|
||||
}
|
||||
else {
|
||||
num_dst = 1;
|
||||
}
|
||||
|
||||
vrf = inst->dst.val32 / TOY_REG_WIDTH;
|
||||
|
||||
for (i = 0; i < num_dst; i++) {
|
||||
/* first use */
|
||||
if (!ls->intervals[vrf].vrf)
|
||||
linear_scan_add_live_interval(ls, vrf, startpoint);
|
||||
|
||||
ls->intervals[vrf].endpoint = endpoint;
|
||||
ls->intervals[vrf].consecutive = (i > 0);
|
||||
|
||||
vrf++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
if (inst->src[i].file != TOY_FILE_VRF)
|
||||
continue;
|
||||
|
||||
vrf = inst->src[i].val32 / TOY_REG_WIDTH;
|
||||
|
||||
/* first use */
|
||||
if (!ls->intervals[vrf].vrf)
|
||||
linear_scan_add_live_interval(ls, vrf, startpoint);
|
||||
|
||||
ls->intervals[vrf].endpoint = endpoint;
|
||||
}
|
||||
|
||||
pc++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up after performing linear scan.
|
||||
*/
|
||||
static void
|
||||
linear_scan_cleanup(struct linear_scan *ls)
|
||||
{
|
||||
FREE(ls->vrf_mapping);
|
||||
FREE(ls->intervals);
|
||||
FREE(ls->free_regs);
|
||||
}
|
||||
|
||||
static int
|
||||
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
|
||||
{
|
||||
const struct linear_scan_live_interval *interval1 = elem1;
|
||||
const struct linear_scan_live_interval *interval2 = elem2;
|
||||
|
||||
/* make unused elements appear at the end */
|
||||
if (!interval1->vrf)
|
||||
return 1;
|
||||
else if (!interval2->vrf)
|
||||
return -1;
|
||||
|
||||
/* sort by startpoints first, and then by vrf */
|
||||
if (interval1->startpoint != interval2->startpoint)
|
||||
return (interval1->startpoint - interval2->startpoint);
|
||||
else
|
||||
return (interval1->vrf - interval2->vrf);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare for linear scan.
|
||||
*/
|
||||
static bool
|
||||
linear_scan_init(struct linear_scan *ls, int num_regs,
|
||||
struct toy_compiler *tc)
|
||||
{
|
||||
int num_intervals, i;
|
||||
|
||||
memset(ls, 0, sizeof(*ls));
|
||||
|
||||
/* this may be much larger than ls->num_vrfs... */
|
||||
num_intervals = tc->next_vrf;
|
||||
ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
|
||||
if (!ls->intervals)
|
||||
return false;
|
||||
|
||||
linear_scan_init_live_intervals(ls, tc);
|
||||
/* sort intervals by startpoints */
|
||||
qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
|
||||
linear_scan_compare_live_intervals);
|
||||
|
||||
ls->num_regs = num_regs;
|
||||
ls->num_free_regs = num_regs;
|
||||
|
||||
ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
|
||||
if (!ls->free_regs) {
|
||||
FREE(ls->intervals);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* add in reverse order as we will allocate from the tail */
|
||||
for (i = 0; i < ls->num_regs; i++)
|
||||
ls->free_regs[i] = num_regs - i - 1;
|
||||
|
||||
list_inithead(&ls->active_list);
|
||||
|
||||
ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
|
||||
if (!ls->vrf_mapping) {
|
||||
FREE(ls->intervals);
|
||||
FREE(ls->free_regs);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate registers with linear scan.
|
||||
*/
|
||||
static void
|
||||
linear_scan_allocation(struct toy_compiler *tc,
|
||||
int start_grf, int end_grf,
|
||||
int num_grf_per_vrf)
|
||||
{
|
||||
const int num_grfs = end_grf - start_grf + 1;
|
||||
struct linear_scan ls;
|
||||
struct toy_inst *inst;
|
||||
|
||||
if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
|
||||
return;
|
||||
|
||||
if (!linear_scan_run(&ls)) {
|
||||
tc_fail(tc, "failed to allocate registers");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
int i;
|
||||
|
||||
if (inst->dst.file == TOY_FILE_VRF) {
|
||||
const uint32_t val32 = inst->dst.val32;
|
||||
int reg = val32 / TOY_REG_WIDTH;
|
||||
int subreg = val32 % TOY_REG_WIDTH;
|
||||
|
||||
/* map to GRF */
|
||||
reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
|
||||
|
||||
inst->dst.file = TOY_FILE_GRF;
|
||||
inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
|
||||
}
|
||||
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
const uint32_t val32 = inst->src[i].val32;
|
||||
int reg, subreg;
|
||||
|
||||
if (inst->src[i].file != TOY_FILE_VRF)
|
||||
continue;
|
||||
|
||||
reg = val32 / TOY_REG_WIDTH;
|
||||
subreg = val32 % TOY_REG_WIDTH;
|
||||
|
||||
/* map to GRF */
|
||||
reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
|
||||
|
||||
inst->src[i].file = TOY_FILE_GRF;
|
||||
inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
|
||||
}
|
||||
}
|
||||
|
||||
linear_scan_cleanup(&ls);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trivially allocate registers.
|
||||
*/
|
||||
static void
|
||||
trivial_allocation(struct toy_compiler *tc,
|
||||
int start_grf, int end_grf,
|
||||
int num_grf_per_vrf)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
int max_grf = -1;
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
int i;
|
||||
|
||||
if (inst->dst.file == TOY_FILE_VRF) {
|
||||
const uint32_t val32 = inst->dst.val32;
|
||||
int reg = val32 / TOY_REG_WIDTH;
|
||||
int subreg = val32 % TOY_REG_WIDTH;
|
||||
|
||||
reg = reg * num_grf_per_vrf + start_grf - 1;
|
||||
|
||||
inst->dst.file = TOY_FILE_GRF;
|
||||
inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
|
||||
|
||||
if (reg > max_grf)
|
||||
max_grf = reg;
|
||||
}
|
||||
|
||||
for (i = 0; i < Elements(inst->src); i++) {
|
||||
const uint32_t val32 = inst->src[i].val32;
|
||||
int reg, subreg;
|
||||
|
||||
if (inst->src[i].file != TOY_FILE_VRF)
|
||||
continue;
|
||||
|
||||
reg = val32 / TOY_REG_WIDTH;
|
||||
subreg = val32 % TOY_REG_WIDTH;
|
||||
|
||||
reg = reg * num_grf_per_vrf + start_grf - 1;
|
||||
|
||||
inst->src[i].file = TOY_FILE_GRF;
|
||||
inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
|
||||
|
||||
if (reg > max_grf)
|
||||
max_grf = reg;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_grf + num_grf_per_vrf - 1 > end_grf)
|
||||
tc_fail(tc, "failed to allocate registers");
|
||||
}
|
||||
|
||||
/**
 * Allocate GRF registers to VRF registers.
 *
 * Linear scan is always used; the trivial allocator is kept as a
 * compile-time alternative.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   const bool use_linear_scan = true;

   if (use_linear_scan)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
|
||||
71
src/gallium/drivers/ilo/shader/toy_optimize.c
Normal file
71
src/gallium/drivers/ilo/shader/toy_optimize.c
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#include "toy_compiler.h"
|
||||
#include "toy_tgsi.h"
|
||||
#include "toy_optimize.h"
|
||||
|
||||
/**
|
||||
* This just eliminates instructions with null dst so far.
|
||||
*/
|
||||
static void
|
||||
eliminate_dead_code(struct toy_compiler *tc)
|
||||
{
|
||||
struct toy_inst *inst;
|
||||
|
||||
tc_head(tc);
|
||||
while ((inst = tc_next(tc)) != NULL) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_IF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_ENDIF:
|
||||
case BRW_OPCODE_WHILE:
|
||||
case BRW_OPCODE_BREAK:
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
case BRW_OPCODE_SEND:
|
||||
case BRW_OPCODE_SENDC:
|
||||
case BRW_OPCODE_NOP:
|
||||
/* never eliminated */
|
||||
break;
|
||||
default:
|
||||
if (tdst_is_null(inst->dst) || !inst->dst.writemask) {
|
||||
/* math is always BRW_CONDITIONAL_NONE */
|
||||
if ((inst->opcode == BRW_OPCODE_MATH ||
|
||||
inst->cond_modifier == BRW_CONDITIONAL_NONE) &&
|
||||
!inst->acc_wr_ctrl)
|
||||
tc_discard_inst(tc, inst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Optimize the instructions.  Dead code elimination is the only pass.
 */
void
toy_compiler_optimize(struct toy_compiler *tc)
{
   eliminate_dead_code(tc);
}
|
||||
36
src/gallium/drivers/ilo/shader/toy_optimize.h
Normal file
36
src/gallium/drivers/ilo/shader/toy_optimize.h
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_OPTIMIZE_H
|
||||
#define TOY_OPTIMIZE_H
|
||||
|
||||
#include "toy_compiler.h"
|
||||
|
||||
/**
 * Run the optimization passes on the instructions held by \p tc.
 */
void toy_compiler_optimize(struct toy_compiler *tc);
|
||||
|
||||
#endif /* TOY_OPTIMIZE_H */
|
||||
2736
src/gallium/drivers/ilo/shader/toy_tgsi.c
Normal file
2736
src/gallium/drivers/ilo/shader/toy_tgsi.c
Normal file
File diff suppressed because it is too large
Load diff
253
src/gallium/drivers/ilo/shader/toy_tgsi.h
Normal file
253
src/gallium/drivers/ilo/shader/toy_tgsi.h
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2012-2013 LunarG, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Chia-I Wu <olv@lunarg.com>
|
||||
*/
|
||||
|
||||
#ifndef TOY_TGSI_H
|
||||
#define TOY_TGSI_H
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "toy_compiler.h"
|
||||
|
||||
struct tgsi_token;
|
||||
struct tgsi_full_instruction;
|
||||
struct util_hash_table;
|
||||
|
||||
/**
 * Signature of a per-instruction translation callback.
 *
 * struct toy_tgsi keeps a table of these in translate_table.
 * NOTE(review): presumably the table is indexed by TGSI opcode and the
 * callback emits toy instructions for \p tgsi_inst using the already
 * translated \p dst and \p src operands -- confirm against toy_tgsi.c.
 */
typedef void
(*toy_tgsi_translate)(struct toy_compiler *tc,
                      const struct tgsi_full_instruction *tgsi_inst,
                      struct toy_dst *dst,
                      struct toy_src *src);
|
||||
|
||||
struct toy_tgsi {
|
||||
struct toy_compiler *tc;
|
||||
bool aos;
|
||||
const toy_tgsi_translate *translate_table;
|
||||
|
||||
struct util_hash_table *reg_mapping;
|
||||
|
||||
struct {
|
||||
bool vs_prohibit_ucps;
|
||||
int fs_coord_origin;
|
||||
int fs_coord_pixel_center;
|
||||
bool fs_color0_writes_all_cbufs;
|
||||
int fs_depth_layout;
|
||||
int gs_input_prim;
|
||||
int gs_output_prim;
|
||||
int gs_max_output_vertices;
|
||||
} props;
|
||||
|
||||
struct {
|
||||
enum toy_type *types;
|
||||
uint32_t (*buf)[4];
|
||||
int cur, size;
|
||||
} imm_data;
|
||||
|
||||
struct {
|
||||
int index:16;
|
||||
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
|
||||
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
|
||||
unsigned semantic_index:8;
|
||||
unsigned interp:4; /* TGSI_INTERPOLATE_x */
|
||||
unsigned centroid:1;
|
||||
} inputs[PIPE_MAX_SHADER_INPUTS];
|
||||
int num_inputs;
|
||||
|
||||
struct {
|
||||
int index:16;
|
||||
unsigned undefined_mask:4;
|
||||
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
|
||||
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
|
||||
unsigned semantic_index:8;
|
||||
} outputs[PIPE_MAX_SHADER_OUTPUTS];
|
||||
int num_outputs;
|
||||
|
||||
struct {
|
||||
int index:16;
|
||||
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
|
||||
unsigned semantic_index:8;
|
||||
} system_values[8];
|
||||
int num_system_values;
|
||||
|
||||
bool uses_kill;
|
||||
};
|
||||
|
||||
/**
|
||||
* Find the slot of the TGSI input.
|
||||
*/
|
||||
static inline int
|
||||
toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index)
|
||||
{
|
||||
int slot;
|
||||
|
||||
for (slot = 0; slot < tgsi->num_inputs; slot++) {
|
||||
if (tgsi->inputs[slot].index == index)
|
||||
return slot;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the slot of the TGSI system value.
|
||||
*/
|
||||
static inline int
|
||||
toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index)
|
||||
{
|
||||
int slot;
|
||||
|
||||
for (slot = 0; slot < tgsi->num_system_values; slot++) {
|
||||
if (tgsi->system_values[slot].index == index)
|
||||
return slot;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the immediate data of the TGSI immediate.
|
||||
*/
|
||||
static inline const uint32_t *
|
||||
toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index,
|
||||
enum toy_type *type)
|
||||
{
|
||||
const uint32_t *imm;
|
||||
|
||||
if (index >= tgsi->imm_data.cur)
|
||||
return NULL;
|
||||
|
||||
imm = tgsi->imm_data.buf[index];
|
||||
if (type)
|
||||
*type = tgsi->imm_data.types[index];
|
||||
|
||||
return imm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the dimension of the texture coordinates, as well as the location of
|
||||
* the shadow reference value or the sample index.
|
||||
*/
|
||||
static inline int
|
||||
toy_tgsi_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
|
||||
{
|
||||
int dim;
|
||||
|
||||
/*
|
||||
* Depending on the texture target, (src0, src1.x) is interpreted
|
||||
* differently:
|
||||
*
|
||||
* (s, *, *, *, *), for 1D
|
||||
* (s, t, *, *, *), for 2D, RECT
|
||||
* (s, t, r, *, *), for 3D, CUBE
|
||||
*
|
||||
* (s, layer, *, *, *), for 1D_ARRAY
|
||||
* (s, t, layer, *, *), for 2D_ARRAY
|
||||
* (s, t, r, layer, *), for CUBE_ARRAY
|
||||
*
|
||||
* (s, *, shadow, *, *), for SHADOW1D
|
||||
* (s, t, shadow, *, *), for SHADOW2D, SHADOWRECT
|
||||
* (s, t, r, shadow, *), for SHADOWCUBE
|
||||
*
|
||||
* (s, layer, shadow, *, *), for SHADOW1D_ARRAY
|
||||
* (s, t, layer, shadow, *), for SHADOW2D_ARRAY
|
||||
* (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY
|
||||
*
|
||||
* (s, t, sample, *, *), for 2D_MSAA
|
||||
* (s, t, layer, sample, *), for 2D_ARRAY_MSAA
|
||||
*/
|
||||
switch (tgsi_tex) {
|
||||
case TGSI_TEXTURE_1D:
|
||||
case TGSI_TEXTURE_SHADOW1D:
|
||||
dim = 1;
|
||||
break;
|
||||
case TGSI_TEXTURE_2D:
|
||||
case TGSI_TEXTURE_RECT:
|
||||
case TGSI_TEXTURE_1D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOW2D:
|
||||
case TGSI_TEXTURE_SHADOWRECT:
|
||||
case TGSI_TEXTURE_SHADOW1D_ARRAY:
|
||||
case TGSI_TEXTURE_2D_MSAA:
|
||||
dim = 2;
|
||||
break;
|
||||
case TGSI_TEXTURE_3D:
|
||||
case TGSI_TEXTURE_CUBE:
|
||||
case TGSI_TEXTURE_2D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOWCUBE:
|
||||
case TGSI_TEXTURE_SHADOW2D_ARRAY:
|
||||
case TGSI_TEXTURE_2D_ARRAY_MSAA:
|
||||
dim = 3;
|
||||
break;
|
||||
case TGSI_TEXTURE_CUBE_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
|
||||
dim = 4;
|
||||
break;
|
||||
default:
|
||||
assert(!"unknown texture target");
|
||||
dim = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (shadow_or_sample) {
|
||||
switch (tgsi_tex) {
|
||||
case TGSI_TEXTURE_SHADOW1D:
|
||||
/* there is a gap */
|
||||
*shadow_or_sample = 2;
|
||||
break;
|
||||
case TGSI_TEXTURE_SHADOW2D:
|
||||
case TGSI_TEXTURE_SHADOWRECT:
|
||||
case TGSI_TEXTURE_SHADOWCUBE:
|
||||
case TGSI_TEXTURE_SHADOW1D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOW2D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
|
||||
case TGSI_TEXTURE_2D_MSAA:
|
||||
case TGSI_TEXTURE_2D_ARRAY_MSAA:
|
||||
*shadow_or_sample = dim;
|
||||
break;
|
||||
default:
|
||||
/* no shadow nor sample */
|
||||
*shadow_or_sample = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dim;
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): the functions below are implemented outside this header;
 * the descriptions are derived from their names and signatures only and
 * should be confirmed against toy_tgsi.c.
 */

/**
 * Presumably translates the TGSI \p tokens into instructions of \p tc and
 * records what was learned about the shader in \p tgsi.
 */
void
toy_compiler_translate_tgsi(struct toy_compiler *tc,
                            const struct tgsi_token *tokens, bool aos,
                            struct toy_tgsi *tgsi);

/**
 * Presumably releases the resources owned by \p tgsi.
 */
void
toy_tgsi_cleanup(struct toy_tgsi *tgsi);

/**
 * Presumably returns the virtual register assigned to the TGSI register
 * identified by (\p file, \p dimension, \p index).
 */
int
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
                 enum tgsi_file_type file, int dimension, int index);

/**
 * Presumably prints the contents of \p tgsi for debugging.
 */
void
toy_tgsi_dump(const struct toy_tgsi *tgsi);
|
||||
|
||||
#endif /* TOY_TGSI_H */
|
||||
Loading…
Add table
Reference in a new issue