i965: Add support for instruction compaction.

This reduces program size by using some smaller encodings for common bit
patterns in the Gen ISA, with the hope of making programs fit in the
instruction cache better.

v2: Use larger bitshifts for the uncompressed field setups, in line with the
    way it's described in the spec.  Consistently name a brw_compile "p" like
    all other code.  Add a couple more tests.  Consistently call things
    "compacted" not "compressed" (which is a different feature).  Drop the
    explicit check for not compacting SENDs, which is unjustified and already
    implied by our lack of support for immediate values.

Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
Eric Anholt 2012-01-31 16:55:20 -08:00
parent f5e2706395
commit 077d01b673
10 changed files with 946 additions and 8 deletions

View file

@ -1,3 +1,4 @@
Makefile Makefile
i965_symbols_test i965_symbols_test
libi965_dri.la libi965_dri.la
test_eu_compact

View file

@ -51,16 +51,30 @@ libi965_dri_la_SOURCES = \
$(i965_C_FILES) \ $(i965_C_FILES) \
$(i965_CXX_FILES) $(i965_CXX_FILES)
# list of libs to be linked against by i965_dri.so and i965 test programs.
COMMON_LIBS = \ COMMON_LIBS = \
libi965_dri.la \ libi965_dri.la \
$(DRI_LIB_DEPS) \ $(DRI_LIB_DEPS) \
$(INTEL_LIBS) \ $(INTEL_LIBS) \
../common/libdricommon.la ../common/libdricommon.la
TEST_LIBS = \
$(COMMON_LIBS) \
-lrt \
../common/libdri_test_stubs.la
i965_dri_la_SOURCES = i965_dri_la_SOURCES =
i965_dri_la_LIBADD = $(COMMON_LIBS) i965_dri_la_LIBADD = $(COMMON_LIBS)
i965_dri_la_LDFLAGS = -module -avoid-version -shared i965_dri_la_LDFLAGS = -module -avoid-version -shared
TESTS = test_eu_compact
check_PROGRAMS = test_eu_compact
test_eu_compact_SOURCES = \
test_eu_compact.c
nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
test_eu_compact_LDADD = $(TEST_LIBS)
# Provide compatibility with scripts for the old Mesa build system for # Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree. # a while by putting a link to the driver into /lib of the build tree.
all-local: i965_dri.la all-local: i965_dri.la

View file

@ -44,6 +44,7 @@ i965_C_FILES = \
brw_draw.c \ brw_draw.c \
brw_draw_upload.c \ brw_draw_upload.c \
brw_eu.c \ brw_eu.c \
brw_eu_compact.c \
brw_eu_debug.c \ brw_eu_debug.c \
brw_eu_emit.c \ brw_eu_emit.c \
brw_eu_util.c \ brw_eu_util.c \

View file

@ -214,6 +214,11 @@ const GLuint *brw_get_program( struct brw_compile *p,
{ {
GLuint i; GLuint i;
brw_compact_instructions(p);
/* We emit a cacheline (8 instructions) of NOPs at the end of the program to
* make sure that instruction prefetch doesn't wander off into some other BO.
*/
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
brw_NOP(p); brw_NOP(p);
@ -224,20 +229,37 @@ const GLuint *brw_get_program( struct brw_compile *p,
void void
brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end) brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end)
{ {
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
void *store = p->store; void *store = p->store;
bool dump_hex = false;
for (int offset = start; offset < end; offset += 16) { for (int offset = start; offset < end;) {
struct brw_instruction *insn = store + offset; struct brw_instruction *insn = store + offset;
struct brw_instruction uncompacted;
printf("0x%08x: ", offset); printf("0x%08x: ", offset);
if (0) { if (insn->header.cmpt_control) {
struct brw_compact_instruction *compacted = (void *)insn;
if (dump_hex) {
printf("0x%08x 0x%08x ",
((uint32_t *)insn)[1],
((uint32_t *)insn)[0]);
}
brw_uncompact_instruction(intel, &uncompacted, compacted);
insn = &uncompacted;
offset += 8;
} else {
if (dump_hex) {
printf("0x%08x 0x%08x 0x%08x 0x%08x ", printf("0x%08x 0x%08x 0x%08x 0x%08x ",
((uint32_t *)insn)[3], ((uint32_t *)insn)[3],
((uint32_t *)insn)[2], ((uint32_t *)insn)[2],
((uint32_t *)insn)[1], ((uint32_t *)insn)[1],
((uint32_t *)insn)[0]); ((uint32_t *)insn)[0]);
} }
offset += 16;
}
brw_disasm(stdout, insn, p->brw->intel.gen); brw_disasm(stdout, insn, p->brw->intel.gen);
} }

View file

@ -1107,6 +1107,19 @@ void brw_set_uip_jip(struct brw_compile *p);
uint32_t brw_swap_cmod(uint32_t cmod); uint32_t brw_swap_cmod(uint32_t cmod);
/* brw_eu_compact.c */

/* Compacts the instructions held in p->store in place and updates
 * p->nr_insn / p->next_insn_offset to match the new program size.
 */
void brw_compact_instructions(struct brw_compile *p);

/* Expands the 8-byte compacted instruction src into the full 16-byte
 * instruction dst.  dst is zeroed before being filled in.
 */
void brw_uncompact_instruction(struct intel_context *intel,
struct brw_instruction *dst,
struct brw_compact_instruction *src);

/* Attempts to encode src in compacted form.  Returns true and writes dst on
 * success; returns false and leaves dst untouched otherwise.
 */
bool brw_try_compact_instruction(struct brw_compile *p,
struct brw_compact_instruction *dst,
struct brw_instruction *src);

/* Prints a diagnostic describing how orig and uncompacted differ, for use
 * when a compact/uncompact round trip does not reproduce the original.
 */
void brw_debug_compact_uncompact(struct intel_context *intel,
struct brw_instruction *orig,
struct brw_instruction *uncompacted);
/* brw_optimize.c */ /* brw_optimize.c */
void brw_optimize(struct brw_compile *p); void brw_optimize(struct brw_compile *p);
void brw_remove_duplicate_mrf_moves(struct brw_compile *p); void brw_remove_duplicate_mrf_moves(struct brw_compile *p);

View file

@ -0,0 +1,558 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** @file brw_eu_compact.c
*
* Instruction compaction is a feature of gm45 and newer hardware that allows
* for a smaller instruction encoding.
*
* The instruction cache is on the order of 32KB, and many programs generate
* far more instructions than that. The instruction cache is built to barely
* keep up with instruction dispatch ability in cache hit cases -- L1
* instruction cache misses that still hit in the next level could limit
* throughput by around 50%.
*
* The idea of instruction compaction is that most instructions use a tiny
* subset of the GPU functionality, so we can encode what would be a 16 byte
* instruction in 8 bytes using some lookup tables for various fields.
*/
#include "brw_context.h"
#include "brw_eu.h"
/* Lookup table for the 5-bit control_index field of a compacted instruction.
 *
 * Each entry is a 17-bit pattern in the layout used by set_control_index()
 * and set_uncompacted_control(): entry bits 0-15 correspond to bits 8-23 of
 * the first dword of the full instruction, and entry bit 16 corresponds to
 * bit 31 of that dword.  An instruction can only be compacted if its control
 * bits exactly match one of these entries.
 */
static const uint32_t gen6_control_index_table[32] = {
0b00000000000000000,
0b01000000000000000,
0b00110000000000000,
0b00000000100000000,
0b00010000000000000,
0b00001000100000000,
0b00000000100000010,
0b00000000000000010,
0b01000000100000000,
0b01010000000000000,
0b10110000000000000,
0b00100000000000000,
0b11010000000000000,
0b11000000000000000,
0b01001000100000000,
0b01000000000001000,
0b01000000000000100,
0b00000000000001000,
0b00000000000000100,
0b00111000100000000,
0b00001000100000010,
0b00110000100000000,
0b00110000000000001,
0b00100000000000001,
0b00110000000000010,
0b00110000000000101,
0b00110000000001001,
0b00110000000010000,
0b00110000000000011,
0b00110000000000100,
0b00110000100001000,
0b00100000000001001
};
/* Lookup table for the 5-bit data_type_index field of a compacted
 * instruction.
 *
 * Each entry is an 18-bit pattern in the layout used by set_datatype_index()
 * and set_uncompacted_datatype(): entry bits 0-14 correspond to bits 0-14 of
 * bits1.ud, and entry bits 15-17 correspond to bits 29-31 of bits1.ud.
 *
 * Entries 23 and 25 hold the same value.  Because the compactor searches
 * from index 0 and stops at the first match, it never emits index 25.
 * NOTE(review): presumably the duplicate mirrors the hardware table --
 * confirm against the hardware documentation before changing either entry.
 */
static const uint32_t gen6_datatype_table[32] = {
0b001001110000000000,
0b001000110000100000,
0b001001110000000001,
0b001000000001100000,
0b001010110100101001,
0b001000000110101101,
0b001100011000101100,
0b001011110110101101,
0b001000000111101100,
0b001000000001100001,
0b001000110010100101,
0b001000000001000001,
0b001000001000110001,
0b001000001000101001,
0b001000000000100000,
0b001000001000110010,
0b001010010100101001,
0b001011010010100101,
0b001000000110100101,
0b001100011000101001,
0b001011011000101100,
0b001011010110100101,
0b001011110110100101,
0b001111011110111101,
0b001111011110111100,
0b001111011110111101,
0b001111011110011101,
0b001111011110111110,
0b001000000000100001,
0b001000000000100010,
0b001001111111011101,
0b001000001110111110,
};
/* Lookup table for the 5-bit sub_reg_index field of a compacted instruction.
 *
 * Each entry is a 15-bit pattern in the layout used by set_subreg_index()
 * and set_uncompacted_subreg(): entry bits 0-4 are the destination
 * sub-register number, bits 5-9 the src0 sub-register number, and bits 10-14
 * the src1 sub-register number.
 */
static const uint32_t gen6_subreg_table[32] = {
0b000000000000000,
0b000000000000100,
0b000000110000000,
0b111000000000000,
0b011110000001000,
0b000010000000000,
0b000000000010000,
0b000110000001100,
0b001000000000000,
0b000001000000000,
0b000001010010100,
0b000000001010110,
0b010000000000000,
0b110000000000000,
0b000100000000000,
0b000000010000000,
0b000000000001000,
0b100000000000000,
0b000001010000000,
0b001010000000000,
0b001100000000000,
0b000000001010100,
0b101101010010100,
0b010100000000000,
0b000000010001111,
0b011000000000000,
0b111110000000000,
0b101000000000000,
0b000000000001111,
0b000100010001111,
0b001000010001111,
0b000110000000000,
};
/* Lookup table shared by the src0_index and src1_index fields of a compacted
 * instruction.
 *
 * Each entry is a 12-bit pattern corresponding to bits 13-24 of bits2.ud
 * (for src0) or bits3.ud (for src1) of the full instruction, as extracted by
 * set_src0_index() / set_src1_index() and restored by
 * set_uncompacted_src0() / set_uncompacted_src1().
 */
static const uint32_t gen6_src_index_table[32] = {
0b000000000000,
0b010110001000,
0b010001101000,
0b001000101000,
0b011010010000,
0b000100100000,
0b010001101100,
0b010101110000,
0b011001111000,
0b001100101000,
0b010110001100,
0b001000100000,
0b010110001010,
0b000000000010,
0b010101010000,
0b010101101000,
0b111101001100,
0b111100101100,
0b011001110000,
0b010110001001,
0b010101011000,
0b001101001000,
0b010000101100,
0b010000000000,
0b001101110000,
0b001100010000,
0b001100000000,
0b010001101010,
0b001101111000,
0b000001110000,
0b001100100000,
0b001101010000,
};
/**
 * Finds the control bits of src in gen6_control_index_table.
 *
 * The 17-bit search key is assembled from bits 8-23 of the first instruction
 * dword (key bits 0-15) and bit 31 of that dword (key bit 16).
 *
 * \return true and stores the matching table index in dst->dw0.control_index
 *         when the key is present; false, leaving dst untouched, otherwise.
 */
static bool
set_control_index(struct brw_compact_instruction *dst,
                  struct brw_instruction *src)
{
   const uint32_t dword0 = ((uint32_t *)src)[0];
   const uint32_t key = ((dword0 >> 8) & 0xffff) |
                        (((dword0 >> 31) & 0x1) << 16);
   int idx = 0;

   while (idx < ARRAY_SIZE(gen6_control_index_table)) {
      if (gen6_control_index_table[idx] == key) {
         dst->dw0.control_index = idx;
         return true;
      }
      idx++;
   }

   return false;
}
/**
 * Finds the data-type bits of src in gen6_datatype_table.
 *
 * The 18-bit search key is bits 0-14 of bits1.ud (key bits 0-14) followed by
 * bits 29-31 of bits1.ud (key bits 15-17).
 *
 * \return true and stores the matching table index in
 *         dst->dw0.data_type_index when the key is present; false, leaving
 *         dst untouched, otherwise.
 */
static bool
set_datatype_index(struct brw_compact_instruction *dst,
                   struct brw_instruction *src)
{
   const uint32_t dword1 = src->bits1.ud;
   const uint32_t key = (dword1 & 0x7fff) | ((dword1 >> 29) << 15);
   int idx = 0;

   while (idx < ARRAY_SIZE(gen6_datatype_table)) {
      if (gen6_datatype_table[idx] == key) {
         dst->dw0.data_type_index = idx;
         return true;
      }
      idx++;
   }

   return false;
}
/**
 * Finds the sub-register numbers of src in gen6_subreg_table.
 *
 * The search key packs the destination sub-register number at bit 0, the
 * src0 sub-register number at bit 5 and the src1 sub-register number at
 * bit 10.
 *
 * \return true and stores the matching table index in dst->dw0.sub_reg_index
 *         when the key is present; false, leaving dst untouched, otherwise.
 */
static bool
set_subreg_index(struct brw_compact_instruction *dst,
                 struct brw_instruction *src)
{
   uint32_t key = 0;
   int idx = 0;

   key |= src->bits3.da1.src1_subreg_nr << 10;
   key |= src->bits2.da1.src0_subreg_nr << 5;
   key |= src->bits1.da1.dest_subreg_nr << 0;

   while (idx < ARRAY_SIZE(gen6_subreg_table)) {
      if (gen6_subreg_table[idx] == key) {
         dst->dw0.sub_reg_index = idx;
         return true;
      }
      idx++;
   }

   return false;
}
/**
 * Searches gen6_src_index_table for the 12-bit source pattern uncompacted.
 *
 * \param uncompacted  pattern to look for
 * \param compacted    receives the index of the first matching entry; only
 *                     written on success
 * \return true if the pattern is in the table, false otherwise.
 */
static bool
get_src_index(uint32_t uncompacted,
              uint32_t *compacted)
{
   for (int slot = 0; slot < ARRAY_SIZE(gen6_src_index_table); slot++) {
      if (gen6_src_index_table[slot] != uncompacted)
         continue;

      *compacted = slot;
      return true;
   }

   return false;
}
/**
 * Encodes the src0 description of src (bits 13-24 of bits2.ud) as a source
 * table index.
 *
 * The 5-bit index straddles the two compacted dwords: its low 2 bits go in
 * dst->dw0.src0_index and the remaining high bits in dst->dw1.src0_index.
 *
 * \return true on success; false, leaving dst untouched, if the pattern is
 *         not in the source index table.
 */
static bool
set_src0_index(struct brw_compact_instruction *dst,
               struct brw_instruction *src)
{
   const uint32_t key = (src->bits2.ud >> 13) & 0xfff;
   uint32_t index;

   if (get_src_index(key, &index)) {
      dst->dw0.src0_index = index & 0x3;
      dst->dw1.src0_index = index >> 2;
      return true;
   }

   return false;
}
/**
 * Encodes the src1 description of src (bits 13-24 of bits3.ud) as a source
 * table index stored in dst->dw1.src1_index.
 *
 * \return true on success; false, leaving dst untouched, if the pattern is
 *         not in the source index table.
 */
static bool
set_src1_index(struct brw_compact_instruction *dst,
               struct brw_instruction *src)
{
   const uint32_t key = (src->bits3.ud >> 13) & 0xfff;
   uint32_t index;

   if (get_src_index(key, &index)) {
      dst->dw1.src1_index = index;
      return true;
   }

   return false;
}
/**
* Tries to compact instruction src into dst.
*
* It doesn't modify dst unless src is compactable, which is relied on by
* brw_compact_instructions().
*/
bool
brw_try_compact_instruction(struct brw_compile *p,
struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
struct brw_compact_instruction temp;
/* FINISHME: immediates */
if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return false;
memset(&temp, 0, sizeof(temp));
temp.dw0.opcode = src->header.opcode;
temp.dw0.debug_control = src->header.debug_control;
if (!set_control_index(&temp, src))
return false;
if (!set_datatype_index(&temp, src))
return false;
if (!set_subreg_index(&temp, src))
return false;
temp.dw0.acc_wr_control = src->header.acc_wr_control;
temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
temp.dw0.flag_reg_nr = src->bits2.da1.flag_reg_nr;
temp.dw0.cmpt_ctrl = 1;
if (!set_src0_index(&temp, src))
return false;
if (!set_src1_index(&temp, src))
return false;
temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
*dst = temp;
return true;
}
/**
 * Expands the control index of src back into the first dword of dst.
 *
 * Table entry bits 0-15 are ORed into dword bits 8-23 and entry bit 16 into
 * dword bit 31.  The bits are merged with OR, so the caller is expected to
 * have cleared those positions beforehand.
 */
static void
set_uncompacted_control(struct brw_instruction *dst,
                        struct brw_compact_instruction *src)
{
   uint32_t *dwords = (uint32_t *)dst;
   const uint32_t pattern = gen6_control_index_table[src->dw0.control_index];
   const uint32_t low16 = pattern & 0xffff;
   const uint32_t top1 = (pattern >> 16) & 0x1;

   dwords[0] |= low16 << 8;
   dwords[0] |= top1 << 31;
}
/**
 * Expands the data-type index of src back into dst->bits1.ud.
 *
 * Bits 0-14 and 29-31 of bits1.ud are replaced by entry bits 0-14 and 15-17
 * respectively; all other bits of bits1.ud are preserved.
 */
static void
set_uncompacted_datatype(struct brw_instruction *dst,
                         struct brw_compact_instruction *src)
{
   const uint32_t pattern = gen6_datatype_table[src->dw0.data_type_index];
   const uint32_t field_mask = (0x7u << 29) | 0x7fff;
   uint32_t dword1 = dst->bits1.ud & ~field_mask;

   dword1 |= pattern & 0x7fff;
   dword1 |= ((pattern >> 15) & 0x7) << 29;

   dst->bits1.ud = dword1;
}
/**
 * Expands the sub-register index of src into the three sub-register number
 * fields of dst.
 *
 * Each field is 5 bits of the table entry: destination at bit 0, src0 at
 * bit 5 and src1 at bit 10.
 */
static void
set_uncompacted_subreg(struct brw_instruction *dst,
                       struct brw_compact_instruction *src)
{
   const uint32_t pattern = gen6_subreg_table[src->dw0.sub_reg_index];
   const uint32_t dest_subreg = pattern & 0x1f;
   const uint32_t src0_subreg = (pattern >> 5) & 0x1f;
   const uint32_t src1_subreg = (pattern >> 10) & 0x1f;

   dst->bits1.da1.dest_subreg_nr = dest_subreg;
   dst->bits2.da1.src0_subreg_nr = src0_subreg;
   dst->bits3.da1.src1_subreg_nr = src1_subreg;
}
/**
 * Expands the src0 index of src into bits 13 and up of dst->bits2.ud.
 *
 * The index is reassembled from its two halves (low 2 bits in dw0, high
 * bits in dw1) before the table lookup.  The pattern is merged with OR.
 */
static void
set_uncompacted_src0(struct brw_instruction *dst,
                     struct brw_compact_instruction *src)
{
   const uint32_t index = src->dw0.src0_index | (src->dw1.src0_index << 2);

   dst->bits2.ud |= gen6_src_index_table[index] << 13;
}
/**
 * Expands the src1 index of src into bits 13 and up of dst->bits3.ud.
 *
 * The pattern is merged with OR.
 */
static void
set_uncompacted_src1(struct brw_instruction *dst,
                     struct brw_compact_instruction *src)
{
   const uint32_t pattern = gen6_src_index_table[src->dw1.src1_index];

   dst->bits3.ud |= pattern << 13;
}
/**
 * Expands the compacted instruction src into the full instruction dst.
 *
 * dst is zeroed first; the table-driven fields (control, data type,
 * sub-registers, src0, src1) are then expanded through the lookup tables and
 * the remaining fields are copied across directly.  The compaction control
 * bit is not set in dst, so the result is a regular uncompacted instruction.
 *
 * The intel parameter is not used by this function.
 */
void
brw_uncompact_instruction(struct intel_context *intel,
struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
/* Start from all zeroes: several helpers below merge their bits with OR. */
memset(dst, 0, sizeof(*dst));
dst->header.opcode = src->dw0.opcode;
dst->header.debug_control = src->dw0.debug_control;
/* Fields encoded as indices into the gen6 lookup tables. */
set_uncompacted_control(dst, src);
set_uncompacted_datatype(dst, src);
set_uncompacted_subreg(dst, src);
dst->header.acc_wr_control = src->dw0.acc_wr_control;
dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr;
set_uncompacted_src0(dst, src);
set_uncompacted_src1(dst, src);
/* Register numbers are stored uncompacted in the second dword. */
dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
}
/**
 * Reports a compact/uncompact round-trip mismatch on stderr.
 *
 * Disassembles both the original and the re-expanded instruction, then lists
 * each of the 128 instruction bits whose value differs between the two.
 *
 * \param intel        context supplying the hardware generation passed to
 *                     brw_disasm()
 * \param orig         the instruction before compaction
 * \param uncompacted  the result of compacting and then uncompacting orig
 */
void brw_debug_compact_uncompact(struct intel_context *intel,
                                 struct brw_instruction *orig,
                                 struct brw_instruction *uncompacted)
{
   const uint32_t *before_bits = (const uint32_t *)orig;
   const uint32_t *after_bits = (const uint32_t *)uncompacted;

   fprintf(stderr, "Instruction compact/uncompact changed:\n");

   fprintf(stderr, " before: ");
   brw_disasm(stderr, orig, intel->gen);

   fprintf(stderr, " after: ");
   brw_disasm(stderr, uncompacted, intel->gen);

   /* Keep the whole report on one stream so the bit list cannot be
    * separated from (or reordered against) the disassembly above.
    */
   fprintf(stderr, " changed bits:\n");
   for (int i = 0; i < 128; i++) {
      /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
      const uint32_t mask = 1u << (i & 31);
      const uint32_t before = before_bits[i / 32] & mask;
      const uint32_t after = after_bits[i / 32] & mask;

      if (before != after) {
         fprintf(stderr, " bit %d, %s to %s\n", i,
                 before ? "set" : "unset",
                 after ? "set" : "unset");
      }
   }
}
/**
 * Compacts the program held in p->store in place.
 *
 * Walks the instruction store from the start, replacing each instruction
 * that brw_try_compact_instruction() accepts with its 8-byte form and moving
 * every other instruction down to close the gap.  On return p->nr_insn and
 * p->next_insn_offset describe the (16-byte aligned) compacted program.
 *
 * Nothing is done unless intel->gen is 6, and nothing is done if the program
 * contains a WHILE instruction.  Compaction also stops at the first IF, HALT
 * or JMPI; instructions from that point on are only moved, never compacted.
 */
void
brw_compact_instructions(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
void *store = p->store;
/* Check that the last slot of each lookup table is non-zero.
 * NOTE(review): presumably this guards against a table with fewer than 32
 * initializers, whose tail would be zero-filled -- confirm intent.
 */
assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
/* Only the gen6 tables are provided by this file. */
if (intel->gen != 6)
return;
/* FINISHME: If we are going to compact instructions between flow control,
* we have to do fixups to flow control offsets to represent the new
* distances, since flow control uses (virtual address distance)/2, not a
* logical instruction count. We can at least compact up until an IF
* instruction, but there's no instruction indicating the start of a
* do/while loop.
*/
bool continue_compressing = true;
/* Any WHILE in the program disables compaction entirely.
 * NOTE(review): this scan indexes the store in 16-byte strides; if the store
 * already holds 8-byte compacted instructions from an earlier pass, a stride
 * may not begin on an instruction boundary -- confirm this is acceptable.
 */
for (int i = 0; i < p->nr_insn; i++) {
if (p->store[i].header.opcode == BRW_OPCODE_WHILE)
return;
}
/* src_offset is the read position and offset the write position, both in
 * bytes from the start of the store.  offset never exceeds src_offset.
 */
int src_offset;
int offset = 0;
for (src_offset = 0; src_offset < p->nr_insn * 16;) {
struct brw_instruction *src = store + src_offset;
void *dst = store + offset;
/* Once one of these is seen, continue_compressing stays false for the
 * rest of the program.
 */
switch (src->header.opcode) {
case BRW_OPCODE_IF:
case BRW_OPCODE_HALT:
case BRW_OPCODE_JMPI:
continue_compressing = false;
break;
}
/* dst may overlap src, so keep a copy of the original for the debug
 * round-trip comparison below.
 */
struct brw_instruction saved = *src;
if (continue_compressing &&
!src->header.cmpt_control &&
brw_try_compact_instruction(p, dst, src)) {
/* debug: verify that uncompacting reproduces the original bits */
if (INTEL_DEBUG) {
struct brw_instruction uncompacted;
brw_uncompact_instruction(intel, &uncompacted, dst);
if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
brw_debug_compact_uncompact(intel, &saved, &uncompacted);
}
}
/* Wrote 8 bytes, consumed a 16-byte instruction. */
offset += 8;
src_offset += 16;
} else {
/* An instruction that is already compacted (from a previous pass) is
 * 8 bytes long; everything else is 16.
 */
int size = src->header.cmpt_control ? 8 : 16;
/* It appears that the end of thread SEND instruction needs to be
* aligned, or the GPU hangs.  Insert a compacted NOP in front of it when
* it would otherwise land on an 8-byte (not 16-byte) boundary.
*/
if ((src->header.opcode == BRW_OPCODE_SEND ||
src->header.opcode == BRW_OPCODE_SENDC) &&
src->bits3.generic.end_of_thread &&
(offset & 8) != 0) {
struct brw_compact_instruction *align = store + offset;
memset(align, 0, sizeof(*align));
align->dw0.opcode = BRW_OPCODE_NOP;
align->dw0.cmpt_ctrl = 1;
offset += 8;
dst = store + offset;
}
/* If we didn't compact this instruction, we need to move it down into
* place.
*/
if (offset != src_offset) {
memmove(dst, src, size);
}
offset += size;
src_offset += size;
}
}
/* p->nr_insn is counting the number of uncompacted instructions still, so
* divide. We do want to be sure there's a valid instruction in any
* alignment padding, so that the next compaction pass (for the FS 8/16
* compile passes) parses correctly.
*/
if (offset & 8) {
struct brw_compact_instruction *align = store + offset;
memset(align, 0, sizeof(*align));
align->dw0.opcode = BRW_OPCODE_NOP;
align->dw0.cmpt_ctrl = 1;
offset += 8;
}
p->next_insn_offset = offset;
p->nr_insn = offset / 16;
/* Disabled developer aid: dumps the compacted program and reports how many
 * bytes compaction saved.
 */
if (0) {
fprintf(stdout, "dumping compacted program\n");
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
int cmp = 0;
for (offset = 0; offset < p->next_insn_offset;) {
struct brw_instruction *insn = store + offset;
if (insn->header.cmpt_control) {
offset += 8;
cmp++;
} else {
offset += 16;
}
}
fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
cmp * 8 * 100 / (offset + cmp * 8));
}
}

View file

@ -32,6 +32,7 @@
#include "main/mtypes.h" #include "main/mtypes.h"
#include "main/imports.h" #include "main/imports.h"
#include "brw_context.h"
#include "brw_eu.h" #include "brw_eu.h"
void brw_print_reg( struct brw_reg hwreg ) void brw_print_reg( struct brw_reg hwreg )

View file

@ -1947,6 +1947,12 @@ fs_visitor::run()
brw_wm_payload_setup(brw, c); brw_wm_payload_setup(brw, c);
if (c->dispatch_width == 16) { if (c->dispatch_width == 16) {
/* We have to do a compaction pass now, or the one at the end of
* execution will squash down where our prog_offset start needs
* to be.
*/
brw_compact_instructions(p);
/* align to 64 byte boundary. */ /* align to 64 byte boundary. */
while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) { while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
brw_NOP(p); brw_NOP(p);

View file

@ -1048,6 +1048,8 @@ struct brw_instruction
GLuint dest_subreg_nr:3; GLuint dest_subreg_nr:3;
GLuint dest_reg_nr:8; GLuint dest_reg_nr:8;
} da3src; } da3src;
uint32_t ud;
} bits1; } bits1;
@ -1137,6 +1139,8 @@ struct brw_instruction
GLuint src1_swizzle:8; GLuint src1_swizzle:8;
GLuint src1_subreg_nr_low:2; GLuint src1_subreg_nr_low:2;
} da3src; } da3src;
uint32_t ud;
} bits2; } bits2;
union union
@ -1534,5 +1538,27 @@ struct brw_instruction
} bits3; } bits3;
}; };
/* Layout of an 8-byte compacted instruction, as two 32-bit groups of
 * bitfields.  The trailing comment on each field gives its bit range within
 * the 64-bit encoding.  The 5-bit src0 index is split across the two dwords:
 * its low 2 bits live in dw0 and its high 3 bits in dw1.
 */
struct brw_compact_instruction {
struct {
unsigned opcode:7; /* 0- 6 */
unsigned debug_control:1; /* 7- 7 */
unsigned control_index:5; /* 8-12 */
unsigned data_type_index:5; /* 13-17 */
unsigned sub_reg_index:5; /* 18-22 */
unsigned acc_wr_control:1; /* 23-23 */
unsigned conditionalmod:4; /* 24-27 */
unsigned flag_reg_nr:1; /* 28-28 */
unsigned cmpt_ctrl:1; /* 29-29 */
unsigned src0_index:2; /* 30-31 */
} dw0;
struct {
unsigned src0_index:3; /* 32-34 */
unsigned src1_index:5; /* 35-39 */
unsigned dst_reg_nr:8; /* 40-47 */
unsigned src0_reg_nr:8; /* 48-55 */
unsigned src1_reg_nr:8; /* 56-63 */
} dw1;
};
#endif #endif

View file

@ -0,0 +1,296 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include "glsl/ralloc.h"
#include "brw_context.h"
#include "brw_eu.h"
static bool
test_compact_instruction(struct brw_compile *p, struct brw_instruction src)
{
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
struct brw_compact_instruction dst;
memset(&dst, 0xd0, sizeof(dst));
if (brw_try_compact_instruction(p, &dst, &src)) {
struct brw_instruction uncompacted;
brw_uncompact_instruction(intel, &uncompacted, &dst);
if (memcmp(&uncompacted, &src, sizeof(src))) {
brw_debug_compact_uncompact(intel, &src, &uncompacted);
return false;
}
} else {
struct brw_compact_instruction unchanged;
memset(&unchanged, 0xd0, sizeof(unchanged));
/* It's not supposed to change dst unless it compacted. */
if (memcmp(&unchanged, &dst, sizeof(dst))) {
fprintf(stderr, "Failed to compact, but dst changed\n");
fprintf(stderr, " Instruction: ");
brw_disasm(stderr, &src, intel->gen);
return false;
}
}
return true;
}
/**
 * Clears pad bits that cannot survive a compact/uncompact round trip.
 *
 * This is roughly a superset of skip_bit(): skip_bit() avoids flipping bits
 * that are not worth fuzzing, while this function additionally zeroes bits
 * that become meaningless once fuzzing has flipped a related bit (here, the
 * src1 address mode selects which pad field applies).
 *
 * SEND, SENDC, BREAK and CONTINUE instructions, and instructions with an
 * immediate source, are left unchanged.
 */
static void
clear_pad_bits(struct brw_instruction *inst)
{
   const unsigned opcode = inst->header.opcode;

   if (opcode == BRW_OPCODE_SEND ||
       opcode == BRW_OPCODE_SENDC ||
       opcode == BRW_OPCODE_BREAK ||
       opcode == BRW_OPCODE_CONTINUE)
      return;

   if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
       inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
      return;

   if (inst->bits3.da1.src1_address_mode) {
      inst->bits3.ia1.pad1 = 0;
   } else {
      inst->bits3.da1.pad0 = 0;
   }
}
/**
 * Tells the fuzzer whether a given instruction bit should be left alone.
 *
 * \param src  the unmodified instruction being fuzzed
 * \param bit  bit position within the 128-bit instruction
 * \return true if flipping the bit is not a meaningful test.
 */
static bool
skip_bit(struct brw_instruction *src, int bit)
{
   switch (bit) {
   case 7:  /* pad bit */
   case 29: /* The compact bit -- uncompacted can't have it set. */
   case 47: /* pad bit */
      return true;
   default:
      break;
   }

   /* pad bits */
   if (bit >= 90 && bit <= 95)
      return true;

   /* Everything else below bit 121 is always fair game. */
   if (bit < 121)
      return false;

   /* sometimes these are pad bits. */
   const unsigned opcode = src->header.opcode;
   const bool uses_top_bits =
      opcode == BRW_OPCODE_SEND ||
      opcode == BRW_OPCODE_SENDC ||
      opcode == BRW_OPCODE_BREAK ||
      opcode == BRW_OPCODE_CONTINUE ||
      src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
      src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE;

   return !uses_top_bits;
}
/**
 * Fuzzes src by flipping every pair of non-skipped bits and checking that
 * each resulting instruction still satisfies test_compact_instruction().
 *
 * When bit0 == bit1 the two flips cancel, so the unmodified instruction
 * (after clear_pad_bits()) is tested as well.
 *
 * \return true if every variant passed; false at the first failure, after
 *         reporting which two bits were flipped.
 */
static bool
test_fuzz_compact_instruction(struct brw_compile *p,
                              struct brw_instruction src)
{
   for (int bit0 = 0; bit0 < 128; bit0++) {
      if (skip_bit(&src, bit0))
         continue;

      for (int bit1 = 0; bit1 < 128; bit1++) {
         if (skip_bit(&src, bit1))
            continue;

         struct brw_instruction instr = src;
         uint32_t *bits = (uint32_t *)&instr;

         /* Unsigned constant: shifting a signed 1 into bit 31 is
          * undefined behaviour.
          */
         bits[bit0 / 32] ^= 1u << (bit0 & 31);
         bits[bit1 / 32] ^= 1u << (bit1 & 31);

         clear_pad_bits(&instr);

         if (!test_compact_instruction(p, instr)) {
            /* Same stream as the failure report this line annotates. */
            fprintf(stderr, " twiddled bits for fuzzing %d, %d\n",
                    bit0, bit1);
            return false;
         }
      }
   }

   return true;
}
static void
gen_ADD_GRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
brw_ADD(p, g0, g2, g4);
}
static void
gen_ADD_GRF_GRF_IMM(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
brw_ADD(p, g0, g2, brw_imm_f(1.0));
}
static void
gen_ADD_GRF_GRF_IMM_d(struct brw_compile *p)
{
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D);
struct brw_reg g2 = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_D);
brw_ADD(p, g0, g2, brw_imm_d(1));
}
static void
gen_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
brw_MOV(p, g0, g2);
}
static void
gen_ADD_MRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
brw_ADD(p, m6, g2, g4);
}
static void
gen_ADD_vec1_GRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec1_grf(0, 0);
struct brw_reg g2 = brw_vec1_grf(2, 0);
struct brw_reg g4 = brw_vec1_grf(4, 0);
brw_ADD(p, g0, g2, g4);
}
static void
gen_PLN_MRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
struct brw_reg interp = brw_vec1_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
brw_PLN(p, m6, interp, g4);
}
static void
gen_f0_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
brw_push_insn_state(p);
brw_set_predicate_control(p, true);
brw_MOV(p, g0, g2);
brw_pop_insn_state(p);
}
/* The handling of f1 vs f0 changes between gen6 and gen7. Explicitly test
* it, so that we run the fuzzing can run over all the other bits that might
* interact with it.
*/
static void
gen_f1_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
brw_push_insn_state(p);
brw_set_predicate_control(p, true);
current_insn(p)->bits2.da1.flag_reg_nr = 1;
brw_MOV(p, g0, g2);
brw_pop_insn_state(p);
}
/* Instruction generators exercised by main().  Each one must emit exactly
 * one instruction into the brw_compile it is given.
 */
struct {
   void (*func)(struct brw_compile *p);
} tests[] = {
   { .func = gen_MOV_GRF_GRF },
   { .func = gen_ADD_GRF_GRF_GRF },
   { .func = gen_ADD_GRF_GRF_IMM },
   { .func = gen_ADD_GRF_GRF_IMM_d },
   { .func = gen_ADD_MRF_GRF_GRF },
   { .func = gen_ADD_vec1_GRF_GRF_GRF },
   { .func = gen_PLN_MRF_GRF_GRF },
   { .func = gen_f0_MOV_GRF_GRF },
   { .func = gen_f1_MOV_GRF_GRF },
};
/**
 * Test driver: runs every generator in tests[] in both align1 and align16
 * access modes, checking the generated instruction and all of its two-bit
 * fuzzed variants against the compaction round-trip contract.
 *
 * \return 0 if every test passed, 1 if any failed (or on allocation
 *         failure).
 */
int
main(int argc, char **argv)
{
   struct brw_context *brw = calloc(1, sizeof(*brw));
   if (brw == NULL) {
      fprintf(stderr, "Failed to allocate brw_context\n");
      return 1;
   }

   struct intel_context *intel = &brw->intel;
   intel->gen = 6;

   int ret = 0;

   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
         struct brw_compile *p = rzalloc(NULL, struct brw_compile);

         brw_init_compile(brw, p, p);

         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         if (align_16)
            brw_set_access_mode(p, BRW_ALIGN_16);
         else
            brw_set_access_mode(p, BRW_ALIGN_1);

         tests[i].func(p);
         assert(p->nr_insn == 1);

         /* The fuzz pass is skipped when the plain instruction already
          * fails.  Either way, fall through so p is always released.
          */
         if (!test_compact_instruction(p, p->store[0]) ||
             !test_fuzz_compact_instruction(p, p->store[0]))
            ret = 1;

         ralloc_free(p);
      }
   }

   free(brw);

   return ret;
}