i965: Delete the Gen8 code generators.

We now use the brw_eu_emit.c code instead.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Kenneth Graunke 2014-08-11 10:07:07 -07:00
parent f17bfc9ba9
commit 8c229d306b
9 changed files with 0 additions and 4076 deletions

View file

@@ -150,16 +150,12 @@ i965_FILES = \
gen8_depth_state.c \
gen8_disable.c \
gen8_draw_upload.c \
gen8_fs_generator.cpp \
gen8_generator.cpp \
gen8_instruction.c \
gen8_gs_state.c \
gen8_misc_state.c \
gen8_multisample_state.c \
gen8_sf_state.c \
gen8_sol_state.c \
gen8_surface_state.c \
gen8_vec4_generator.cpp \
gen8_viewport_state.c \
gen8_vs_state.c \
gen8_wm_depth_stencil.c \

View file

@ -48,7 +48,6 @@ extern "C" {
#include "brw_shader.h"
#include "intel_asm_annotation.h"
}
#include "gen8_generator.h"
#include "glsl/glsl_types.h"
#include "glsl/ir.h"
@@ -691,92 +690,6 @@ private:
void *mem_ctx;
};
/**
* The fragment shader code generator.
*
* Translates FS IR to actual i965 assembly code.
*/
class gen8_fs_generator : public gen8_generator
{
public:
gen8_fs_generator(struct brw_context *brw,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
struct gl_shader_program *prog,
struct gl_fragment_program *fp,
bool dual_source_output);
~gen8_fs_generator();
const unsigned *generate_assembly(exec_list *simd8_instructions,
exec_list *simd16_instructions,
unsigned *assembly_size);
private:
void generate_code(exec_list *instructions);
void generate_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
struct brw_reg sampler_index);
void generate_math1(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_math2(fs_inst *inst, struct brw_reg dst,
struct brw_reg src0, struct brw_reg src1);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value);
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
void generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_mov_dispatch_to_flags(fs_inst *ir);
void generate_set_omask(fs_inst *ir,
struct brw_reg dst,
struct brw_reg sample_mask);
void generate_set_sample_id(fs_inst *ir,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_set_simd4x2_offset(fs_inst *ir,
struct brw_reg dst,
struct brw_reg offset);
void generate_pack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg x,
struct brw_reg y);
void generate_unpack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_untyped_atomic(fs_inst *inst,
struct brw_reg dst,
struct brw_reg atomic_op,
struct brw_reg surf_index);
void generate_untyped_surface_read(fs_inst *inst,
struct brw_reg dst,
struct brw_reg surf_index);
void generate_discard_jump(fs_inst *ir);
bool patch_discard_jumps_to_fb_writes();
const struct brw_wm_prog_key *const key;
struct brw_wm_prog_data *prog_data;
const struct gl_fragment_program *fp;
unsigned dispatch_width; /**< 8 or 16 */
bool dual_source_output;
exec_list discard_halt_patches;
};
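A sketch of how a caller might drive this class (hypothetical variable names; construction of the key, prog_data, and IR lists elided; the SIMD16 list may be NULL):

gen8_fs_generator g(brw, mem_ctx, key, prog_data, shader_prog, fp, false);
unsigned size;
const unsigned *code = g.generate_assembly(&simd8_insts, &simd16_insts, &size);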
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);

View file

@ -40,7 +40,6 @@ extern "C" {
#ifdef __cplusplus
}; /* extern "C" */
#include "gen8_generator.h"
#endif
#include "glsl/ir.h"
@@ -702,72 +701,6 @@ private:
const bool debug_flag;
};
/**
* The vertex shader code generator.
*
* Translates VS IR to actual i965 assembly code.
*/
class gen8_vec4_generator : public gen8_generator
{
public:
gen8_vec4_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
struct brw_vec4_prog_data *prog_data,
void *mem_ctx,
bool debug_flag);
~gen8_vec4_generator();
const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
private:
void generate_code(exec_list *instructions);
void generate_vec4_instruction(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg *src);
void generate_tex(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg sampler_index);
void generate_urb_write(vec4_instruction *ir, bool copy_g0);
void generate_gs_thread_end(vec4_instruction *ir);
void generate_gs_set_write_offset(struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_gs_set_vertex_count(struct brw_reg dst,
struct brw_reg src);
void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
void generate_gs_prepare_channel_masks(struct brw_reg dst);
void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg index);
void generate_scratch_read(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index);
void generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_untyped_atomic(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg atomic_op,
struct brw_reg surf_index);
void generate_untyped_surface_read(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg surf_index);
struct brw_vec4_prog_data *prog_data;
const bool debug_flag;
};
} /* namespace brw */
#endif /* __cplusplus */

File diff suppressed because it is too large.

View file

@@ -1,620 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** @file gen8_generator.cpp
*
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
*/
extern "C" {
#include "main/compiler.h"
#include "main/macros.h"
#include "brw_context.h"
} /* extern "C" */
#include "util/ralloc.h"
#include "brw_eu.h"
#include "brw_reg.h"
#include "gen8_generator.h"
gen8_generator::gen8_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
void *mem_ctx)
: shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx)
{
ctx = &brw->ctx;
memset(&default_state, 0, sizeof(default_state));
default_state.mask_control = BRW_MASK_ENABLE;
store_size = 1024;
store = rzalloc_array(mem_ctx, gen8_instruction, store_size);
nr_inst = 0;
next_inst_offset = 0;
/* Set up the control flow stacks. */
if_stack_depth = 0;
if_stack_array_size = 16;
if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);
loop_stack_depth = 0;
loop_stack_array_size = 16;
loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
}
gen8_generator::~gen8_generator()
{
}
gen8_instruction *
gen8_generator::next_inst(unsigned opcode)
{
gen8_instruction *inst;
if (nr_inst + 1 > unsigned(store_size)) {
store_size <<= 1;
store = reralloc(mem_ctx, store, gen8_instruction, store_size);
assert(store);
}
next_inst_offset += 16;
inst = &store[nr_inst++];
memset(inst, 0, sizeof(gen8_instruction));
gen8_set_opcode(inst, opcode);
gen8_set_exec_size(inst, default_state.exec_size);
gen8_set_access_mode(inst, default_state.access_mode);
gen8_set_mask_control(inst, default_state.mask_control);
gen8_set_qtr_control(inst, default_state.qtr_control);
gen8_set_cond_modifier(inst, default_state.conditional_mod);
gen8_set_pred_control(inst, default_state.predicate);
gen8_set_pred_inv(inst, default_state.predicate_inverse);
gen8_set_saturate(inst, default_state.saturate);
gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr);
return inst;
}
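For illustration, a minimal sketch of how emitters inherit this default state (hypothetical subclass method; the register choices are arbitrary) — the same pattern the generators use to drop into Align1 mode temporarily:

void my_generator::emit_example()
{
   struct brw_reg dst = brw_vec8_grf(2, 0);
   struct brw_reg src = brw_vec8_grf(0, 0);

   default_state.access_mode = BRW_ALIGN_1;   /* latched by next_inst() */
   MOV_RAW(dst, src);                         /* emitted as an Align1 MOV */
   default_state.access_mode = BRW_ALIGN_16;  /* subsequent emits are Align16 */
}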
#define ALU1(OP) \
gen8_instruction * \
gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
{ \
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
gen8_set_dst(brw, inst, dst); \
gen8_set_src0(brw, inst, src); \
return inst; \
}
#define ALU2(OP) \
gen8_instruction * \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
{ \
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
gen8_set_dst(brw, inst, dst); \
gen8_set_src0(brw, inst, s0); \
gen8_set_src1(brw, inst, s1); \
return inst; \
}
#define ALU2_ACCUMULATE(OP) \
gen8_instruction * \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
{ \
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
gen8_set_dst(brw, inst, dst); \
gen8_set_src0(brw, inst, s0); \
gen8_set_src1(brw, inst, s1); \
gen8_set_acc_wr_control(inst, true); \
return inst; \
}
#define ALU3(OP) \
gen8_instruction * \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
struct brw_reg s1, struct brw_reg s2) \
{ \
return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
}
#define ALU3F(OP) \
gen8_instruction * \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
struct brw_reg s1, struct brw_reg s2) \
{ \
assert(dst.type == BRW_REGISTER_TYPE_F); \
assert(s0.type == BRW_REGISTER_TYPE_F); \
assert(s1.type == BRW_REGISTER_TYPE_F); \
assert(s2.type == BRW_REGISTER_TYPE_F); \
return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
}
ALU2(ADD)
ALU2(AND)
ALU2(ASR)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(BFREV)
ALU1(CBIT)
ALU2_ACCUMULATE(ADDC)
ALU2_ACCUMULATE(SUBB)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU2(LINE)
ALU3F(LRP)
ALU3F(MAD)
ALU2(MUL)
ALU1(MOV)
ALU1(NOT)
ALU2(OR)
ALU2(PLN)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2_ACCUMULATE(MAC)
ALU2_ACCUMULATE(MACH)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2(XOR)
gen8_instruction *
gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
struct brw_reg src0, struct brw_reg src1)
{
gen8_instruction *inst = next_inst(BRW_OPCODE_CMP);
gen8_set_cond_modifier(inst, conditional);
/* The CMP instruction appears to behave erratically for floating point
* sources unless the destination type is also float. Overriding it to
* match src0 makes it work in all cases.
*/
dst.type = src0.type;
gen8_set_dst(brw, inst, dst);
gen8_set_src0(brw, inst, src0);
gen8_set_src1(brw, inst, src1);
return inst;
}
static int
get_3src_subreg_nr(struct brw_reg reg)
{
if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
} else {
return reg.subnr / 4;
}
}
gen8_instruction *
gen8_generator::alu3(unsigned opcode,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1,
struct brw_reg src2)
{
/* MRFs haven't existed since Gen7, so we better not be using them. */
if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
dst.file = BRW_GENERAL_REGISTER_FILE;
dst.nr += GEN7_MRF_HACK_START;
}
gen8_instruction *inst = next_inst(opcode);
assert(gen8_access_mode(inst) == BRW_ALIGN_16);
assert(dst.file == BRW_GENERAL_REGISTER_FILE);
assert(dst.nr < 128);
assert(dst.address_mode == BRW_ADDRESS_DIRECT);
assert(dst.type == BRW_REGISTER_TYPE_F ||
dst.type == BRW_REGISTER_TYPE_D ||
dst.type == BRW_REGISTER_TYPE_UD);
gen8_set_dst_3src_reg_nr(inst, dst.nr);
gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16);
gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
assert(src0.nr < 128);
gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle);
gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0));
gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0);
gen8_set_src0_3src_reg_nr(inst, src0.nr);
gen8_set_src0_3src_abs(inst, src0.abs);
gen8_set_src0_3src_negate(inst, src0.negate);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
assert(src1.nr < 128);
gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle);
gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1));
gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0);
gen8_set_src1_3src_reg_nr(inst, src1.nr);
gen8_set_src1_3src_abs(inst, src1.abs);
gen8_set_src1_3src_negate(inst, src1.negate);
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
assert(src2.nr < 128);
gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle);
gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2));
gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0);
gen8_set_src2_3src_reg_nr(inst, src2.nr);
gen8_set_src2_3src_abs(inst, src2.abs);
gen8_set_src2_3src_negate(inst, src2.negate);
/* Set both the source and destination types based on dst.type, ignoring
* the source register types. The MAD and LRP emitters both ensure that
* all register types are float. The BFE and BFI2 emitters, however, may
* send us mixed D and UD source types and want us to ignore that.
*/
switch (dst.type) {
case BRW_REGISTER_TYPE_F:
gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F);
gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F);
break;
case BRW_REGISTER_TYPE_D:
gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D);
gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D);
break;
case BRW_REGISTER_TYPE_UD:
gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD);
gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD);
break;
}
return inst;
}
gen8_instruction *
gen8_generator::math(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0)
{
gen8_instruction *inst = next_inst(BRW_OPCODE_MATH);
assert(src0.hstride == 0 || src0.hstride == dst.hstride);
gen8_set_math_function(inst, math_function);
gen8_set_dst(brw, inst, dst);
gen8_set_src0(brw, inst, src0);
return inst;
}
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0)
{
assert(src0.type == BRW_REGISTER_TYPE_F);
gen8_instruction *inst = math(math_function, dst, src0);
return inst;
}
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
bool int_math =
math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
if (int_math) {
assert(src0.type != BRW_REGISTER_TYPE_F);
assert(src1.type != BRW_REGISTER_TYPE_F);
} else {
assert(src0.type == BRW_REGISTER_TYPE_F);
}
gen8_instruction *inst = math(math_function, dst, src0);
gen8_set_src1(brw, inst, src1);
return inst;
}
gen8_instruction *
gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
{
gen8_instruction *inst = next_inst(BRW_OPCODE_MOV);
gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD));
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
return inst;
}
gen8_instruction *
gen8_generator::NOP()
{
return next_inst(BRW_OPCODE_NOP);
}
void
gen8_generator::push_if_stack(gen8_instruction *inst)
{
if_stack[if_stack_depth] = inst - store;
++if_stack_depth;
if (if_stack_array_size <= if_stack_depth) {
if_stack_array_size *= 2;
if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
}
}
gen8_instruction *
gen8_generator::pop_if_stack()
{
--if_stack_depth;
return &store[if_stack[if_stack_depth]];
}
/**
* Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP).
*/
void
gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
gen8_instruction *else_inst,
gen8_instruction *endif_inst)
{
assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);
gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst));
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
gen8_set_jip(if_inst, 16 * (endif_inst - if_inst));
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
} else {
gen8_set_exec_size(else_inst, gen8_exec_size(if_inst));
/* Patch IF -> ELSE and ELSE -> ENDIF:
*
* The IF's JIP should point at the instruction after the ELSE.
* The IF's UIP should point to the ENDIF.
*
* Both are expressed in bytes, hence the multiply by 16 (each instruction is 128 bits, i.e. 16 bytes).
*/
gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
/* Patch ELSE -> ENDIF:
*
* Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
*/
gen8_set_jip(else_inst, 16 * (endif_inst - else_inst));
gen8_set_uip(else_inst, 16 * (endif_inst - else_inst));
}
gen8_set_jip(endif_inst, 16);
}
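A worked example with hypothetical store indices — IF at index 10, ELSE at 14, ENDIF at 20 — shows where the offsets land:

/* IF:    JIP = 16 * (14 - 10 + 1) = 80   -> instruction after the ELSE
 *        UIP = 16 * (20 - 10)     = 160  -> the ENDIF
 * ELSE:  JIP = UIP = 16 * (20 - 14) = 96 -> the ENDIF
 * ENDIF: JIP = 16                        -> the following instruction
 */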
gen8_instruction *
gen8_generator::IF(unsigned predicate)
{
gen8_instruction *inst = next_inst(BRW_OPCODE_IF);
gen8_set_dst(brw, inst, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
gen8_set_src0(brw, inst, brw_imm_d(0));
gen8_set_exec_size(inst, default_state.exec_size);
gen8_set_pred_control(inst, predicate);
gen8_set_mask_control(inst, BRW_MASK_ENABLE);
push_if_stack(inst);
return inst;
}
gen8_instruction *
gen8_generator::ELSE()
{
gen8_instruction *inst = next_inst(BRW_OPCODE_ELSE);
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
gen8_set_src0(brw, inst, brw_imm_d(0));
gen8_set_mask_control(inst, BRW_MASK_ENABLE);
push_if_stack(inst);
return inst;
}
gen8_instruction *
gen8_generator::ENDIF()
{
gen8_instruction *if_inst = NULL;
gen8_instruction *else_inst = NULL;
gen8_instruction *tmp = pop_if_stack();
if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) {
else_inst = tmp;
tmp = pop_if_stack();
}
assert(gen8_opcode(tmp) == BRW_OPCODE_IF);
if_inst = tmp;
gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
gen8_set_src0(brw, endif_inst, brw_imm_d(0));
patch_IF_ELSE(if_inst, else_inst, endif_inst);
return endif_inst;
}
unsigned
gen8_generator::next_ip(unsigned ip) const
{
return ip + 16;
}
unsigned
gen8_generator::find_next_block_end(unsigned start) const
{
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
gen8_instruction *inst = &store[ip / 16];
switch (gen8_opcode(inst)) {
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
case BRW_OPCODE_HALT:
return ip;
}
}
return 0;
}
/* There is no DO instruction on Gen6+, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
unsigned
gen8_generator::find_loop_end(unsigned start) const
{
/* Always start after the instruction (such as a WHILE) we're trying to fix
* up.
*/
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
gen8_instruction *inst = &store[ip / 16];
if (gen8_opcode(inst) == BRW_OPCODE_WHILE) {
if (ip + gen8_jip(inst) <= start)
return ip;
}
}
unreachable("not reached");
}
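A short worked example with hypothetical offsets: suppose a BREAK sits at byte offset 48 and the loop's WHILE at offset 96 carries JIP = 16 * (1 - 6) = -80 (a jump back to the DO at offset 16). Scanning forward from 48, the WHILE at 96 satisfies 96 + (-80) = 16 <= 48, so offset 96 is reported as the end of the enclosing loop.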
/* After program generation, go back and update the UIP and JIP of
* BREAK, CONT, and HALT instructions to their correct locations.
*/
void
gen8_generator::patch_jump_targets()
{
for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
gen8_instruction *inst = &store[ip / 16];
int block_end_ip = find_next_block_end(ip);
switch (gen8_opcode(inst)) {
case BRW_OPCODE_BREAK:
assert(block_end_ip != 0);
gen8_set_jip(inst, block_end_ip - ip);
gen8_set_uip(inst, find_loop_end(ip) - ip);
assert(gen8_uip(inst) != 0);
assert(gen8_jip(inst) != 0);
break;
case BRW_OPCODE_CONTINUE:
assert(block_end_ip != 0);
gen8_set_jip(inst, block_end_ip - ip);
gen8_set_uip(inst, find_loop_end(ip) - ip);
assert(gen8_uip(inst) != 0);
assert(gen8_jip(inst) != 0);
break;
case BRW_OPCODE_ENDIF:
if (block_end_ip == 0)
gen8_set_jip(inst, 16);
else
gen8_set_jip(inst, block_end_ip - ip);
break;
case BRW_OPCODE_HALT:
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
*
* "In case of the halt instruction not inside any conditional
* code block, the value of <JIP> and <UIP> should be the
* same. In case of the halt instruction inside conditional code
* block, the <UIP> should be the end of the program, and the
* <JIP> should be end of the most inner conditional code block."
*
* The uip will have already been set by whoever set up the
* instruction.
*/
if (block_end_ip == 0) {
gen8_set_jip(inst, gen8_uip(inst));
} else {
gen8_set_jip(inst, block_end_ip - ip);
}
assert(gen8_uip(inst) != 0);
assert(gen8_jip(inst) != 0);
break;
}
}
}
void
gen8_generator::DO()
{
if (loop_stack_array_size <= loop_stack_depth) {
loop_stack_array_size *= 2;
loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
}
loop_stack[loop_stack_depth++] = nr_inst;
}
gen8_instruction *
gen8_generator::BREAK()
{
gen8_instruction *inst = next_inst(BRW_OPCODE_BREAK);
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
gen8_set_src0(brw, inst, brw_imm_d(0));
gen8_set_exec_size(inst, default_state.exec_size);
return inst;
}
gen8_instruction *
gen8_generator::CONTINUE()
{
gen8_instruction *inst = next_inst(BRW_OPCODE_CONTINUE);
gen8_set_dst(brw, inst, brw_ip_reg());
gen8_set_src0(brw, inst, brw_imm_d(0));
gen8_set_exec_size(inst, default_state.exec_size);
return inst;
}
gen8_instruction *
gen8_generator::WHILE()
{
gen8_instruction *do_inst = &store[loop_stack[--loop_stack_depth]];
gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
gen8_set_src0(brw, while_inst, brw_imm_d(0));
gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
gen8_set_exec_size(while_inst, default_state.exec_size);
return while_inst;
}
gen8_instruction *
gen8_generator::HALT()
{
gen8_instruction *inst = next_inst(BRW_OPCODE_HALT);
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
gen8_set_src0(brw, inst, brw_imm_d(0));
gen8_set_exec_size(inst, default_state.exec_size);
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
return inst;
}

View file

@@ -1,196 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file gen8_generator.h
*
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
*/
#pragma once
extern "C" {
#include "main/macros.h"
} /* extern "C" */
#include "gen8_instruction.h"
class gen8_generator {
public:
gen8_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
void *mem_ctx);
~gen8_generator();
/**
* Instruction emitters.
* @{
*/
#define ALU1(OP) \
gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
#define ALU2(OP) \
gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
#define ALU3(OP) \
gen8_instruction *OP(struct brw_reg d, \
struct brw_reg, struct brw_reg, struct brw_reg);
ALU2(ADD)
ALU2(AND)
ALU2(ASR)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(BFREV)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU2(LINE)
ALU3(LRP)
ALU2(MAC)
ALU2(MACH)
ALU3(MAD)
ALU2(MUL)
ALU1(MOV)
ALU1(MOV_RAW)
ALU1(NOT)
ALU2(OR)
ALU2(PLN)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2(XOR)
#undef ALU1
#undef ALU2
#undef ALU3
gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
struct brw_reg src0, struct brw_reg src1);
gen8_instruction *IF(unsigned predicate);
gen8_instruction *ELSE();
gen8_instruction *ENDIF();
void DO();
gen8_instruction *BREAK();
gen8_instruction *CONTINUE();
gen8_instruction *WHILE();
gen8_instruction *HALT();
gen8_instruction *MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0);
gen8_instruction *MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
gen8_instruction *NOP();
/** @} */
protected:
gen8_instruction *alu3(unsigned opcode,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1,
struct brw_reg src2);
gen8_instruction *math(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0);
gen8_instruction *next_inst(unsigned opcode);
struct gl_shader_program *shader_prog;
struct gl_program *prog;
struct brw_context *brw;
struct intel_context *intel;
struct gl_context *ctx;
gen8_instruction *store;
unsigned store_size;
unsigned nr_inst;
unsigned next_inst_offset;
/**
* Control flow stacks:
*
* if_stack contains IF and ELSE instructions which must be patched with
* the final jump offsets (and popped) once the matching ENDIF is encountered.
*
* We actually store an array index into the store, rather than pointers
* to the instructions. This is necessary since we may realloc the store.
*
* @{
*/
int *if_stack;
int if_stack_depth;
int if_stack_array_size;
int *loop_stack;
int loop_stack_depth;
int loop_stack_array_size;
int if_depth_in_loop;
void push_if_stack(gen8_instruction *inst);
gen8_instruction *pop_if_stack();
/** @} */
void patch_IF_ELSE(gen8_instruction *if_inst,
gen8_instruction *else_inst,
gen8_instruction *endif_inst);
unsigned next_ip(unsigned ip) const;
unsigned find_next_block_end(unsigned start_ip) const;
unsigned find_loop_end(unsigned start) const;
void patch_jump_targets();
/**
* Default state for new instructions.
*/
struct {
unsigned exec_size;
unsigned access_mode;
unsigned mask_control;
unsigned qtr_control;
unsigned flag_subreg_nr;
unsigned conditional_mod;
unsigned predicate;
bool predicate_inverse;
bool saturate;
} default_state;
void *mem_ctx;
};

View file

@@ -1,458 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file gen8_instruction.c
*
* A representation of a Gen8+ EU instruction, with helper methods to get
* and set various fields. This is the actual hardware format.
*/
#include "main/compiler.h"
#include "brw_defines.h"
#include "gen8_instruction.h"
static void
gen8_convert_mrf_to_grf(struct brw_reg *reg)
{
/* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
* "The send with EOT should use register space R112-R127 for <src>. This is
* to enable loading of a new thread into the same slot while the message
* with EOT for current thread is pending dispatch."
*
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
if (reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += GEN7_MRF_HACK_START;
}
}
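For example, assuming GEN7_MRF_HACK_START is 112 (its value in brw_defines.h), a fake MRF write to m4 actually lands in g116:

struct brw_reg reg = brw_message_reg(4);   /* file = MRF, nr = 4 */
gen8_convert_mrf_to_grf(&reg);             /* file = GRF, nr = 4 + 112 = 116 */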
void
gen8_set_dst(const struct brw_context *brw,
struct gen8_instruction *inst,
struct brw_reg reg)
{
gen8_convert_mrf_to_grf(&reg);
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < BRW_MAX_GRF);
gen8_set_dst_reg_file(inst, reg.file);
gen8_set_dst_reg_type(inst, brw_reg_type_to_hw_type(brw, reg.type, reg.file));
gen8_set_dst_address_mode(inst, reg.address_mode);
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
gen8_set_dst_da_reg_nr(inst, reg.nr);
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
/* Set Dst.SubRegNum[4:0] */
gen8_set_dst_da1_subreg_nr(inst, reg.subnr);
/* Set Dst.HorzStride */
if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
reg.hstride = BRW_HORIZONTAL_STRIDE_1;
gen8_set_dst_da1_hstride(inst, reg.hstride);
} else {
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
assert(reg.subnr == 0 || reg.subnr == 16);
gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4);
gen8_set_da16_writemask(inst, reg.dw1.bits.writemask);
}
} else {
/* Indirect addressing */
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
/* Set Dst.HorzStride */
if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
reg.hstride = BRW_HORIZONTAL_STRIDE_1;
gen8_set_dst_da1_hstride(inst, reg.hstride);
gen8_set_dst_ia1_subreg_nr(inst, reg.subnr);
gen8_set_dst_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
}
/* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
* or 16 (SIMD16), as that's normally correct. However, when dealing with
* small registers, we automatically reduce it to match the register size.
*/
if (reg.width < BRW_EXECUTE_8)
gen8_set_exec_size(inst, reg.width);
}
static void
gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg)
{
int hstride_for_reg[] = {0, 1, 2, 4};
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
int width_for_reg[] = {1, 2, 4, 8, 16};
int execsize_for_reg[] = {1, 2, 4, 8, 16};
int width, hstride, vstride, execsize;
if (reg.file == BRW_IMMEDIATE_VALUE) {
/* TODO: check immediate vectors */
return;
}
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE)
return;
assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
if (reg.vstride == 0xf) {
vstride = -1;
} else {
assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
vstride = vstride_for_reg[reg.vstride];
}
assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
width = width_for_reg[reg.width];
assert(gen8_exec_size(inst) >= 0 &&
gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg));
execsize = execsize_for_reg[gen8_exec_size(inst)];
/* Restrictions from 3.3.10: Register Region Restrictions. */
/* 3. */
assert(execsize >= width);
/* 4. */
if (execsize == width && hstride != 0) {
assert(vstride == -1 || vstride == width * hstride);
}
/* 5. */
if (execsize == width && hstride == 0) {
/* no restriction on vstride. */
}
/* 6. */
if (width == 1) {
assert(hstride == 0);
}
/* 7. */
if (execsize == 1 && width == 1) {
assert(hstride == 0);
assert(vstride == 0);
}
/* 8. */
if (vstride == 0 && hstride == 0) {
assert(width == 1);
}
/* 10. Check destination issues. */
}
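To make the numbered restrictions concrete: a typical SIMD8 source region <8;8,1> decodes to vstride = 8, width = 8, hstride = 1, satisfying rule 3 (execsize 8 >= width 8) and rule 4 (vstride 8 == width 8 * hstride 1), while a scalar <0;1,0> region (vstride 0, width 1, hstride 0) passes rules 6 and 8.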
void
gen8_set_src0(const struct brw_context *brw,
struct gen8_instruction *inst,
struct brw_reg reg)
{
gen8_convert_mrf_to_grf(&reg);
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < BRW_MAX_GRF);
gen8_validate_reg(inst, reg);
gen8_set_src0_reg_file(inst, reg.file);
gen8_set_src0_reg_type(inst,
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
gen8_set_src0_abs(inst, reg.abs);
gen8_set_src0_negate(inst, reg.negate);
if (reg.file == BRW_IMMEDIATE_VALUE) {
inst->data[3] = reg.dw1.ud;
/* Required to set some fields in src1 as well: */
gen8_set_src1_reg_file(inst, BRW_ARCHITECTURE_REGISTER_FILE);
gen8_set_src1_reg_type(inst,
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
return;
}
gen8_set_src0_address_mode(inst, reg.address_mode);
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
gen8_set_src0_da_reg_nr(inst, reg.nr);
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
/* Set Src0.SubRegNum[4:0] */
gen8_set_src0_da1_subreg_nr(inst, reg.subnr);
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
} else {
gen8_set_src0_da1_hstride(inst, reg.hstride);
gen8_set_src0_vert_stride(inst, reg.vstride);
}
gen8_set_src0_da1_width(inst, reg.width);
} else {
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
assert(reg.subnr == 0 || reg.subnr == 16);
gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4);
gen8_set_src0_da16_swiz_x(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_X));
gen8_set_src0_da16_swiz_y(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_Y));
gen8_set_src0_da16_swiz_z(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_Z));
gen8_set_src0_da16_swiz_w(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_W));
/* This is an oddity of the fact that we're using the same
* descriptions for registers in both Align16 and Align1 modes.
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
else
gen8_set_src0_vert_stride(inst, reg.vstride);
}
} else {
/* Indirect addressing */
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
if (reg.width == BRW_WIDTH_1 &&
gen8_exec_size(inst) == BRW_EXECUTE_1) {
gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
} else {
gen8_set_src0_da1_hstride(inst, reg.hstride);
gen8_set_src0_vert_stride(inst, reg.vstride);
}
gen8_set_src0_da1_width(inst, reg.width);
gen8_set_src0_ia1_subreg_nr(inst, reg.subnr);
gen8_set_src0_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
}
}
void
gen8_set_src1(const struct brw_context *brw,
struct gen8_instruction *inst,
struct brw_reg reg)
{
gen8_convert_mrf_to_grf(&reg);
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < BRW_MAX_GRF);
gen8_validate_reg(inst, reg);
gen8_set_src1_reg_file(inst, reg.file);
gen8_set_src1_reg_type(inst,
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
gen8_set_src1_abs(inst, reg.abs);
gen8_set_src1_negate(inst, reg.negate);
/* Only src1 can be an immediate in two-argument instructions. */
assert(gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE);
if (reg.file == BRW_IMMEDIATE_VALUE) {
inst->data[3] = reg.dw1.ud;
return;
}
gen8_set_src1_address_mode(inst, reg.address_mode);
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
gen8_set_src1_da_reg_nr(inst, reg.nr);
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
/* Set Src1.SubRegNum[4:0] */
gen8_set_src1_da1_subreg_nr(inst, reg.subnr);
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
} else {
gen8_set_src1_da1_hstride(inst, reg.hstride);
gen8_set_src1_vert_stride(inst, reg.vstride);
}
gen8_set_src1_da1_width(inst, reg.width);
} else {
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
assert(reg.subnr == 0 || reg.subnr == 16);
gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4);
gen8_set_src1_da16_swiz_x(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_X));
gen8_set_src1_da16_swiz_y(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_Y));
gen8_set_src1_da16_swiz_z(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_Z));
gen8_set_src1_da16_swiz_w(inst,
BRW_GET_SWZ(reg.dw1.bits.swizzle,
BRW_CHANNEL_W));
/* This is an oddity of the fact that we're using the same
* descriptions for registers in both Align16 and Align1 modes.
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
else
gen8_set_src1_vert_stride(inst, reg.vstride);
}
} else {
/* Indirect addressing */
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
} else {
gen8_set_src1_da1_hstride(inst, reg.hstride);
gen8_set_src1_vert_stride(inst, reg.vstride);
}
gen8_set_src1_da1_width(inst, reg.width);
gen8_set_src1_ia1_subreg_nr(inst, reg.subnr);
gen8_set_src1_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
}
}
/**
* Set the Message Descriptor and Extended Message Descriptor fields
* for SEND messages.
*
* \note This zeroes out the Function Control bits, so it must be called
* \b before filling out any message-specific data. Callers can
* choose not to fill in irrelevant bits; they will be zero.
*/
static void
gen8_set_message_descriptor(const struct brw_context *brw,
struct gen8_instruction *inst,
enum brw_message_target sfid,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread)
{
gen8_set_src1(brw, inst, brw_imm_d(0));
gen8_set_sfid(inst, sfid);
gen8_set_mlen(inst, msg_length);
gen8_set_rlen(inst, response_length);
gen8_set_header_present(inst, header_present);
gen8_set_eot(inst, end_of_thread);
}
void
gen8_set_urb_message(const struct brw_context *brw,
struct gen8_instruction *inst,
enum brw_urb_write_flags flags,
unsigned msg_length,
unsigned response_length,
unsigned offset,
bool interleave)
{
gen8_set_message_descriptor(brw, inst, BRW_SFID_URB,
msg_length, response_length,
true, flags & BRW_URB_WRITE_EOT);
gen8_set_src0(brw, inst, brw_vec8_grf(GEN7_MRF_HACK_START + 1, 0));
if (flags & BRW_URB_WRITE_OWORD) {
assert(msg_length == 2);
gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_OWORD);
} else {
gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_HWORD);
}
gen8_set_urb_global_offset(inst, offset);
gen8_set_urb_interleave(inst, interleave);
gen8_set_urb_per_slot_offset(inst,
flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0);
}
void
gen8_set_sampler_message(const struct brw_context *brw,
struct gen8_instruction *inst,
unsigned binding_table_index,
unsigned sampler,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
gen8_set_message_descriptor(brw, inst, BRW_SFID_SAMPLER, msg_length,
response_length, header_present, false);
gen8_set_binding_table_index(inst, binding_table_index);
gen8_set_sampler(inst, sampler);
gen8_set_sampler_msg_type(inst, msg_type);
gen8_set_sampler_simd_mode(inst, simd_mode);
}
void
gen8_set_dp_message(const struct brw_context *brw,
struct gen8_instruction *inst,
enum brw_message_target sfid,
unsigned binding_table_index,
unsigned msg_type,
unsigned msg_control,
unsigned mlen,
unsigned rlen,
bool header_present,
bool end_of_thread)
{
gen8_set_message_descriptor(brw, inst, sfid, mlen, rlen, header_present,
end_of_thread);
gen8_set_binding_table_index(inst, binding_table_index);
gen8_set_dp_message_type(inst, msg_type);
gen8_set_dp_message_control(inst, msg_control);
}
void
gen8_set_dp_scratch_message(const struct brw_context *brw,
struct gen8_instruction *inst,
bool write,
bool dword,
bool invalidate_after_read,
unsigned num_regs,
unsigned addr_offset,
unsigned mlen,
unsigned rlen,
bool header_present,
bool end_of_thread)
{
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || num_regs == 8);
gen8_set_message_descriptor(brw, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
mlen, rlen, header_present, end_of_thread);
gen8_set_dp_category(inst, 1); /* Scratch Block Read/Write messages */
gen8_set_scratch_read_write(inst, write);
gen8_set_scratch_type(inst, dword);
gen8_set_scratch_invalidate_after_read(inst, invalidate_after_read);
gen8_set_scratch_block_size(inst, ffs(num_regs) - 1);
gen8_set_scratch_addr_offset(inst, addr_offset);
}

View file

@@ -1,418 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file gen8_instruction.h
*
* A representation of a Gen8+ EU instruction, with helper methods to get
* and set various fields. This is the actual hardware format.
*/
#ifndef GEN8_INSTRUCTION_H
#define GEN8_INSTRUCTION_H
#include <stdio.h>
#include <stdint.h>
#include "brw_context.h"
#include "brw_reg.h"
#ifdef __cplusplus
extern "C" {
#endif
struct gen8_instruction {
uint32_t data[4];
};
static inline unsigned gen8_instruction_bits(struct gen8_instruction *inst,
unsigned high,
unsigned low);
static inline void gen8_instruction_set_bits(struct gen8_instruction *inst,
unsigned high,
unsigned low,
unsigned value);
#define F(name, high, low) \
static inline void gen8_set_##name(struct gen8_instruction *inst, unsigned v) \
{ \
gen8_instruction_set_bits(inst, high, low, v); \
} \
static inline unsigned gen8_##name(struct gen8_instruction *inst) \
{ \
return gen8_instruction_bits(inst, high, low); \
}
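For instance, F(opcode, 6, 0) below expands to this getter/setter pair over bits 6:0 of the instruction:

static inline void gen8_set_opcode(struct gen8_instruction *inst, unsigned v)
{
   gen8_instruction_set_bits(inst, 6, 0, v);
}
static inline unsigned gen8_opcode(struct gen8_instruction *inst)
{
   return gen8_instruction_bits(inst, 6, 0);
}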
F(src1_vert_stride, 120, 117)
F(src1_da1_width, 116, 114)
F(src1_da16_swiz_w, 115, 114)
F(src1_da16_swiz_z, 113, 112)
F(src1_da1_hstride, 113, 112)
F(src1_address_mode, 111, 111)
/** Src1.SrcMod @{ */
F(src1_negate, 110, 110)
F(src1_abs, 109, 109)
/** @} */
F(src1_ia1_subreg_nr, 108, 105)
F(src1_da_reg_nr, 108, 101)
F(src1_da16_subreg_nr, 100, 100)
F(src1_da1_subreg_nr, 100, 96)
F(src1_da16_swiz_y, 99, 98)
F(src1_da16_swiz_x, 97, 96)
F(src1_reg_type, 94, 91)
F(src1_reg_file, 90, 89)
F(src0_vert_stride, 88, 85)
F(src0_da1_width, 84, 82)
F(src0_da16_swiz_w, 83, 82)
F(src0_da16_swiz_z, 81, 80)
F(src0_da1_hstride, 81, 80)
F(src0_address_mode, 79, 79)
/** Src0.SrcMod @{ */
F(src0_negate, 78, 78)
F(src0_abs, 77, 77)
/** @} */
F(src0_ia1_subreg_nr, 76, 73)
F(src0_da_reg_nr, 76, 69)
F(src0_da16_subreg_nr, 68, 68)
F(src0_da1_subreg_nr, 68, 64)
F(src0_da16_swiz_y, 67, 66)
F(src0_da16_swiz_x, 65, 64)
F(dst_address_mode, 63, 63)
F(dst_da1_hstride, 62, 61)
F(dst_ia1_subreg_nr, 60, 57)
F(dst_da_reg_nr, 60, 53)
F(dst_da16_subreg_nr, 52, 52)
F(dst_da1_subreg_nr, 52, 48)
F(da16_writemask, 51, 48) /* Dst.ChanEn */
F(src0_reg_type, 46, 43)
F(src0_reg_file, 42, 41)
F(dst_reg_type, 40, 37)
F(dst_reg_file, 36, 35)
F(mask_control, 34, 34)
F(flag_reg_nr, 33, 33)
F(flag_subreg_nr, 32, 32)
F(saturate, 31, 31)
F(branch_control, 30, 30)
F(debug_control, 30, 30)
F(cmpt_control, 29, 29)
F(acc_wr_control, 28, 28)
F(cond_modifier, 27, 24)
F(exec_size, 23, 21)
F(pred_inv, 20, 20)
F(pred_control, 19, 16)
F(thread_control, 15, 14)
F(qtr_control, 13, 12)
F(nib_control, 11, 11)
F(no_dd_check, 10, 10)
F(no_dd_clear, 9, 9)
F(access_mode, 8, 8)
/* Bit 7 is Reserved (for future Opcode expansion) */
F(opcode, 6, 0)
/**
* Three-source instructions:
* @{
*/
F(src2_3src_reg_nr, 125, 118)
F(src2_3src_subreg_nr, 117, 115)
F(src2_3src_swizzle, 114, 107)
F(src2_3src_rep_ctrl, 106, 106)
F(src1_3src_reg_nr, 104, 97)
/* src1_3src_subreg_nr spans word boundaries and has to be handled specially */
F(src1_3src_swizzle, 93, 86)
F(src1_3src_rep_ctrl, 85, 85)
F(src0_3src_reg_nr, 83, 76)
F(src0_3src_subreg_nr, 75, 73)
F(src0_3src_swizzle, 72, 65)
F(src0_3src_rep_ctrl, 64, 64)
F(dst_3src_reg_nr, 63, 56)
F(dst_3src_subreg_nr, 55, 53)
F(dst_3src_writemask, 52, 49)
F(dst_3src_type, 48, 46)
F(src_3src_type, 45, 43)
F(src2_3src_negate, 42, 42)
F(src2_3src_abs, 41, 41)
F(src1_3src_negate, 40, 40)
F(src1_3src_abs, 39, 39)
F(src0_3src_negate, 38, 38)
F(src0_3src_abs, 37, 37)
/** @} */
/**
* Fields for SEND messages:
* @{
*/
F(eot, 127, 127)
F(mlen, 124, 121)
F(rlen, 120, 116)
F(header_present, 115, 115)
F(function_control, 114, 96)
F(sfid, 27, 24)
F(math_function, 27, 24)
/** @} */
/**
* URB message function control bits:
* @{
*/
F(urb_per_slot_offset, 113, 113)
F(urb_interleave, 111, 111)
F(urb_global_offset, 110, 100)
F(urb_opcode, 99, 96)
/** @} */
/* Message descriptor bits */
#define MD(name, high, low) F(name, (high + 96), (low + 96))
/**
* Sampler message function control bits:
* @{
*/
MD(sampler_simd_mode, 18, 17)
MD(sampler_msg_type, 16, 12)
MD(sampler, 11, 8)
MD(binding_table_index, 7, 0) /* also used by other messages */
/** @} */
/**
* Data port message function control bits:
* @{
*/
MD(dp_category, 18, 18)
MD(dp_message_type, 17, 14)
MD(dp_message_control, 13, 8)
/** @} */
/**
* Scratch message bits:
* @{
*/
MD(scratch_read_write, 17, 17) /* 0 = read, 1 = write */
MD(scratch_type, 16, 16) /* 0 = OWord, 1 = DWord */
MD(scratch_invalidate_after_read, 15, 15)
MD(scratch_block_size, 13, 12)
MD(scratch_addr_offset, 11, 0)
/** @} */
/**
* Render Target message function control bits:
* @{
*/
MD(rt_last, 12, 12)
MD(rt_slot_group, 11, 11)
MD(rt_message_type, 10, 8)
/** @} */
/**
* Thread Spawn message function control bits:
* @{
*/
MD(ts_resource_select, 4, 4)
MD(ts_request_type, 1, 1)
MD(ts_opcode, 0, 0)
/** @} */
/**
* Video Motion Estimation message function control bits:
* @{
*/
F(vme_message_type, 14, 13)
/** @} */
/**
* Check & Refinement Engine message function control bits:
* @{
*/
F(cre_message_type, 14, 13)
/** @} */
#undef MD
#undef F
static inline void
gen8_set_src1_3src_subreg_nr(struct gen8_instruction *inst, unsigned v)
{
assert((v & ~0x7) == 0);
gen8_instruction_set_bits(inst, 95, 94, v & 0x3);
gen8_instruction_set_bits(inst, 96, 96, v >> 2);
}
static inline unsigned
gen8_src1_3src_subreg_nr(struct gen8_instruction *inst)
{
return gen8_instruction_bits(inst, 95, 94) |
(gen8_instruction_bits(inst, 96, 96) << 2);
}
#define GEN8_IA1_ADDR_IMM(reg, nine, high, low) \
static inline void \
gen8_set_##reg##_ia1_addr_imm(struct gen8_instruction *inst, unsigned value) \
{ \
assert((value & ~0x3ff) == 0); \
gen8_instruction_set_bits(inst, high, low, value & 0x1ff); \
gen8_instruction_set_bits(inst, nine, nine, value >> 9); \
} \
\
static inline unsigned \
gen8_##reg##_ia1_addr_imm(struct gen8_instruction *inst) \
{ \
return gen8_instruction_bits(inst, high, low) | \
(gen8_instruction_bits(inst, nine, nine) << 9); \
}
/* AddrImm[9:0] for Align1 Indirect Addressing */
GEN8_IA1_ADDR_IMM(src1, 121, 104, 96)
GEN8_IA1_ADDR_IMM(src0, 95, 72, 64)
GEN8_IA1_ADDR_IMM(dst, 47, 56, 48)
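As a worked example for the dst case (nine = 47, high = 56, low = 48): storing AddrImm = 0x3ff writes value & 0x1ff = 0x1ff into bits 56:48 and value >> 9 = 1 into bit 47; the getter reassembles 0x1ff | (1 << 9) = 0x3ff.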
/**
* Flow control instruction bits:
* @{
*/
static inline unsigned gen8_uip(struct gen8_instruction *inst)
{
return inst->data[2];
}
static inline void gen8_set_uip(struct gen8_instruction *inst, unsigned uip)
{
inst->data[2] = uip;
}
static inline unsigned gen8_jip(struct gen8_instruction *inst)
{
return inst->data[3];
}
static inline void gen8_set_jip(struct gen8_instruction *inst, unsigned jip)
{
inst->data[3] = jip;
}
/** @} */
static inline int gen8_src1_imm_d(struct gen8_instruction *inst)
{
return inst->data[3];
}
static inline unsigned gen8_src1_imm_ud(struct gen8_instruction *inst)
{
return inst->data[3];
}
static inline float gen8_src1_imm_f(struct gen8_instruction *inst)
{
fi_type ft;
ft.u = inst->data[3];
return ft.f;
}
void gen8_set_dst(const struct brw_context *brw,
struct gen8_instruction *inst, struct brw_reg reg);
void gen8_set_src0(const struct brw_context *brw,
struct gen8_instruction *inst, struct brw_reg reg);
void gen8_set_src1(const struct brw_context *brw,
struct gen8_instruction *inst, struct brw_reg reg);
void gen8_set_urb_message(const struct brw_context *brw,
struct gen8_instruction *inst,
enum brw_urb_write_flags flags,
unsigned mlen, unsigned rlen,
unsigned offset, bool interleave);
void gen8_set_sampler_message(const struct brw_context *brw,
struct gen8_instruction *inst,
unsigned binding_table_index, unsigned sampler,
unsigned msg_type, unsigned rlen, unsigned mlen,
bool header_present, unsigned simd_mode);
void gen8_set_dp_message(const struct brw_context *brw,
struct gen8_instruction *inst,
enum brw_message_target sfid,
unsigned binding_table_index,
unsigned msg_type,
unsigned msg_control,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread);
void gen8_set_dp_scratch_message(const struct brw_context *brw,
struct gen8_instruction *inst,
bool write,
bool dword,
bool invalidate_after_read,
unsigned num_regs,
unsigned addr_offset,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread);
/**
* Fetch a set of contiguous bits from the instruction.
*
* Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
*/
static inline unsigned
gen8_instruction_bits(struct gen8_instruction *inst, unsigned high, unsigned low)
{
/* We assume the field doesn't cross 32-bit boundaries. */
const unsigned word = high / 32;
assert(word == low / 32);
high %= 32;
low %= 32;
const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
return (inst->data[word] & mask) >> low;
}
/**
* Set bits in the instruction, with proper shifting and masking.
*
* Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
*/
static inline void
gen8_instruction_set_bits(struct gen8_instruction *inst,
unsigned high,
unsigned low,
unsigned value)
{
const unsigned word = high / 32;
assert(word == low / 32);
high %= 32;
low %= 32;
const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
/* Make sure the supplied value actually fits in the given bitfield. */
assert((value & (mask >> low)) == value);
inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask);
}
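Tracing the setter with the exec_size field (bits 23:21) makes the masking concrete: word = 23 / 32 = 0, high and low reduce to 23 and 21, mask = ((1 << 3) - 1) << 21 = 0x00e00000, and only those three bits of data[0] are rewritten.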
#ifdef __cplusplus
}
#endif
#endif

View file

@@ -1,925 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "brw_cfg.h"
extern "C" {
#include "brw_eu.h"
#include "main/macros.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
};
namespace brw {
gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
struct brw_vec4_prog_data *prog_data,
void *mem_ctx,
bool debug_flag)
: gen8_generator(brw, shader_prog, prog, mem_ctx),
prog_data(prog_data),
debug_flag(debug_flag)
{
}
gen8_vec4_generator::~gen8_vec4_generator()
{
}
void
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst,
struct brw_reg sampler_index)
{
int msg_type = 0;
switch (ir->opcode) {
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
if (ir->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
case SHADER_OPCODE_TXD:
if (ir->shadow_compare) {
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
}
break;
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_CMS:
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
break;
case SHADER_OPCODE_TXF_MCS:
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
break;
case SHADER_OPCODE_TXS:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
case SHADER_OPCODE_TG4:
if (ir->shadow_compare) {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
} else {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
}
break;
case SHADER_OPCODE_TG4_OFFSET:
if (ir->shadow_compare) {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
} else {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
}
break;
default:
unreachable("should not get here: invalid VS texture opcode");
}
assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
uint32_t sampler = sampler_index.dw1.ud;
if (ir->header_present) {
MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
default_state.access_mode = BRW_ALIGN_1;
if (ir->texture_offset) {
/* Set the offset bits in DWord 2. */
MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
BRW_REGISTER_TYPE_UD),
brw_imm_ud(ir->texture_offset));
}
if (sampler >= 16) {
/* The "Sampler Index" field can only store values between 0 and 15.
* However, we can add an offset to the "Sampler State Pointer"
* field, effectively selecting a different set of 16 samplers.
*
* The "Sampler State Pointer" needs to be aligned to a 32-byte
* offset, and each sampler state is only 16-bytes, so we can't
* exclusively use the offset - we have to use both.
*/
const int sampler_state_size = 16; /* 16 bytes */
gen8_instruction *add =
ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
get_element_ud(brw_vec8_grf(0, 0), 3),
brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
gen8_set_mask_control(add, BRW_MASK_DISABLE);
}
default_state.access_mode = BRW_ALIGN_16;
}
uint32_t surf_index =
prog_data->base.binding_table.texture_start + sampler;
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, inst, dst);
gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
gen8_set_sampler_message(brw, inst,
surf_index,
sampler % 16,
msg_type,
1,
ir->mlen,
ir->header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2);
brw_mark_surface_used(&prog_data->base, surf_index);
}
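Plugging a hypothetical sampler index of 20 into the logic above shows the split at work: the header ADD offsets the Sampler State Pointer by 16 * (20 / 16) * 16 = 256 bytes (a multiple of 32, as required), while the descriptor's Sampler Index field receives 20 % 16 = 4.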
void
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
{
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
/* Copy g0. */
if (vs)
MOV_RAW(header, brw_vec8_grf(0, 0));
gen8_instruction *inst;
if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
/* Enable Channel Masks in the URB_WRITE_OWORD message header */
default_state.access_mode = BRW_ALIGN_1;
MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
brw_imm_ud(0xff00));
default_state.access_mode = BRW_ALIGN_16;
}
inst = next_inst(BRW_OPCODE_SEND);
gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
true);
gen8_set_dst(brw, inst, brw_null_reg());
gen8_set_src0(brw, inst, header);
}
void
gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
struct brw_reg src)
{
/* Move the vertex count into the second MRF for the EOT write. */
assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
gen8_instruction *inst =
MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
}
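
/**
 * Finish a geometry shader thread with an EOT URB write whose two-register
 * payload is the g0 header followed by the vertex count.
 */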
void
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
{
struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
gen8_instruction *inst;
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
default_state.access_mode = BRW_ALIGN_1;
inst = MOV(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
default_state.access_mode = BRW_ALIGN_16;
/* mlen = 2: g0 header + vertex count */
inst = next_inst(BRW_OPCODE_SEND);
gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
gen8_set_dst(brw, inst, brw_null_reg());
gen8_set_src0(brw, inst, src);
}
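
/**
 * Set the "Slot 0 Offset" and "Slot 1 Offset" fields (M0.3 and M0.4) of a
 * URB message header; see the PRM citation in the body for the details.
 */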
void
gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.3):
*
* Slot 0 Offset. This field, after adding to the Global Offset field
* in the message descriptor, specifies the offset (in 256-bit units)
* from the start of the URB entry, as referenced by URB Handle 0, at
* which the data will be accessed.
*
* Similar text describes DWORD M0.4, which is slot 1 offset.
*
* Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
* of the register for geometry shader invocations 0 and 1) by the
* immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
*
* We can do this with the following EU instruction:
*
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
*/
default_state.access_mode = BRW_ALIGN_1;
gen8_instruction *inst =
MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
default_state.access_mode = BRW_ALIGN_16;
}
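
/**
 * Store an immediate value in DWord 2 of the destination register.
 */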
void
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
struct brw_reg src)
{
assert(src.file == BRW_IMMEDIATE_VALUE);
default_state.access_mode = BRW_ALIGN_1;
gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
default_state.access_mode = BRW_ALIGN_16;
}
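
/**
 * First half of GS channel mask setup: shift invocation 1's mask (DWord 4)
 * into the high nibble so the two masks can later be OR'd together.
 */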
void
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
/* We want to left shift just DWORD 4 (the x component belonging to the
* second geometry shader invocation) by 4 bits. So generate the
* instruction:
*
* shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
*/
dst = suboffset(vec1(dst), 4);
default_state.access_mode = BRW_ALIGN_1;
gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
default_state.access_mode = BRW_ALIGN_16;
}
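
/**
 * Second half of GS channel mask setup: OR the two invocations' masks
 * together into the channel enable bits of the URB message header.
 */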
void
gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
struct brw_reg src)
{
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.5):
*
* 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
*
* When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
* DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
* Vertex 0 DATA[7]. This bit is ANDed with the corresponding
* channel enable to determine the final channel enable. For the
* URB_READ_OWORD & URB_READ_HWORD messages, when final channel
* enable is 1 it indicates that Vertex 1 DATA [3] will be included
* in the writeback message. For the URB_WRITE_OWORD &
* URB_WRITE_HWORD messages, when final channel enable is 1 it
* indicates that Vertex 1 DATA [3] will be written to the surface.
*
* 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
    *        1: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel included
*
* 14 Vertex 1 DATA [2] Channel Mask
* 13 Vertex 1 DATA [1] Channel Mask
* 12 Vertex 1 DATA [0] Channel Mask
* 11 Vertex 0 DATA [3] Channel Mask
* 10 Vertex 0 DATA [2] Channel Mask
* 9 Vertex 0 DATA [1] Channel Mask
* 8 Vertex 0 DATA [0] Channel Mask
*
* (This is from a section of the PRM that is agnostic to the particular
* type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
* geometry shader invocations 0 and 1, respectively). Since we have the
* enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
    * and the enable flags for geometry shader invocation 1 in bits 7:4 of
* DWORD 4, we just need to OR them together and store the result in bits
* 15:8 of DWORD 5.
*
* It's easier to get the EU to do this if we think of the src and dst
* registers as composed of 32 bytes each; then, we want to pick up the
* contents of bytes 0 and 16 from src, OR them together, and store them in
* byte 21.
*
* We can do that by the following EU instruction:
*
* or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
*
* Note: this relies on the source register having zeros in (a) bits 7:4 of
* DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
* source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
* shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
* the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
* contain valid channel mask values (which are in the range 0x0-0xf).
*/
dst = retype(dst, BRW_REGISTER_TYPE_UB);
src = retype(src, BRW_REGISTER_TYPE_UB);
default_state.access_mode = BRW_ALIGN_1;
gen8_instruction *inst =
OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
default_state.access_mode = BRW_ALIGN_16;
}
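
/**
 * Write the two OWord block offsets (M1.0 and M1.4) used by the dual-block
 * scratch read/write messages below.
 */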
void
gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index)
{
int second_vertex_offset = 1;
m1 = retype(m1, BRW_REGISTER_TYPE_D);
/* Set up M1 (message payload). Only the block offsets in M1.0 and
* M1.4 are used, and the rest are ignored.
*/
struct brw_reg m1_0 = suboffset(vec1(m1), 0);
struct brw_reg m1_4 = suboffset(vec1(m1), 4);
struct brw_reg index_0 = suboffset(vec1(index), 0);
struct brw_reg index_4 = suboffset(vec1(index), 4);
default_state.mask_control = BRW_MASK_DISABLE;
default_state.access_mode = BRW_ALIGN_1;
MOV(m1_0, index_0);
if (index.file == BRW_IMMEDIATE_VALUE) {
index_4.dw1.ud += second_vertex_offset;
MOV(m1_4, index_4);
} else {
ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
}
default_state.mask_control = BRW_MASK_ENABLE;
default_state.access_mode = BRW_ALIGN_16;
}
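
/**
 * Read one register from the scratch buffer using an OWord dual-block read.
 */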
void
gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg index)
{
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
MOV_RAW(header, brw_vec8_grf(0, 0));
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, send, dst);
gen8_set_src0(brw, send, header);
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
255, /* binding table index: stateless access */
GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2, /* mlen */
1, /* rlen */
true, /* header present */
false); /* EOT */
}
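
/**
 * Write one register to the scratch buffer using an OWord dual-block write.
 */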
void
gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg index)
{
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
MOV_RAW(header, brw_vec8_grf(0, 0));
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
retype(src, BRW_REGISTER_TYPE_D));
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, send, dst);
gen8_set_src0(brw, send, header);
gen8_set_pred_control(send, ir->predicate);
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
255, /* binding table index: stateless access */
GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
3, /* mlen */
0, /* rlen */
true, /* header present */
false); /* EOT */
}
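
/**
 * Load a vec4 of uniform data from a constant buffer, using the sampler
 * cache LD message.
 */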
void
gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, send, dst);
gen8_set_src0(brw, send, offset);
gen8_set_sampler_message(brw, send,
surf_index,
0, /* The LD message ignores the sampler unit. */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
1, /* mlen */
false, /* no header */
BRW_SAMPLER_SIMD_MODE_SIMD4X2);
brw_mark_surface_used(&prog_data->base, surf_index);
}
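
/**
 * Emit a SIMD4x2 untyped atomic operation on a surface.
 */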
void
gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg atomic_op,
struct brw_reg surf_index)
{
assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
atomic_op.type == BRW_REGISTER_TYPE_UD &&
surf_index.file == BRW_IMMEDIATE_VALUE &&
surf_index.type == BRW_REGISTER_TYPE_UD);
assert((atomic_op.dw1.ud & ~0xf) == 0);
unsigned msg_control =
atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
(1 << 5); /* Return data expected */
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
BRW_REGISTER_TYPE_UD));
gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
surf_index.dw1.ud,
HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
msg_control,
ir->mlen,
1,
ir->header_present,
false);
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
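
/**
 * Emit an untyped surface read; only the red channel is returned.
 */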
void
gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir,
struct brw_reg dst,
struct brw_reg surf_index)
{
assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
surf_index.type == BRW_REGISTER_TYPE_UD);
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
BRW_REGISTER_TYPE_UD));
gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
surf_index.dw1.ud,
HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
0xe, /* enable only the R channel */
ir->mlen,
1,
ir->header_present,
false);
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
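
/**
 * Generate native code for a single IR instruction, dispatching on opcode.
 */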
void
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
struct brw_reg dst,
struct brw_reg *src)
{
vec4_instruction *ir = (vec4_instruction *) instruction;
if (dst.width == BRW_WIDTH_4) {
/* This happens in attribute fixups for "dual instanced" geometry
* shaders, since they use attributes that are vec4's. Since the exec
* width is only 4, it's essential that the caller set
* force_writemask_all in order to make sure the instruction is executed
* regardless of which channels are enabled.
*/
assert(ir->force_writemask_all);
/* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
* the following register region restrictions (from Graphics BSpec:
* 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
* > Register Region Restrictions)
*
* 1. ExecSize must be greater than or equal to Width.
*
* 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
* to Width * HorzStride."
*/
for (int i = 0; i < 3; i++) {
if (src[i].file == BRW_GENERAL_REGISTER_FILE)
src[i] = stride(src[i], 4, 4, 1);
}
}
switch (ir->opcode) {
case BRW_OPCODE_MOV:
MOV(dst, src[0]);
break;
case BRW_OPCODE_ADD:
ADD(dst, src[0], src[1]);
break;
case BRW_OPCODE_MUL:
MUL(dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
MACH(dst, src[0], src[1]);
break;
case BRW_OPCODE_MAD:
MAD(dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_FRC:
FRC(dst, src[0]);
break;
case BRW_OPCODE_RNDD:
RNDD(dst, src[0]);
break;
case BRW_OPCODE_RNDE:
RNDE(dst, src[0]);
break;
case BRW_OPCODE_RNDZ:
RNDZ(dst, src[0]);
break;
case BRW_OPCODE_AND:
AND(dst, src[0], src[1]);
break;
case BRW_OPCODE_OR:
OR(dst, src[0], src[1]);
break;
case BRW_OPCODE_XOR:
XOR(dst, src[0], src[1]);
break;
case BRW_OPCODE_NOT:
NOT(dst, src[0]);
break;
case BRW_OPCODE_ASR:
ASR(dst, src[0], src[1]);
break;
case BRW_OPCODE_SHR:
SHR(dst, src[0], src[1]);
break;
case BRW_OPCODE_SHL:
SHL(dst, src[0], src[1]);
break;
case BRW_OPCODE_CMP:
CMP(dst, ir->conditional_mod, src[0], src[1]);
break;
case BRW_OPCODE_SEL:
SEL(dst, src[0], src[1]);
break;
case BRW_OPCODE_DPH:
DPH(dst, src[0], src[1]);
break;
case BRW_OPCODE_DP4:
DP4(dst, src[0], src[1]);
break;
case BRW_OPCODE_DP3:
DP3(dst, src[0], src[1]);
break;
case BRW_OPCODE_DP2:
DP2(dst, src[0], src[1]);
break;
case BRW_OPCODE_F32TO16:
/* Emulate the Gen7 zeroing bug. */
MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
break;
case BRW_OPCODE_F16TO32:
MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
break;
case BRW_OPCODE_LRP:
LRP(dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_BFREV:
/* BFREV only supports UD type for src and dst. */
BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
break;
case BRW_OPCODE_FBH:
/* FBH only supports UD type for dst. */
FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_FBL:
/* FBL only supports UD type for dst. */
FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_CBIT:
/* CBIT only supports UD type for dst. */
CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_ADDC:
ADDC(dst, src[0], src[1]);
break;
case BRW_OPCODE_SUBB:
SUBB(dst, src[0], src[1]);
break;
case BRW_OPCODE_BFE:
BFE(dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_BFI1:
BFI1(dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
BFI2(dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_IF:
IF(ir->predicate);
break;
case BRW_OPCODE_ELSE:
ELSE();
break;
case BRW_OPCODE_ENDIF:
ENDIF();
break;
case BRW_OPCODE_DO:
DO();
break;
case BRW_OPCODE_BREAK:
BREAK();
break;
case BRW_OPCODE_CONTINUE:
CONTINUE();
break;
case BRW_OPCODE_WHILE:
WHILE();
break;
case SHADER_OPCODE_RCP:
MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
break;
case SHADER_OPCODE_RSQ:
MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
break;
case SHADER_OPCODE_SQRT:
MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
break;
case SHADER_OPCODE_EXP2:
MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
break;
case SHADER_OPCODE_LOG2:
MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
break;
case SHADER_OPCODE_SIN:
MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
break;
case SHADER_OPCODE_COS:
MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
break;
case SHADER_OPCODE_POW:
MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
break;
case SHADER_OPCODE_INT_QUOTIENT:
MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
break;
case SHADER_OPCODE_INT_REMAINDER:
MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
break;
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
/* note: src[0] is unused. */
generate_tex(ir, dst, src[1]);
break;
case VS_OPCODE_URB_WRITE:
generate_urb_write(ir, true);
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
generate_scratch_read(ir, dst, src[0]);
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
generate_scratch_write(ir, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
generate_pull_constant_load(ir, dst, src[0], src[1]);
break;
case GS_OPCODE_URB_WRITE:
generate_urb_write(ir, false);
break;
case GS_OPCODE_THREAD_END:
generate_gs_thread_end(ir);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
generate_gs_set_write_offset(dst, src[0], src[1]);
break;
case GS_OPCODE_SET_VERTEX_COUNT:
generate_gs_set_vertex_count(dst, src[0]);
break;
case GS_OPCODE_SET_DWORD_2_IMMED:
generate_gs_set_dword_2_immed(dst, src[0]);
break;
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
generate_gs_prepare_channel_masks(dst);
break;
case GS_OPCODE_SET_CHANNEL_MASKS:
generate_gs_set_channel_masks(dst, src[0]);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
unreachable("XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
case SHADER_OPCODE_UNTYPED_ATOMIC:
generate_untyped_atomic(ir, dst, src[0], src[1]);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
generate_untyped_surface_read(ir, dst, src[0]);
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
unreachable("VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
default:
if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
opcode_descs[ir->opcode].name);
} else {
_mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
}
abort();
}
}
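
/**
 * Generate code for every instruction in the list, then patch jump targets
 * and (optionally) dump the disassembly.
 */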
void
gen8_vec4_generator::generate_code(exec_list *instructions)
{
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
cfg_t *cfg = NULL;
if (unlikely(debug_flag))
cfg = new(mem_ctx) cfg_t(instructions);
foreach_in_list(vec4_instruction, ir, instructions) {
struct brw_reg src[3], dst;
if (unlikely(debug_flag))
annotate(brw, &annotation, cfg, ir, next_inst_offset);
for (unsigned int i = 0; i < 3; i++) {
src[i] = ir->get_src(prog_data, i);
}
dst = ir->get_dst();
default_state.conditional_mod = ir->conditional_mod;
default_state.predicate = ir->predicate;
default_state.predicate_inverse = ir->predicate_inverse;
default_state.saturate = ir->saturate;
default_state.mask_control = ir->force_writemask_all;
const unsigned pre_emit_nr_inst = nr_inst;
generate_vec4_instruction(ir, dst, src);
if (ir->no_dd_clear || ir->no_dd_check) {
assert(nr_inst == pre_emit_nr_inst + 1 ||
!"no_dd_check or no_dd_clear set for IR emitting more "
"than 1 instruction");
gen8_instruction *last = &store[pre_emit_nr_inst];
gen8_set_no_dd_clear(last, ir->no_dd_clear);
gen8_set_no_dd_check(last, ir->no_dd_check);
}
}
patch_jump_targets();
annotation_finalize(&annotation, next_inst_offset);
int before_size = next_inst_offset;
if (unlikely(debug_flag)) {
if (shader_prog) {
fprintf(stderr, "Native code for %s vertex shader %d:\n",
shader_prog->Label ? shader_prog->Label : "unnamed",
shader_prog->Name);
} else {
fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
}
fprintf(stderr, "vec4 shader: %d instructions.\n", before_size / 16);
dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog);
ralloc_free(annotation.ann);
}
}
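
/**
 * Top-level entry point: generate the program and return the assembly.
 */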
const unsigned *
gen8_vec4_generator::generate_assembly(exec_list *instructions,
unsigned *assembly_size)
{
default_state.access_mode = BRW_ALIGN_16;
default_state.exec_size = BRW_EXECUTE_8;
generate_code(instructions);
*assembly_size = next_inst_offset;
return (const unsigned *) store;
}
} /* namespace brw */