mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
i965: Delete the Gen8 code generators.
We now use the brw_eu_emit.c code instead.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
f17bfc9ba9
commit
8c229d306b
9 changed files with 0 additions and 4076 deletions
|
|
@ -150,16 +150,12 @@ i965_FILES = \
|
|||
gen8_depth_state.c \
|
||||
gen8_disable.c \
|
||||
gen8_draw_upload.c \
|
||||
gen8_fs_generator.cpp \
|
||||
gen8_generator.cpp \
|
||||
gen8_instruction.c \
|
||||
gen8_gs_state.c \
|
||||
gen8_misc_state.c \
|
||||
gen8_multisample_state.c \
|
||||
gen8_sf_state.c \
|
||||
gen8_sol_state.c \
|
||||
gen8_surface_state.c \
|
||||
gen8_vec4_generator.cpp \
|
||||
gen8_viewport_state.c \
|
||||
gen8_vs_state.c \
|
||||
gen8_wm_depth_stencil.c \
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ extern "C" {
|
|||
#include "brw_shader.h"
|
||||
#include "intel_asm_annotation.h"
|
||||
}
|
||||
#include "gen8_generator.h"
|
||||
#include "glsl/glsl_types.h"
|
||||
#include "glsl/ir.h"
|
||||
|
||||
|
|
@ -691,92 +690,6 @@ private:
|
|||
void *mem_ctx;
|
||||
};
|
||||
|
||||
/**
|
||||
* The fragment shader code generator.
|
||||
*
|
||||
* Translates FS IR to actual i965 assembly code.
|
||||
*/
|
||||
class gen8_fs_generator : public gen8_generator
|
||||
{
|
||||
public:
|
||||
gen8_fs_generator(struct brw_context *brw,
|
||||
void *mem_ctx,
|
||||
const struct brw_wm_prog_key *key,
|
||||
struct brw_wm_prog_data *prog_data,
|
||||
struct gl_shader_program *prog,
|
||||
struct gl_fragment_program *fp,
|
||||
bool dual_source_output);
|
||||
~gen8_fs_generator();
|
||||
|
||||
const unsigned *generate_assembly(exec_list *simd8_instructions,
|
||||
exec_list *simd16_instructions,
|
||||
unsigned *assembly_size);
|
||||
|
||||
private:
|
||||
void generate_code(exec_list *instructions);
|
||||
void generate_fb_write(fs_inst *inst);
|
||||
void generate_linterp(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||
struct brw_reg sampler_index);
|
||||
void generate_math1(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
||||
void generate_math2(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg src0, struct brw_reg src1);
|
||||
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
||||
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||
bool negate_value);
|
||||
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
|
||||
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_uniform_pull_constant_load(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
void generate_varying_pull_constant_load(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
void generate_mov_dispatch_to_flags(fs_inst *ir);
|
||||
void generate_set_omask(fs_inst *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg sample_mask);
|
||||
void generate_set_sample_id(fs_inst *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
void generate_set_simd4x2_offset(fs_inst *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg offset);
|
||||
void generate_pack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg x,
|
||||
struct brw_reg y);
|
||||
void generate_unpack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_untyped_atomic(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg atomic_op,
|
||||
struct brw_reg surf_index);
|
||||
|
||||
void generate_untyped_surface_read(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg surf_index);
|
||||
void generate_discard_jump(fs_inst *ir);
|
||||
|
||||
bool patch_discard_jumps_to_fb_writes();
|
||||
|
||||
const struct brw_wm_prog_key *const key;
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
const struct gl_fragment_program *fp;
|
||||
|
||||
unsigned dispatch_width; /** 8 or 16 */
|
||||
|
||||
bool dual_source_output;
|
||||
|
||||
exec_list discard_halt_patches;
|
||||
};
|
||||
|
||||
bool brw_do_channel_expressions(struct exec_list *instructions);
|
||||
bool brw_do_vector_splitting(struct exec_list *instructions);
|
||||
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ extern "C" {
|
|||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#include "gen8_generator.h"
|
||||
#endif
|
||||
|
||||
#include "glsl/ir.h"
|
||||
|
|
@ -702,72 +701,6 @@ private:
|
|||
const bool debug_flag;
|
||||
};
|
||||
|
||||
/**
|
||||
* The vertex shader code generator.
|
||||
*
|
||||
* Translates VS IR to actual i965 assembly code.
|
||||
*/
|
||||
class gen8_vec4_generator : public gen8_generator
|
||||
{
|
||||
public:
|
||||
gen8_vec4_generator(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
struct brw_vec4_prog_data *prog_data,
|
||||
void *mem_ctx,
|
||||
bool debug_flag);
|
||||
~gen8_vec4_generator();
|
||||
|
||||
const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
|
||||
|
||||
private:
|
||||
void generate_code(exec_list *instructions);
|
||||
void generate_vec4_instruction(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
|
||||
void generate_tex(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg sampler_index);
|
||||
|
||||
void generate_urb_write(vec4_instruction *ir, bool copy_g0);
|
||||
void generate_gs_thread_end(vec4_instruction *ir);
|
||||
void generate_gs_set_write_offset(struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
void generate_gs_set_vertex_count(struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
|
||||
void generate_gs_prepare_channel_masks(struct brw_reg dst);
|
||||
void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
|
||||
|
||||
void generate_oword_dual_block_offsets(struct brw_reg m1,
|
||||
struct brw_reg index);
|
||||
void generate_scratch_write(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
struct brw_reg index);
|
||||
void generate_scratch_read(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
void generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
void generate_untyped_atomic(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg atomic_op,
|
||||
struct brw_reg surf_index);
|
||||
void generate_untyped_surface_read(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg surf_index);
|
||||
|
||||
struct brw_vec4_prog_data *prog_data;
|
||||
|
||||
const bool debug_flag;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace brw */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,620 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file gen8_generator.cpp
|
||||
*
|
||||
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include "main/compiler.h"
|
||||
#include "main/macros.h"
|
||||
#include "brw_context.h"
|
||||
} /* extern "C" */
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "brw_eu.h"
|
||||
#include "brw_reg.h"
|
||||
#include "gen8_generator.h"
|
||||
|
||||
/**
 * Build an empty generator.
 *
 * Clears default_state and selects BRW_MASK_ENABLE in it, then allocates
 * (zero-filled, against mem_ctx) a 1024-entry instruction store and 16-entry
 * IF and loop stacks.
 */
gen8_generator::gen8_generator(struct brw_context *brw,
                               struct gl_shader_program *shader_prog,
                               struct gl_program *prog,
                               void *mem_ctx)
   : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx)
{
   ctx = &brw->ctx;

   /* Defaults stamped onto every instruction by next_inst(). */
   memset(&default_state, 0, sizeof(default_state));
   default_state.mask_control = BRW_MASK_ENABLE;

   /* Instruction store: nothing emitted yet. */
   nr_inst = 0;
   next_inst_offset = 0;
   store_size = 1024;
   store = rzalloc_array(mem_ctx, gen8_instruction, store_size);

   /* Control flow bookkeeping: both stacks start out empty. */
   if_stack_array_size = 16;
   if_stack_depth = 0;
   if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);

   loop_stack_array_size = 16;
   loop_stack_depth = 0;
   loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
}
|
||||
|
||||
/**
 * Intentionally empty: the destructor releases nothing itself.
 *
 * NOTE(review): the arrays were allocated against mem_ctx, so they are
 * presumably freed together with that context -- confirm with the owner.
 */
gen8_generator::~gen8_generator() {}
|
||||
|
||||
/**
 * Append a fresh instruction to the store and return a pointer to it.
 *
 * When the store is full its capacity is doubled with reralloc(), which may
 * move the array; pointers returned by earlier calls must not be relied on
 * across a call to this function.  The new slot is zeroed, given \p opcode,
 * and then initialised from default_state.
 */
gen8_instruction *
gen8_generator::next_inst(unsigned opcode)
{
   const unsigned needed = nr_inst + 1;

   if (needed > unsigned(store_size)) {
      store_size <<= 1;
      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
      assert(store);
   }

   gen8_instruction *slot = &store[nr_inst];
   nr_inst++;
   next_inst_offset += 16;   /* byte offset advances one instruction */

   memset(slot, 0, sizeof(*slot));

   gen8_set_opcode(slot, opcode);

   /* Copy over the current defaults. */
   gen8_set_exec_size(slot, default_state.exec_size);
   gen8_set_access_mode(slot, default_state.access_mode);
   gen8_set_mask_control(slot, default_state.mask_control);
   gen8_set_qtr_control(slot, default_state.qtr_control);
   gen8_set_cond_modifier(slot, default_state.conditional_mod);
   gen8_set_pred_control(slot, default_state.predicate);
   gen8_set_pred_inv(slot, default_state.predicate_inverse);
   gen8_set_saturate(slot, default_state.saturate);
   gen8_set_flag_subreg_nr(slot, default_state.flag_subreg_nr);

   return slot;
}
|
||||
|
||||
#define ALU1(OP) \
|
||||
gen8_instruction * \
|
||||
gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
|
||||
{ \
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
|
||||
gen8_set_dst(brw, inst, dst); \
|
||||
gen8_set_src0(brw, inst, src); \
|
||||
return inst; \
|
||||
}
|
||||
|
||||
#define ALU2(OP) \
|
||||
gen8_instruction * \
|
||||
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
|
||||
{ \
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
|
||||
gen8_set_dst(brw, inst, dst); \
|
||||
gen8_set_src0(brw, inst, s0); \
|
||||
gen8_set_src1(brw, inst, s1); \
|
||||
return inst; \
|
||||
}
|
||||
|
||||
#define ALU2_ACCUMULATE(OP) \
|
||||
gen8_instruction * \
|
||||
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
|
||||
{ \
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
|
||||
gen8_set_dst(brw, inst, dst); \
|
||||
gen8_set_src0(brw, inst, s0); \
|
||||
gen8_set_src1(brw, inst, s1); \
|
||||
gen8_set_acc_wr_control(inst, true); \
|
||||
return inst; \
|
||||
}
|
||||
|
||||
#define ALU3(OP) \
|
||||
gen8_instruction * \
|
||||
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
|
||||
struct brw_reg s1, struct brw_reg s2) \
|
||||
{ \
|
||||
return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
|
||||
}
|
||||
|
||||
#define ALU3F(OP) \
|
||||
gen8_instruction * \
|
||||
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
|
||||
struct brw_reg s1, struct brw_reg s2) \
|
||||
{ \
|
||||
assert(dst.type == BRW_REGISTER_TYPE_F); \
|
||||
assert(s0.type == BRW_REGISTER_TYPE_F); \
|
||||
assert(s1.type == BRW_REGISTER_TYPE_F); \
|
||||
assert(s2.type == BRW_REGISTER_TYPE_F); \
|
||||
return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
|
||||
}
|
||||
|
||||
ALU2(ADD)
|
||||
ALU2(AND)
|
||||
ALU2(ASR)
|
||||
ALU3(BFE)
|
||||
ALU2(BFI1)
|
||||
ALU3(BFI2)
|
||||
ALU1(BFREV)
|
||||
ALU1(CBIT)
|
||||
ALU2_ACCUMULATE(ADDC)
|
||||
ALU2_ACCUMULATE(SUBB)
|
||||
ALU2(DP2)
|
||||
ALU2(DP3)
|
||||
ALU2(DP4)
|
||||
ALU2(DPH)
|
||||
ALU1(FBH)
|
||||
ALU1(FBL)
|
||||
ALU1(FRC)
|
||||
ALU2(LINE)
|
||||
ALU3F(LRP)
|
||||
ALU3F(MAD)
|
||||
ALU2(MUL)
|
||||
ALU1(MOV)
|
||||
ALU1(NOT)
|
||||
ALU2(OR)
|
||||
ALU2(PLN)
|
||||
ALU1(RNDD)
|
||||
ALU1(RNDE)
|
||||
ALU1(RNDZ)
|
||||
ALU2_ACCUMULATE(MAC)
|
||||
ALU2_ACCUMULATE(MACH)
|
||||
ALU2(SEL)
|
||||
ALU2(SHL)
|
||||
ALU2(SHR)
|
||||
ALU2(XOR)
|
||||
|
||||
/**
 * Emit a CMP using \p conditional as its conditional modifier.
 *
 * The type of \p dst is ignored: the destination is always encoded with
 * src0's type (see the comment in the body).
 */
gen8_instruction *
gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
                    struct brw_reg src0, struct brw_reg src1)
{
   gen8_instruction *cmp = next_inst(BRW_OPCODE_CMP);
   gen8_set_cond_modifier(cmp, conditional);

   /* With floating point sources, CMP appears to misbehave unless the
    * destination is float as well.  Forcing the destination type to follow
    * src0 makes every case work.
    */
   struct brw_reg typed_dst = dst;
   typed_dst.type = src0.type;

   gen8_set_dst(brw, cmp, typed_dst);
   gen8_set_src0(brw, cmp, src0);
   gen8_set_src1(brw, cmp, src1);
   return cmp;
}
|
||||
|
||||
static int
|
||||
get_3src_subreg_nr(struct brw_reg reg)
|
||||
{
|
||||
if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
|
||||
assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
|
||||
return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
|
||||
} else {
|
||||
return reg.subnr / 4;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Emit a three-source instruction.
 *
 * All operands must be directly addressed GRFs with a register number below
 * 128 (an MRF destination is first remapped into the GRF file), and the
 * default access mode must be Align16.  The destination type -- F, D or UD --
 * determines both the encoded source type and the encoded destination type.
 */
gen8_instruction *
gen8_generator::alu3(unsigned opcode,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1,
                     struct brw_reg src2)
{
   /* There are no MRFs from Gen7 onwards, so an MRF destination is redirected
    * to the GRFs starting at GEN7_MRF_HACK_START.
    */
   if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
      dst.nr += GEN7_MRF_HACK_START;
      dst.file = BRW_GENERAL_REGISTER_FILE;
   }

   gen8_instruction *insn = next_inst(opcode);
   assert(gen8_access_mode(insn) == BRW_ALIGN_16);

   /* Destination. */
   assert(dst.file == BRW_GENERAL_REGISTER_FILE);
   assert(dst.nr < 128);
   assert(dst.address_mode == BRW_ADDRESS_DIRECT);
   assert(dst.type == BRW_REGISTER_TYPE_F ||
          dst.type == BRW_REGISTER_TYPE_D ||
          dst.type == BRW_REGISTER_TYPE_UD);
   gen8_set_dst_3src_reg_nr(insn, dst.nr);
   gen8_set_dst_3src_subreg_nr(insn, dst.subnr / 16);
   gen8_set_dst_3src_writemask(insn, dst.dw1.bits.writemask);

   /* Source 0.  Replicate control is set for vertical stride 0 registers. */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   gen8_set_src0_3src_swizzle(insn, src0.dw1.bits.swizzle);
   gen8_set_src0_3src_subreg_nr(insn, get_3src_subreg_nr(src0));
   gen8_set_src0_3src_rep_ctrl(insn, src0.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src0_3src_reg_nr(insn, src0.nr);
   gen8_set_src0_3src_abs(insn, src0.abs);
   gen8_set_src0_3src_negate(insn, src0.negate);

   /* Source 1. */
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   gen8_set_src1_3src_swizzle(insn, src1.dw1.bits.swizzle);
   gen8_set_src1_3src_subreg_nr(insn, get_3src_subreg_nr(src1));
   gen8_set_src1_3src_rep_ctrl(insn, src1.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src1_3src_reg_nr(insn, src1.nr);
   gen8_set_src1_3src_abs(insn, src1.abs);
   gen8_set_src1_3src_negate(insn, src1.negate);

   /* Source 2. */
   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   gen8_set_src2_3src_swizzle(insn, src2.dw1.bits.swizzle);
   gen8_set_src2_3src_subreg_nr(insn, get_3src_subreg_nr(src2));
   gen8_set_src2_3src_rep_ctrl(insn, src2.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src2_3src_reg_nr(insn, src2.nr);
   gen8_set_src2_3src_abs(insn, src2.abs);
   gen8_set_src2_3src_negate(insn, src2.negate);

   /* The source register types are deliberately ignored; dst.type alone
    * chooses the encoded source and destination types.  MAD and LRP already
    * guarantee all-float operands, while BFE and BFI2 may pass a mix of D and
    * UD sources and expect that mix to be disregarded.
    */
   if (dst.type == BRW_REGISTER_TYPE_F) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_F);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_F);
   } else if (dst.type == BRW_REGISTER_TYPE_D) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_D);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_D);
   } else if (dst.type == BRW_REGISTER_TYPE_UD) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_UD);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_UD);
   }

   return insn;
}
|
||||
|
||||
/**
 * Shared helper behind both MATH() overloads: emits a MATH instruction with
 * the given function, destination and first source.
 *
 * src0 must either have horizontal stride 0 or share dst's horizontal stride.
 */
gen8_instruction *
gen8_generator::math(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   gen8_instruction *insn = next_inst(BRW_OPCODE_MATH);

   assert(src0.hstride == 0 || src0.hstride == dst.hstride);

   gen8_set_math_function(insn, math_function);
   gen8_set_dst(brw, insn, dst);
   gen8_set_src0(brw, insn, src0);

   return insn;
}
|
||||
|
||||
/**
 * Emit a single-operand MATH instruction.  The source must be float.
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   assert(src0.type == BRW_REGISTER_TYPE_F);
   return math(math_function, dst, src0);
}
|
||||
|
||||
/**
 * Emit a two-operand MATH instruction.
 *
 * For the integer division functions neither source may be float; for every
 * other function src0 must be float.
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1)
{
   const bool is_int_div =
      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
      math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;

   if (!is_int_div) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   }

   /* math() fills in everything except the second source. */
   gen8_instruction *insn = math(math_function, dst, src0);
   gen8_set_src1(brw, insn, src1);

   return insn;
}
|
||||
|
||||
/**
 * Emit a MOV whose operands are both retyped to UD and whose mask control is
 * set to BRW_MASK_DISABLE.
 */
gen8_instruction *
gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
{
   gen8_instruction *mov = next_inst(BRW_OPCODE_MOV);

   const struct brw_reg raw_dst = retype(dst, BRW_REGISTER_TYPE_UD);
   gen8_set_dst(brw, mov, raw_dst);

   const struct brw_reg raw_src = retype(src0, BRW_REGISTER_TYPE_UD);
   gen8_set_src0(brw, mov, raw_src);

   gen8_set_mask_control(mov, BRW_MASK_DISABLE);
   return mov;
}
|
||||
|
||||
|
||||
/**
 * Emit a NOP; nothing beyond the next_inst() defaults is set.
 */
gen8_instruction *
gen8_generator::NOP()
{
   gen8_instruction *nop = next_inst(BRW_OPCODE_NOP);
   return nop;
}
|
||||
|
||||
void
|
||||
gen8_generator::push_if_stack(gen8_instruction *inst)
|
||||
{
|
||||
if_stack[if_stack_depth] = inst - store;
|
||||
|
||||
++if_stack_depth;
|
||||
if (if_stack_array_size <= if_stack_depth) {
|
||||
if_stack_array_size *= 2;
|
||||
if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remove the top if-stack entry and return a pointer to that instruction.
 *
 * The pointer is formed from the stored index and the current store base.
 * No check is made for an empty stack.
 */
gen8_instruction *
gen8_generator::pop_if_stack()
{
   const int top = --if_stack_depth;
   return &store[if_stack[top]];
}
|
||||
|
||||
/**
|
||||
* Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.)
|
||||
*/
|
||||
void
|
||||
gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
|
||||
gen8_instruction *else_inst,
|
||||
gen8_instruction *endif_inst)
|
||||
{
|
||||
assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
|
||||
assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
|
||||
assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);
|
||||
|
||||
gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst));
|
||||
|
||||
if (else_inst == NULL) {
|
||||
/* Patch IF -> ENDIF */
|
||||
gen8_set_jip(if_inst, 16 * (endif_inst - if_inst));
|
||||
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
|
||||
} else {
|
||||
gen8_set_exec_size(else_inst, gen8_exec_size(if_inst));
|
||||
|
||||
/* Patch IF -> ELSE and ELSE -> ENDIF:
|
||||
*
|
||||
* The IF's JIP should point at the instruction after the ELSE.
|
||||
* The IF's UIP should point to the ENDIF.
|
||||
*
|
||||
* Both are expressed in bytes, hence the multiply by 16...128-bits.
|
||||
*/
|
||||
gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));
|
||||
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
|
||||
|
||||
/* Patch ELSE -> ENDIF:
|
||||
*
|
||||
* Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
|
||||
*/
|
||||
gen8_set_jip(else_inst, 16 * (endif_inst - else_inst));
|
||||
gen8_set_uip(else_inst, 16 * (endif_inst - else_inst));
|
||||
}
|
||||
gen8_set_jip(endif_inst, 16);
|
||||
}
|
||||
|
||||
/**
 * Emit an IF with the given predicate control and push it on the if-stack.
 *
 * The jump offsets stay unset until the matching ENDIF() patches them.
 */
gen8_instruction *
gen8_generator::IF(unsigned predicate)
{
   gen8_instruction *if_inst = next_inst(BRW_OPCODE_IF);

   const struct brw_reg null_d =
      vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_dst(brw, if_inst, null_d);
   gen8_set_src0(brw, if_inst, brw_imm_d(0));

   gen8_set_exec_size(if_inst, default_state.exec_size);
   gen8_set_pred_control(if_inst, predicate);
   gen8_set_mask_control(if_inst, BRW_MASK_ENABLE);

   push_if_stack(if_inst);
   return if_inst;
}
|
||||
|
||||
/**
 * Emit an ELSE and push it on the if-stack, on top of its IF.
 *
 * The jump offsets stay unset until the matching ENDIF() patches them.
 */
gen8_instruction *
gen8_generator::ELSE()
{
   gen8_instruction *else_inst = next_inst(BRW_OPCODE_ELSE);

   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
   gen8_set_dst(brw, else_inst, null_d);
   gen8_set_src0(brw, else_inst, brw_imm_d(0));
   gen8_set_mask_control(else_inst, BRW_MASK_ENABLE);

   push_if_stack(else_inst);
   return else_inst;
}
|
||||
|
||||
/**
 * Emit an ENDIF and patch the jump offsets of the IF (and optional ELSE) it
 * closes.
 *
 * The top of the if-stack must be either an IF, or an ELSE sitting directly
 * on top of its IF.
 *
 * \return the newly emitted ENDIF instruction.
 */
gen8_instruction *
gen8_generator::ENDIF()
{
   /* Emit the ENDIF before resolving any if-stack entries: next_inst() may
    * reallocate the instruction store, which would leave IF/ELSE pointers
    * obtained beforehand pointing into the old store.
    */
   gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
   gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
   gen8_set_src0(brw, endif_inst, brw_imm_d(0));

   /* The stack stores indices, so these pointers are formed against the
    * current (possibly relocated) store.
    */
   gen8_instruction *else_inst = NULL;
   gen8_instruction *if_inst = pop_if_stack();
   if (gen8_opcode(if_inst) == BRW_OPCODE_ELSE) {
      else_inst = if_inst;
      if_inst = pop_if_stack();
   }
   assert(gen8_opcode(if_inst) == BRW_OPCODE_IF);

   patch_IF_ELSE(if_inst, else_inst, endif_inst);

   return endif_inst;
}
|
||||
|
||||
unsigned
|
||||
gen8_generator::next_ip(unsigned ip) const
|
||||
{
|
||||
return ip + 16;
|
||||
}
|
||||
|
||||
unsigned
|
||||
gen8_generator::find_next_block_end(unsigned start) const
|
||||
{
|
||||
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
|
||||
gen8_instruction *inst = &store[ip / 16];
|
||||
|
||||
switch (gen8_opcode(inst)) {
|
||||
case BRW_OPCODE_ENDIF:
|
||||
case BRW_OPCODE_ELSE:
|
||||
case BRW_OPCODE_WHILE:
|
||||
case BRW_OPCODE_HALT:
|
||||
return ip;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* There is no DO instruction on Gen6+, so to find the end of the loop
|
||||
* we have to see if the loop is jumping back before our start
|
||||
* instruction.
|
||||
*/
|
||||
unsigned
|
||||
gen8_generator::find_loop_end(unsigned start) const
|
||||
{
|
||||
/* Always start after the instruction (such as a WHILE) we're trying to fix
|
||||
* up.
|
||||
*/
|
||||
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
|
||||
gen8_instruction *inst = &store[ip / 16];
|
||||
|
||||
if (gen8_opcode(inst) == BRW_OPCODE_WHILE) {
|
||||
if (ip + gen8_jip(inst) <= start)
|
||||
return ip;
|
||||
}
|
||||
}
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
/* After program generation, go back and update the UIP and JIP of
|
||||
* BREAK, CONT, and HALT instructions to their correct locations.
|
||||
*/
|
||||
void
|
||||
gen8_generator::patch_jump_targets()
|
||||
{
|
||||
for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
|
||||
gen8_instruction *inst = &store[ip / 16];
|
||||
|
||||
int block_end_ip = find_next_block_end(ip);
|
||||
switch (gen8_opcode(inst)) {
|
||||
case BRW_OPCODE_BREAK:
|
||||
assert(block_end_ip != 0);
|
||||
gen8_set_jip(inst, block_end_ip - ip);
|
||||
gen8_set_uip(inst, find_loop_end(ip) - ip);
|
||||
assert(gen8_uip(inst) != 0);
|
||||
assert(gen8_jip(inst) != 0);
|
||||
break;
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
assert(block_end_ip != 0);
|
||||
gen8_set_jip(inst, block_end_ip - ip);
|
||||
gen8_set_uip(inst, find_loop_end(ip) - ip);
|
||||
assert(gen8_uip(inst) != 0);
|
||||
assert(gen8_jip(inst) != 0);
|
||||
break;
|
||||
case BRW_OPCODE_ENDIF:
|
||||
if (block_end_ip == 0)
|
||||
gen8_set_jip(inst, 16);
|
||||
else
|
||||
gen8_set_jip(inst, block_end_ip - ip);
|
||||
break;
|
||||
case BRW_OPCODE_HALT:
|
||||
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
|
||||
*
|
||||
* "In case of the halt instruction not inside any conditional
|
||||
* code block, the value of <JIP> and <UIP> should be the
|
||||
* same. In case of the halt instruction inside conditional code
|
||||
* block, the <UIP> should be the end of the program, and the
|
||||
* <JIP> should be end of the most inner conditional code block."
|
||||
*
|
||||
* The uip will have already been set by whoever set up the
|
||||
* instruction.
|
||||
*/
|
||||
if (block_end_ip == 0) {
|
||||
gen8_set_jip(inst, gen8_uip(inst));
|
||||
} else {
|
||||
gen8_set_jip(inst, block_end_ip - ip);
|
||||
}
|
||||
assert(gen8_uip(inst) != 0);
|
||||
assert(gen8_jip(inst) != 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen8_generator::DO()
|
||||
{
|
||||
if (loop_stack_array_size < loop_stack_depth) {
|
||||
loop_stack_array_size *= 2;
|
||||
loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
|
||||
}
|
||||
loop_stack[loop_stack_depth++] = nr_inst;
|
||||
}
|
||||
|
||||
/**
 * Emit a BREAK with a null D destination and a zero immediate source.
 *
 * Its jump offsets are filled in later by patch_jump_targets().
 */
gen8_instruction *
gen8_generator::BREAK()
{
   gen8_instruction *brk = next_inst(BRW_OPCODE_BREAK);

   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
   gen8_set_dst(brw, brk, null_d);
   gen8_set_src0(brw, brk, brw_imm_d(0));
   gen8_set_exec_size(brk, default_state.exec_size);

   return brk;
}
|
||||
|
||||
/**
 * Emit a CONTINUE.  Its destination is the IP register rather than the null
 * register, and its source is a zero immediate.
 *
 * Its jump offsets are filled in later by patch_jump_targets().
 */
gen8_instruction *
gen8_generator::CONTINUE()
{
   gen8_instruction *cont = next_inst(BRW_OPCODE_CONTINUE);

   const struct brw_reg ip_reg = brw_ip_reg();
   gen8_set_dst(brw, cont, ip_reg);
   gen8_set_src0(brw, cont, brw_imm_d(0));
   gen8_set_exec_size(cont, default_state.exec_size);

   return cont;
}
|
||||
|
||||
/**
 * Close the innermost loop opened by DO().
 *
 * Pops the loop stack and emits a WHILE whose JIP is the (non-positive) byte
 * offset back to the first instruction of the loop body.
 *
 * \return the newly emitted WHILE instruction.
 */
gen8_instruction *
gen8_generator::WHILE()
{
   /* Keep only the index for now.  next_inst() may reallocate the store, so
    * a pointer to the loop start formed before that call could end up
    * referring to the old allocation and yield a bogus jump distance.
    */
   const int do_index = loop_stack[--loop_stack_depth];

   gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);

   /* Both pointers now refer to the same, current store. */
   gen8_instruction *do_inst = &store[do_index];

   gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_src0(brw, while_inst, brw_imm_d(0));
   gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
   gen8_set_exec_size(while_inst, default_state.exec_size);

   return while_inst;
}
|
||||
|
||||
/**
 * Emit a HALT with a null D destination, a zero immediate source and mask
 * control set to BRW_MASK_DISABLE.
 *
 * UIP is left for the caller to set; patch_jump_targets() fills in JIP.
 */
gen8_instruction *
gen8_generator::HALT()
{
   gen8_instruction *halt = next_inst(BRW_OPCODE_HALT);

   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
   gen8_set_dst(brw, halt, null_d);
   gen8_set_src0(brw, halt, brw_imm_d(0));
   gen8_set_exec_size(halt, default_state.exec_size);
   gen8_set_mask_control(halt, BRW_MASK_DISABLE);

   return halt;
}
|
||||
|
|
@ -1,196 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file gen8_generator.h
|
||||
*
|
||||
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
extern "C" {
|
||||
#include "main/macros.h"
|
||||
} /* extern "C" */
|
||||
|
||||
#include "gen8_instruction.h"
|
||||
|
||||
class gen8_generator {
|
||||
public:
|
||||
gen8_generator(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
void *mem_ctx);
|
||||
~gen8_generator();
|
||||
|
||||
/**
|
||||
* Instruction emitters.
|
||||
* @{
|
||||
*/
|
||||
#define ALU1(OP) \
|
||||
gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
|
||||
#define ALU2(OP) \
|
||||
gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
|
||||
#define ALU3(OP) \
|
||||
gen8_instruction *OP(struct brw_reg d, \
|
||||
struct brw_reg, struct brw_reg, struct brw_reg);
|
||||
ALU2(ADD)
|
||||
ALU2(AND)
|
||||
ALU2(ASR)
|
||||
ALU3(BFE)
|
||||
ALU2(BFI1)
|
||||
ALU3(BFI2)
|
||||
ALU1(F32TO16)
|
||||
ALU1(F16TO32)
|
||||
ALU1(BFREV)
|
||||
ALU1(CBIT)
|
||||
ALU2(ADDC)
|
||||
ALU2(SUBB)
|
||||
ALU2(DP2)
|
||||
ALU2(DP3)
|
||||
ALU2(DP4)
|
||||
ALU2(DPH)
|
||||
ALU1(FBH)
|
||||
ALU1(FBL)
|
||||
ALU1(FRC)
|
||||
ALU2(LINE)
|
||||
ALU3(LRP)
|
||||
ALU2(MAC)
|
||||
ALU2(MACH)
|
||||
ALU3(MAD)
|
||||
ALU2(MUL)
|
||||
ALU1(MOV)
|
||||
ALU1(MOV_RAW)
|
||||
ALU1(NOT)
|
||||
ALU2(OR)
|
||||
ALU2(PLN)
|
||||
ALU1(RNDD)
|
||||
ALU1(RNDE)
|
||||
ALU1(RNDZ)
|
||||
ALU2(SEL)
|
||||
ALU2(SHL)
|
||||
ALU2(SHR)
|
||||
ALU2(XOR)
|
||||
#undef ALU1
|
||||
#undef ALU2
|
||||
#undef ALU3
|
||||
|
||||
gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
|
||||
struct brw_reg src0, struct brw_reg src1);
|
||||
gen8_instruction *IF(unsigned predicate);
|
||||
gen8_instruction *ELSE();
|
||||
gen8_instruction *ENDIF();
|
||||
void DO();
|
||||
gen8_instruction *BREAK();
|
||||
gen8_instruction *CONTINUE();
|
||||
gen8_instruction *WHILE();
|
||||
|
||||
gen8_instruction *HALT();
|
||||
|
||||
gen8_instruction *MATH(unsigned math_function,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0);
|
||||
gen8_instruction *MATH(unsigned math_function,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
gen8_instruction *NOP();
|
||||
/** @} */
|
||||
|
||||
protected:
|
||||
gen8_instruction *alu3(unsigned opcode,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1,
|
||||
struct brw_reg src2);
|
||||
|
||||
gen8_instruction *math(unsigned math_function,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0);
|
||||
|
||||
gen8_instruction *next_inst(unsigned opcode);
|
||||
|
||||
struct gl_shader_program *shader_prog;
|
||||
struct gl_program *prog;
|
||||
|
||||
struct brw_context *brw;
|
||||
struct intel_context *intel;
|
||||
struct gl_context *ctx;
|
||||
|
||||
gen8_instruction *store;
|
||||
unsigned store_size;
|
||||
unsigned nr_inst;
|
||||
unsigned next_inst_offset;
|
||||
|
||||
/**
|
||||
* Control flow stacks:
|
||||
*
|
||||
* if_stack contains IF and ELSE instructions which must be patched with
|
||||
* the final jump offsets (and popped) once the matching ENDIF is encountered.
|
||||
*
|
||||
* We actually store an array index into the store, rather than pointers
|
||||
* to the instructions. This is necessary since we may realloc the store.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
int *if_stack;
|
||||
int if_stack_depth;
|
||||
int if_stack_array_size;
|
||||
|
||||
int *loop_stack;
|
||||
int loop_stack_depth;
|
||||
int loop_stack_array_size;
|
||||
|
||||
int if_depth_in_loop;
|
||||
|
||||
void push_if_stack(gen8_instruction *inst);
|
||||
gen8_instruction *pop_if_stack();
|
||||
/** @} */
|
||||
|
||||
void patch_IF_ELSE(gen8_instruction *if_inst,
|
||||
gen8_instruction *else_inst,
|
||||
gen8_instruction *endif_inst);
|
||||
|
||||
unsigned next_ip(unsigned ip) const;
|
||||
unsigned find_next_block_end(unsigned start_ip) const;
|
||||
unsigned find_loop_end(unsigned start) const;
|
||||
|
||||
void patch_jump_targets();
|
||||
|
||||
/**
|
||||
* Default state for new instructions.
|
||||
*/
|
||||
struct {
|
||||
unsigned exec_size;
|
||||
unsigned access_mode;
|
||||
unsigned mask_control;
|
||||
unsigned qtr_control;
|
||||
unsigned flag_subreg_nr;
|
||||
unsigned conditional_mod;
|
||||
unsigned predicate;
|
||||
bool predicate_inverse;
|
||||
bool saturate;
|
||||
} default_state;
|
||||
|
||||
void *mem_ctx;
|
||||
};
|
||||
|
|
@ -1,458 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file gen8_instruction.c
|
||||
*
|
||||
* A representation of a Gen8+ EU instruction, with helper methods to get
|
||||
* and set various fields. This is the actual hardware format.
|
||||
*/
|
||||
|
||||
#include "main/compiler.h"
|
||||
#include "brw_defines.h"
|
||||
#include "gen8_instruction.h"
|
||||
|
||||
static void
|
||||
gen8_convert_mrf_to_grf(struct brw_reg *reg)
|
||||
{
|
||||
/* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
|
||||
* "The send with EOT should use register space R112-R127 for <src>. This is
|
||||
* to enable loading of a new thread into the same slot while the message
|
||||
* with EOT for current thread is pending dispatch."
|
||||
*
|
||||
* Since we're pretending to have 16 MRFs anyway, we may as well use the
|
||||
* registers required for messages with EOT.
|
||||
*/
|
||||
if (reg->file == BRW_MESSAGE_REGISTER_FILE) {
|
||||
reg->file = BRW_GENERAL_REGISTER_FILE;
|
||||
reg->nr += GEN7_MRF_HACK_START;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_dst(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
struct brw_reg reg)
|
||||
{
|
||||
gen8_convert_mrf_to_grf(®);
|
||||
|
||||
if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(reg.nr < BRW_MAX_GRF);
|
||||
|
||||
gen8_set_dst_reg_file(inst, reg.file);
|
||||
gen8_set_dst_reg_type(inst, brw_reg_type_to_hw_type(brw, reg.type, reg.file));
|
||||
gen8_set_dst_address_mode(inst, reg.address_mode);
|
||||
|
||||
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
|
||||
gen8_set_dst_da_reg_nr(inst, reg.nr);
|
||||
|
||||
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
|
||||
/* Set Dst.SubRegNum[4:0] */
|
||||
gen8_set_dst_da1_subreg_nr(inst, reg.subnr);
|
||||
|
||||
/* Set Dst.HorzStride */
|
||||
if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
|
||||
reg.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
gen8_set_dst_da1_hstride(inst, reg.hstride);
|
||||
} else {
|
||||
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
|
||||
assert(reg.subnr == 0 || reg.subnr == 16);
|
||||
gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4);
|
||||
gen8_set_da16_writemask(inst, reg.dw1.bits.writemask);
|
||||
}
|
||||
} else {
|
||||
/* Indirect addressing */
|
||||
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
|
||||
|
||||
/* Set Dst.HorzStride */
|
||||
if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
|
||||
reg.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
gen8_set_dst_da1_hstride(inst, reg.hstride);
|
||||
gen8_set_dst_ia1_subreg_nr(inst, reg.subnr);
|
||||
gen8_set_dst_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
|
||||
}
|
||||
|
||||
/* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
|
||||
* or 16 (SIMD16), as that's normally correct. However, when dealing with
|
||||
* small registers, we automatically reduce it to match the register size.
|
||||
*/
|
||||
if (reg.width < BRW_EXECUTE_8)
|
||||
gen8_set_exec_size(inst, reg.width);
|
||||
}
|
||||
|
||||
static void
|
||||
gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg)
|
||||
{
|
||||
int hstride_for_reg[] = {0, 1, 2, 4};
|
||||
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
|
||||
int width_for_reg[] = {1, 2, 4, 8, 16};
|
||||
int execsize_for_reg[] = {1, 2, 4, 8, 16};
|
||||
int width, hstride, vstride, execsize;
|
||||
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE) {
|
||||
/* TODO: check immediate vectors */
|
||||
return;
|
||||
}
|
||||
|
||||
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE)
|
||||
return;
|
||||
|
||||
assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
|
||||
hstride = hstride_for_reg[reg.hstride];
|
||||
|
||||
if (reg.vstride == 0xf) {
|
||||
vstride = -1;
|
||||
} else {
|
||||
assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
|
||||
vstride = vstride_for_reg[reg.vstride];
|
||||
}
|
||||
|
||||
assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
|
||||
width = width_for_reg[reg.width];
|
||||
|
||||
assert(gen8_exec_size(inst) >= 0 &&
|
||||
gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg));
|
||||
execsize = execsize_for_reg[gen8_exec_size(inst)];
|
||||
|
||||
/* Restrictions from 3.3.10: Register Region Restrictions. */
|
||||
/* 3. */
|
||||
assert(execsize >= width);
|
||||
|
||||
/* 4. */
|
||||
if (execsize == width && hstride != 0) {
|
||||
assert(vstride == -1 || vstride == width * hstride);
|
||||
}
|
||||
|
||||
/* 5. */
|
||||
if (execsize == width && hstride == 0) {
|
||||
/* no restriction on vstride. */
|
||||
}
|
||||
|
||||
/* 6. */
|
||||
if (width == 1) {
|
||||
assert(hstride == 0);
|
||||
}
|
||||
|
||||
/* 7. */
|
||||
if (execsize == 1 && width == 1) {
|
||||
assert(hstride == 0);
|
||||
assert(vstride == 0);
|
||||
}
|
||||
|
||||
/* 8. */
|
||||
if (vstride == 0 && hstride == 0) {
|
||||
assert(width == 1);
|
||||
}
|
||||
|
||||
/* 10. Check destination issues. */
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_src0(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
struct brw_reg reg)
|
||||
{
|
||||
gen8_convert_mrf_to_grf(®);
|
||||
|
||||
if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(reg.nr < BRW_MAX_GRF);
|
||||
|
||||
gen8_validate_reg(inst, reg);
|
||||
|
||||
gen8_set_src0_reg_file(inst, reg.file);
|
||||
gen8_set_src0_reg_type(inst,
|
||||
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
|
||||
gen8_set_src0_abs(inst, reg.abs);
|
||||
gen8_set_src0_negate(inst, reg.negate);
|
||||
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE) {
|
||||
inst->data[3] = reg.dw1.ud;
|
||||
|
||||
/* Required to set some fields in src1 as well: */
|
||||
gen8_set_src1_reg_file(inst, BRW_ARCHITECTURE_REGISTER_FILE);
|
||||
gen8_set_src1_reg_type(inst,
|
||||
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
|
||||
return;
|
||||
}
|
||||
|
||||
gen8_set_src0_address_mode(inst, reg.address_mode);
|
||||
|
||||
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
|
||||
gen8_set_src0_da_reg_nr(inst, reg.nr);
|
||||
|
||||
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
|
||||
/* Set Src0.SubRegNum[4:0] */
|
||||
gen8_set_src0_da1_subreg_nr(inst, reg.subnr);
|
||||
|
||||
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
|
||||
gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
|
||||
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
|
||||
} else {
|
||||
gen8_set_src0_da1_hstride(inst, reg.hstride);
|
||||
gen8_set_src0_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
gen8_set_src0_da1_width(inst, reg.width);
|
||||
|
||||
} else {
|
||||
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
|
||||
assert(reg.subnr == 0 || reg.subnr == 16);
|
||||
gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4);
|
||||
|
||||
gen8_set_src0_da16_swiz_x(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_X));
|
||||
gen8_set_src0_da16_swiz_y(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_Y));
|
||||
gen8_set_src0_da16_swiz_z(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_Z));
|
||||
gen8_set_src0_da16_swiz_w(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_W));
|
||||
|
||||
/* This is an oddity of the fact that we're using the same
|
||||
* descriptions for registers in both Align16 and Align1 modes.
|
||||
*/
|
||||
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
|
||||
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
|
||||
else
|
||||
gen8_set_src0_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
} else {
|
||||
/* Indirect addressing */
|
||||
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
|
||||
if (reg.width == BRW_WIDTH_1 &&
|
||||
gen8_exec_size(inst) == BRW_EXECUTE_1) {
|
||||
gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
|
||||
gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
|
||||
} else {
|
||||
gen8_set_src0_da1_hstride(inst, reg.hstride);
|
||||
gen8_set_src0_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
|
||||
gen8_set_src0_da1_width(inst, reg.width);
|
||||
gen8_set_src0_ia1_subreg_nr(inst, reg.subnr);
|
||||
gen8_set_src0_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_src1(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
struct brw_reg reg)
|
||||
{
|
||||
gen8_convert_mrf_to_grf(®);
|
||||
|
||||
if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(reg.nr < BRW_MAX_GRF);
|
||||
|
||||
gen8_validate_reg(inst, reg);
|
||||
|
||||
gen8_set_src1_reg_file(inst, reg.file);
|
||||
gen8_set_src1_reg_type(inst,
|
||||
brw_reg_type_to_hw_type(brw, reg.type, reg.file));
|
||||
gen8_set_src1_abs(inst, reg.abs);
|
||||
gen8_set_src1_negate(inst, reg.negate);
|
||||
|
||||
/* Only src1 can be an immediate in two-argument instructions. */
|
||||
assert(gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE);
|
||||
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE) {
|
||||
inst->data[3] = reg.dw1.ud;
|
||||
return;
|
||||
}
|
||||
|
||||
gen8_set_src1_address_mode(inst, reg.address_mode);
|
||||
|
||||
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
|
||||
gen8_set_src1_da_reg_nr(inst, reg.nr);
|
||||
|
||||
if (gen8_access_mode(inst) == BRW_ALIGN_1) {
|
||||
/* Set Src0.SubRegNum[4:0] */
|
||||
gen8_set_src1_da1_subreg_nr(inst, reg.subnr);
|
||||
|
||||
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
|
||||
gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
|
||||
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
|
||||
} else {
|
||||
gen8_set_src1_da1_hstride(inst, reg.hstride);
|
||||
gen8_set_src1_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
gen8_set_src1_da1_width(inst, reg.width);
|
||||
} else {
|
||||
/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
|
||||
assert(reg.subnr == 0 || reg.subnr == 16);
|
||||
gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4);
|
||||
|
||||
gen8_set_src1_da16_swiz_x(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_X));
|
||||
gen8_set_src1_da16_swiz_y(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_Y));
|
||||
gen8_set_src1_da16_swiz_z(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_Z));
|
||||
gen8_set_src1_da16_swiz_w(inst,
|
||||
BRW_GET_SWZ(reg.dw1.bits.swizzle,
|
||||
BRW_CHANNEL_W));
|
||||
|
||||
/* This is an oddity of the fact that we're using the same
|
||||
* descriptions for registers in both Align16 and Align1 modes.
|
||||
*/
|
||||
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
|
||||
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
|
||||
else
|
||||
gen8_set_src1_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
} else {
|
||||
/* Indirect addressing */
|
||||
assert(gen8_access_mode(inst) == BRW_ALIGN_1);
|
||||
if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
|
||||
gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
|
||||
gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
|
||||
} else {
|
||||
gen8_set_src1_da1_hstride(inst, reg.hstride);
|
||||
gen8_set_src1_vert_stride(inst, reg.vstride);
|
||||
}
|
||||
|
||||
gen8_set_src1_da1_width(inst, reg.width);
|
||||
gen8_set_src1_ia1_subreg_nr(inst, reg.subnr);
|
||||
gen8_set_src1_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the Message Descriptor and Extended Message Descriptor fields
|
||||
* for SEND messages.
|
||||
*
|
||||
* \note This zeroes out the Function Control bits, so it must be called
|
||||
* \b before filling out any message-specific data. Callers can
|
||||
* choose not to fill in irrelevant bits; they will be zero.
|
||||
*/
|
||||
static void
|
||||
gen8_set_message_descriptor(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
enum brw_message_target sfid,
|
||||
unsigned msg_length,
|
||||
unsigned response_length,
|
||||
bool header_present,
|
||||
bool end_of_thread)
|
||||
{
|
||||
gen8_set_src1(brw, inst, brw_imm_d(0));
|
||||
|
||||
gen8_set_sfid(inst, sfid);
|
||||
gen8_set_mlen(inst, msg_length);
|
||||
gen8_set_rlen(inst, response_length);
|
||||
gen8_set_header_present(inst, header_present);
|
||||
gen8_set_eot(inst, end_of_thread);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_urb_message(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
enum brw_urb_write_flags flags,
|
||||
unsigned msg_length,
|
||||
unsigned response_length,
|
||||
unsigned offset,
|
||||
bool interleave)
|
||||
{
|
||||
gen8_set_message_descriptor(brw, inst, BRW_SFID_URB,
|
||||
msg_length, response_length,
|
||||
true, flags & BRW_URB_WRITE_EOT);
|
||||
gen8_set_src0(brw, inst, brw_vec8_grf(GEN7_MRF_HACK_START + 1, 0));
|
||||
if (flags & BRW_URB_WRITE_OWORD) {
|
||||
assert(msg_length == 2);
|
||||
gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_OWORD);
|
||||
} else {
|
||||
gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_HWORD);
|
||||
}
|
||||
gen8_set_urb_global_offset(inst, offset);
|
||||
gen8_set_urb_interleave(inst, interleave);
|
||||
gen8_set_urb_per_slot_offset(inst,
|
||||
flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_sampler_message(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
unsigned binding_table_index,
|
||||
unsigned sampler,
|
||||
unsigned msg_type,
|
||||
unsigned response_length,
|
||||
unsigned msg_length,
|
||||
bool header_present,
|
||||
unsigned simd_mode)
|
||||
{
|
||||
gen8_set_message_descriptor(brw, inst, BRW_SFID_SAMPLER, msg_length,
|
||||
response_length, header_present, false);
|
||||
|
||||
gen8_set_binding_table_index(inst, binding_table_index);
|
||||
gen8_set_sampler(inst, sampler);
|
||||
gen8_set_sampler_msg_type(inst, msg_type);
|
||||
gen8_set_sampler_simd_mode(inst, simd_mode);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_dp_message(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
enum brw_message_target sfid,
|
||||
unsigned binding_table_index,
|
||||
unsigned msg_type,
|
||||
unsigned msg_control,
|
||||
unsigned mlen,
|
||||
unsigned rlen,
|
||||
bool header_present,
|
||||
bool end_of_thread)
|
||||
{
|
||||
gen8_set_message_descriptor(brw, inst, sfid, mlen, rlen, header_present,
|
||||
end_of_thread);
|
||||
gen8_set_binding_table_index(inst, binding_table_index);
|
||||
gen8_set_dp_message_type(inst, msg_type);
|
||||
gen8_set_dp_message_control(inst, msg_control);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_set_dp_scratch_message(const struct brw_context *brw,
|
||||
struct gen8_instruction *inst,
|
||||
bool write,
|
||||
bool dword,
|
||||
bool invalidate_after_read,
|
||||
unsigned num_regs,
|
||||
unsigned addr_offset,
|
||||
unsigned mlen,
|
||||
unsigned rlen,
|
||||
bool header_present,
|
||||
bool end_of_thread)
|
||||
{
|
||||
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || num_regs == 8);
|
||||
gen8_set_message_descriptor(brw, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
mlen, rlen, header_present, end_of_thread);
|
||||
gen8_set_dp_category(inst, 1); /* Scratch Block Read/Write messages */
|
||||
gen8_set_scratch_read_write(inst, write);
|
||||
gen8_set_scratch_type(inst, dword);
|
||||
gen8_set_scratch_invalidate_after_read(inst, invalidate_after_read);
|
||||
gen8_set_scratch_block_size(inst, ffs(num_regs) - 1);
|
||||
gen8_set_scratch_addr_offset(inst, addr_offset);
|
||||
}
|
||||
|
|
@ -1,418 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file gen8_instruction.h
|
||||
*
|
||||
* A representation of a Gen8+ EU instruction, with helper methods to get
|
||||
* and set various fields. This is the actual hardware format.
|
||||
*/
|
||||
|
||||
#ifndef GEN8_INSTRUCTION_H
|
||||
#define GEN8_INSTRUCTION_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_reg.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * One EU instruction, stored as four 32-bit dwords.
 *
 * The accessors in this header number bits 0..127 across the array:
 * bit N lives in data[N / 32].
 */
struct gen8_instruction {
   uint32_t data[4];
};
|
||||
|
||||
/* Bit-level helpers; defined at the bottom of this header. */
static inline unsigned
gen8_instruction_bits(struct gen8_instruction *inst,
                      unsigned high, unsigned low);

static inline void
gen8_instruction_set_bits(struct gen8_instruction *inst,
                          unsigned high, unsigned low, unsigned value);

/**
 * Define gen8_set_<name>() and gen8_<name>() for the field occupying
 * instruction bits high..low (inclusive).
 */
#define F(name, high, low)                                              \
static inline void                                                      \
gen8_set_##name(struct gen8_instruction *inst, unsigned value)          \
{                                                                       \
   gen8_instruction_set_bits(inst, high, low, value);                   \
}                                                                       \
static inline unsigned                                                  \
gen8_##name(struct gen8_instruction *inst)                              \
{                                                                       \
   return gen8_instruction_bits(inst, high, low);                       \
}
|
||||
|
||||
/*
 * Fields of one- and two-source instructions, listed from the top bit
 * down.  Some ranges overlap on purpose: the da1 / da16 / ia1 variants
 * describe the same bits under different access or addressing modes.
 */

/* Dword 3 (bits 127:96): src1 region. */
F(src1_vert_stride,    120, 117)
F(src1_da1_width,      116, 114)
F(src1_da16_swiz_w,    115, 114)
F(src1_da16_swiz_z,    113, 112)
F(src1_da1_hstride,    113, 112)
F(src1_address_mode,   111, 111)
/** Src1.SrcMod @{ */
F(src1_negate,         110, 110)
F(src1_abs,            109, 109)
/** @} */
F(src1_ia1_subreg_nr,  108, 105)
F(src1_da_reg_nr,      108, 101)
F(src1_da16_subreg_nr, 100, 100)
F(src1_da1_subreg_nr,  100,  96)
F(src1_da16_swiz_y,     99,  98)
F(src1_da16_swiz_x,     97,  96)

/* Dword 2 (bits 95:64): src1 type/file and src0 region. */
F(src1_reg_type,        94,  91)
F(src1_reg_file,        90,  89)
F(src0_vert_stride,     88,  85)
F(src0_da1_width,       84,  82)
F(src0_da16_swiz_w,     83,  82)
F(src0_da16_swiz_z,     81,  80)
F(src0_da1_hstride,     81,  80)
F(src0_address_mode,    79,  79)
/** Src0.SrcMod @{ */
F(src0_negate,          78,  78)
F(src0_abs,             77,  77)
/** @} */
F(src0_ia1_subreg_nr,   76,  73)
F(src0_da_reg_nr,       76,  69)
F(src0_da16_subreg_nr,  68,  68)
F(src0_da1_subreg_nr,   68,  64)
F(src0_da16_swiz_y,     67,  66)
F(src0_da16_swiz_x,     65,  64)

/* Dword 1 (bits 63:32): destination, register types/files, flags. */
F(dst_address_mode,     63,  63)
F(dst_da1_hstride,      62,  61)
F(dst_ia1_subreg_nr,    60,  57)
F(dst_da_reg_nr,        60,  53)
F(dst_da16_subreg_nr,   52,  52)
F(dst_da1_subreg_nr,    52,  48)
F(da16_writemask,       51,  48) /* Dst.ChanEn */
F(src0_reg_type,        46,  43)
F(src0_reg_file,        42,  41)
F(dst_reg_type,         40,  37)
F(dst_reg_file,         36,  35)
F(mask_control,         34,  34)
F(flag_reg_nr,          33,  33)
F(flag_subreg_nr,       32,  32)

/* Dword 0 (bits 31:0): instruction controls and opcode. */
F(saturate,             31,  31)
F(branch_control,       30,  30)
F(debug_control,        30,  30)
F(cmpt_control,         29,  29)
F(acc_wr_control,       28,  28)
F(cond_modifier,        27,  24)
F(exec_size,            23,  21)
F(pred_inv,             20,  20)
F(pred_control,         19,  16)
F(thread_control,       15,  14)
F(qtr_control,          13,  12)
F(nib_control,          11,  11)
F(no_dd_check,          10,  10)
F(no_dd_clear,           9,   9)
F(access_mode,           8,   8)
/* Bit 7 is Reserved (for future Opcode expansion) */
F(opcode,                6,   0)
|
||||
|
||||
/**
|
||||
* Three-source instructions:
|
||||
* @{
|
||||
*/
|
||||
F(src2_3src_reg_nr, 125, 118)
|
||||
F(src2_3src_subreg_nr, 117, 115)
|
||||
F(src2_3src_swizzle, 114, 107)
|
||||
F(src2_3src_rep_ctrl, 106, 106)
|
||||
F(src1_3src_reg_nr, 104, 97)
|
||||
/* src1_3src_subreg_nr spans word boundaries and has to be handled specially */
|
||||
F(src1_3src_swizzle, 93, 86)
|
||||
F(src1_3src_rep_ctrl, 85, 85)
|
||||
F(src0_3src_reg_nr, 83, 76)
|
||||
F(src0_3src_subreg_nr, 75, 73)
|
||||
F(src0_3src_swizzle, 72, 65)
|
||||
F(src0_3src_rep_ctrl, 64, 64)
|
||||
F(dst_3src_reg_nr, 63, 56)
|
||||
F(dst_3src_subreg_nr, 55, 53)
|
||||
F(dst_3src_writemask, 52, 49)
|
||||
F(dst_3src_type, 48, 46)
|
||||
F(src_3src_type, 45, 43)
|
||||
F(src2_3src_negate, 42, 42)
|
||||
F(src2_3src_abs, 41, 41)
|
||||
F(src1_3src_negate, 40, 40)
|
||||
F(src1_3src_abs, 39, 39)
|
||||
F(src0_3src_negate, 38, 38)
|
||||
F(src0_3src_abs, 37, 37)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Fields for SEND messages:
|
||||
* @{
|
||||
*/
|
||||
F(eot, 127, 127)
|
||||
F(mlen, 124, 121)
|
||||
F(rlen, 120, 116)
|
||||
F(header_present, 115, 115)
|
||||
F(function_control, 114, 96)
|
||||
F(sfid, 27, 24)
|
||||
F(math_function, 27, 24)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* URB message function control bits:
|
||||
* @{
|
||||
*/
|
||||
F(urb_per_slot_offset, 113, 113)
|
||||
F(urb_interleave, 111, 111)
|
||||
F(urb_global_offset, 110, 100)
|
||||
F(urb_opcode, 99, 96)
|
||||
/** @} */
|
||||
|
||||
/* Message descriptor bits */
|
||||
#define MD(name, high, low) F(name, (high + 96), (low + 96))
|
||||
|
||||
/**
|
||||
* Sampler message function control bits:
|
||||
* @{
|
||||
*/
|
||||
MD(sampler_simd_mode, 18, 17)
|
||||
MD(sampler_msg_type, 16, 12)
|
||||
MD(sampler, 11, 8)
|
||||
MD(binding_table_index, 7, 0) /* also used by other messages */
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Data port message function control bits:
|
||||
* @{
|
||||
*/
|
||||
MD(dp_category, 18, 18)
|
||||
MD(dp_message_type, 17, 14)
|
||||
MD(dp_message_control, 13, 8)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Scratch message bits:
|
||||
* @{
|
||||
*/
|
||||
MD(scratch_read_write, 17, 17) /* 0 = read, 1 = write */
|
||||
MD(scratch_type, 16, 16) /* 0 = OWord, 1 = DWord */
|
||||
MD(scratch_invalidate_after_read, 15, 15)
|
||||
MD(scratch_block_size, 13, 12)
|
||||
MD(scratch_addr_offset, 11, 0)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Render Target message function control bits:
|
||||
* @{
|
||||
*/
|
||||
MD(rt_last, 12, 12)
|
||||
MD(rt_slot_group, 11, 11)
|
||||
MD(rt_message_type, 10, 8)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Thread Spawn message function control bits:
|
||||
* @{
|
||||
*/
|
||||
MD(ts_resource_select, 4, 4)
|
||||
MD(ts_request_type, 1, 1)
|
||||
MD(ts_opcode, 0, 0)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Video Motion Estimation message function control bits:
|
||||
* @{
|
||||
*/
|
||||
F(vme_message_type, 14, 13)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Check & Refinement Engine message function control bits:
|
||||
* @{
|
||||
*/
|
||||
F(cre_message_type, 14, 13)
|
||||
/** @} */
|
||||
|
||||
#undef MD
|
||||
#undef F
|
||||
|
||||
/**
 * Store the 3-bit src1 subregister number of a three-source instruction.
 *
 * The field straddles a dword boundary, so it is written in two pieces:
 * the low two bits go to 95:94 and the top bit to bit 96.
 */
static inline void
gen8_set_src1_3src_subreg_nr(struct gen8_instruction *inst, unsigned v)
{
   assert(v <= 0x7);

   const unsigned low_bits = v & 0x3;
   const unsigned top_bit = v >> 2;

   gen8_instruction_set_bits(inst, 95, 94, low_bits);
   gen8_instruction_set_bits(inst, 96, 96, top_bit);
}
|
||||
|
||||
/**
 * Read back the 3-bit src1 subregister number of a three-source
 * instruction, reassembled from bits 95:94 (low) and bit 96 (high).
 */
static inline unsigned
gen8_src1_3src_subreg_nr(struct gen8_instruction *inst)
{
   const unsigned low_bits = gen8_instruction_bits(inst, 95, 94);
   const unsigned top_bit = gen8_instruction_bits(inst, 96, 96);

   return low_bits | (top_bit << 2);
}
|
||||
|
||||
/**
 * Define the setter and getter for a 10-bit AddrImm field whose low nine
 * bits live in high..low and whose bit 9 is stored separately at position
 * `nine`.
 */
#define GEN8_IA1_ADDR_IMM(reg, nine, high, low)                               \
static inline void                                                            \
gen8_set_##reg##_ia1_addr_imm(struct gen8_instruction *inst, unsigned value)  \
{                                                                             \
   assert((value & ~0x3ff) == 0);                                             \
   gen8_instruction_set_bits(inst, nine, nine, value >> 9);                   \
   gen8_instruction_set_bits(inst, high, low, value & 0x1ff);                 \
}                                                                             \
                                                                              \
static inline unsigned                                                        \
gen8_##reg##_ia1_addr_imm(struct gen8_instruction *inst)                      \
{                                                                             \
   return (gen8_instruction_bits(inst, nine, nine) << 9) |                    \
          gen8_instruction_bits(inst, high, low);                             \
}

/* AddrImm[9:0] for Align1 Indirect Addressing */
GEN8_IA1_ADDR_IMM(src1, 121, 104, 96)
GEN8_IA1_ADDR_IMM(src0,  95,  72, 64)
GEN8_IA1_ADDR_IMM(dst,   47,  56, 48)
|
||||
|
||||
/**
|
||||
* Flow control instruction bits:
|
||||
* @{
|
||||
*/
|
||||
static inline unsigned gen8_uip(struct gen8_instruction *inst)
|
||||
{
|
||||
return inst->data[2];
|
||||
}
|
||||
static inline void gen8_set_uip(struct gen8_instruction *inst, unsigned uip)
|
||||
{
|
||||
inst->data[2] = uip;
|
||||
}
|
||||
static inline unsigned gen8_jip(struct gen8_instruction *inst)
|
||||
{
|
||||
return inst->data[3];
|
||||
}
|
||||
static inline void gen8_set_jip(struct gen8_instruction *inst, unsigned jip)
|
||||
{
|
||||
inst->data[3] = jip;
|
||||
}
|
||||
/** @} */
|
||||
|
||||
static inline int gen8_src1_imm_d(struct gen8_instruction *inst)
|
||||
{
|
||||
return inst->data[3];
|
||||
}
|
||||
static inline unsigned gen8_src1_imm_ud(struct gen8_instruction *inst)
|
||||
{
|
||||
return inst->data[3];
|
||||
}
|
||||
static inline float gen8_src1_imm_f(struct gen8_instruction *inst)
|
||||
{
|
||||
fi_type ft;
|
||||
|
||||
ft.u = inst->data[3];
|
||||
return ft.f;
|
||||
}
|
||||
|
||||
/* Operand encoders.  Each takes the register description by value. */
void gen8_set_dst(const struct brw_context *brw,
                  struct gen8_instruction *inst,
                  struct brw_reg reg);
void gen8_set_src0(const struct brw_context *brw,
                   struct gen8_instruction *inst,
                   struct brw_reg reg);
void gen8_set_src1(const struct brw_context *brw,
                   struct gen8_instruction *inst,
                   struct brw_reg reg);

/* SEND message setup helpers. */
void gen8_set_urb_message(const struct brw_context *brw,
                          struct gen8_instruction *inst,
                          enum brw_urb_write_flags flags,
                          unsigned msg_length,
                          unsigned response_length,
                          unsigned offset,
                          bool interleave);

/* Note the order: response length comes before message length here. */
void gen8_set_sampler_message(const struct brw_context *brw,
                              struct gen8_instruction *inst,
                              unsigned binding_table_index,
                              unsigned sampler,
                              unsigned msg_type,
                              unsigned response_length,
                              unsigned msg_length,
                              bool header_present,
                              unsigned simd_mode);

void gen8_set_dp_message(const struct brw_context *brw,
                         struct gen8_instruction *inst,
                         enum brw_message_target sfid,
                         unsigned binding_table_index,
                         unsigned msg_type,
                         unsigned msg_control,
                         unsigned mlen,
                         unsigned rlen,
                         bool header_present,
                         bool end_of_thread);

void gen8_set_dp_scratch_message(const struct brw_context *brw,
                                 struct gen8_instruction *inst,
                                 bool write,
                                 bool dword,
                                 bool invalidate_after_read,
                                 unsigned num_regs,
                                 unsigned addr_offset,
                                 unsigned mlen,
                                 unsigned rlen,
                                 bool header_present,
                                 bool end_of_thread);
|
||||
|
||||
/**
|
||||
* Fetch a set of contiguous bits from the instruction.
|
||||
*
|
||||
* Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
|
||||
*/
|
||||
static inline unsigned
|
||||
gen8_instruction_bits(struct gen8_instruction *inst, unsigned high, unsigned low)
|
||||
{
|
||||
/* We assume the field doesn't cross 32-bit boundaries. */
|
||||
const unsigned word = high / 32;
|
||||
assert(word == low / 32);
|
||||
|
||||
high %= 32;
|
||||
low %= 32;
|
||||
|
||||
const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
|
||||
|
||||
return (inst->data[word] & mask) >> low;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set bits in the instruction, with proper shifting and masking.
|
||||
*
|
||||
* Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
|
||||
*/
|
||||
static inline void
|
||||
gen8_instruction_set_bits(struct gen8_instruction *inst,
|
||||
unsigned high,
|
||||
unsigned low,
|
||||
unsigned value)
|
||||
{
|
||||
const unsigned word = high / 32;
|
||||
assert(word == low / 32);
|
||||
|
||||
high %= 32;
|
||||
low %= 32;
|
||||
|
||||
const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
|
||||
|
||||
/* Make sure the supplied value actually fits in the given bitfield. */
|
||||
assert((value & (mask >> low)) == value);
|
||||
|
||||
inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -1,925 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_vec4.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
extern "C" {
|
||||
#include "brw_eu.h"
|
||||
#include "main/macros.h"
|
||||
#include "program/prog_print.h"
|
||||
#include "program/prog_parameter.h"
|
||||
};
|
||||
|
||||
namespace brw {
|
||||
|
||||
/**
 * Forward the context, programs and memory context to the gen8_generator
 * base class, and remember the program data and the debug flag.
 */
gen8_vec4_generator::gen8_vec4_generator(
   struct brw_context *brw,
   struct gl_shader_program *shader_prog,
   struct gl_program *prog,
   struct brw_vec4_prog_data *prog_data,
   void *mem_ctx,
   bool debug_flag)
   : gen8_generator(brw, shader_prog, prog, mem_ctx),
     prog_data(prog_data), debug_flag(debug_flag)
{
   /* Nothing beyond member initialization. */
}
|
||||
|
||||
/** Nothing to release here; the destructor body is empty. */
gen8_vec4_generator::~gen8_vec4_generator()
{
   /* Intentionally empty. */
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst,
|
||||
struct brw_reg sampler_index)
|
||||
{
|
||||
int msg_type = 0;
|
||||
|
||||
switch (ir->opcode) {
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXL:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
|
||||
} else {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXD:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
|
||||
} else {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXF:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXS:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
|
||||
} else {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
|
||||
} else {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("should not get here: invalid VS texture opcode");
|
||||
}
|
||||
|
||||
assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
|
||||
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
uint32_t sampler = sampler_index.dw1.ud;
|
||||
|
||||
if (ir->header_present) {
|
||||
MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
|
||||
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
if (ir->texture_offset) {
|
||||
/* Set the offset bits in DWord 2. */
|
||||
MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(ir->texture_offset));
|
||||
}
|
||||
|
||||
if (sampler >= 16) {
|
||||
/* The "Sampler Index" field can only store values between 0 and 15.
|
||||
* However, we can add an offset to the "Sampler State Pointer"
|
||||
* field, effectively selecting a different set of 16 samplers.
|
||||
*
|
||||
* The "Sampler State Pointer" needs to be aligned to a 32-byte
|
||||
* offset, and each sampler state is only 16-bytes, so we can't
|
||||
* exclusively use the offset - we have to use both.
|
||||
*/
|
||||
const int sampler_state_size = 16; /* 16 bytes */
|
||||
gen8_instruction *add =
|
||||
ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
|
||||
get_element_ud(brw_vec8_grf(0, 0), 3),
|
||||
brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
|
||||
gen8_set_mask_control(add, BRW_MASK_DISABLE);
|
||||
}
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
uint32_t surf_index =
|
||||
prog_data->base.binding_table.texture_start + sampler;
|
||||
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, inst, dst);
|
||||
gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
|
||||
gen8_set_sampler_message(brw, inst,
|
||||
surf_index,
|
||||
sampler % 16,
|
||||
msg_type,
|
||||
1,
|
||||
ir->mlen,
|
||||
ir->header_present,
|
||||
BRW_SAMPLER_SIMD_MODE_SIMD4X2);
|
||||
|
||||
brw_mark_surface_used(&prog_data->base, surf_index);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
/* Copy g0. */
|
||||
if (vs)
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
gen8_instruction *inst;
|
||||
if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
|
||||
/* Enable Channel Masks in the URB_WRITE_OWORD message header */
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
|
||||
brw_imm_ud(0xff00));
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
|
||||
true);
|
||||
gen8_set_dst(brw, inst, brw_null_reg());
|
||||
gen8_set_src0(brw, inst, header);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
|
||||
struct brw_reg src)
|
||||
{
|
||||
/* Move the vertex count into the second MRF for the EOT write. */
|
||||
assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
|
||||
int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
|
||||
gen8_instruction *inst =
|
||||
MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
|
||||
{
|
||||
struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
gen8_instruction *inst;
|
||||
|
||||
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
inst = MOV(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
|
||||
/* mlen = 2: g0 header + vertex count */
|
||||
inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
|
||||
gen8_set_dst(brw, inst, brw_null_reg());
|
||||
gen8_set_src0(brw, inst, src);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1)
|
||||
{
|
||||
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
|
||||
* Header: M0.3):
|
||||
*
|
||||
* Slot 0 Offset. This field, after adding to the Global Offset field
|
||||
* in the message descriptor, specifies the offset (in 256-bit units)
|
||||
* from the start of the URB entry, as referenced by URB Handle 0, at
|
||||
* which the data will be accessed.
|
||||
*
|
||||
* Similar text describes DWORD M0.4, which is slot 1 offset.
|
||||
*
|
||||
* Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
|
||||
* of the register for geometry shader invocations 0 and 1) by the
|
||||
* immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
|
||||
*
|
||||
* We can do this with the following EU instruction:
|
||||
*
|
||||
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
|
||||
*/
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
gen8_instruction *inst =
|
||||
MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
assert(src.file == BRW_IMMEDIATE_VALUE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
|
||||
{
|
||||
/* We want to left shift just DWORD 4 (the x component belonging to the
|
||||
* second geometry shader invocation) by 4 bits. So generate the
|
||||
* instruction:
|
||||
*
|
||||
* shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
|
||||
*/
|
||||
dst = suboffset(vec1(dst), 4);
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
|
||||
* Header: M0.5):
|
||||
*
|
||||
* 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
|
||||
*
|
||||
* When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
|
||||
* DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
|
||||
* Vertex 0 DATA[7]. This bit is ANDed with the corresponding
|
||||
* channel enable to determine the final channel enable. For the
|
||||
* URB_READ_OWORD & URB_READ_HWORD messages, when final channel
|
||||
* enable is 1 it indicates that Vertex 1 DATA [3] will be included
|
||||
* in the writeback message. For the URB_WRITE_OWORD &
|
||||
* URB_WRITE_HWORD messages, when final channel enable is 1 it
|
||||
* indicates that Vertex 1 DATA [3] will be written to the surface.
|
||||
*
|
||||
* 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
|
||||
* 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
|
||||
*
|
||||
* 14 Vertex 1 DATA [2] Channel Mask
|
||||
* 13 Vertex 1 DATA [1] Channel Mask
|
||||
* 12 Vertex 1 DATA [0] Channel Mask
|
||||
* 11 Vertex 0 DATA [3] Channel Mask
|
||||
* 10 Vertex 0 DATA [2] Channel Mask
|
||||
* 9 Vertex 0 DATA [1] Channel Mask
|
||||
* 8 Vertex 0 DATA [0] Channel Mask
|
||||
*
|
||||
* (This is from a section of the PRM that is agnostic to the particular
|
||||
* type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
|
||||
* geometry shader invocations 0 and 1, respectively). Since we have the
|
||||
* enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
|
||||
* and the enable flags for geometry shader invocation 1 in bits 7:0 of
|
||||
* DWORD 4, we just need to OR them together and store the result in bits
|
||||
* 15:8 of DWORD 5.
|
||||
*
|
||||
* It's easier to get the EU to do this if we think of the src and dst
|
||||
* registers as composed of 32 bytes each; then, we want to pick up the
|
||||
* contents of bytes 0 and 16 from src, OR them together, and store them in
|
||||
* byte 21.
|
||||
*
|
||||
* We can do that by the following EU instruction:
|
||||
*
|
||||
* or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
|
||||
*
|
||||
* Note: this relies on the source register having zeros in (a) bits 7:4 of
|
||||
* DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
|
||||
* source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
|
||||
* shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
|
||||
* the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
|
||||
* contain valid channel mask values (which are in the range 0x0-0xf).
|
||||
*/
|
||||
dst = retype(dst, BRW_REGISTER_TYPE_UB);
|
||||
src = retype(src, BRW_REGISTER_TYPE_UB);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
gen8_instruction *inst =
|
||||
OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
|
||||
struct brw_reg index)
|
||||
{
|
||||
int second_vertex_offset = 1;
|
||||
|
||||
m1 = retype(m1, BRW_REGISTER_TYPE_D);
|
||||
|
||||
/* Set up M1 (message payload). Only the block offsets in M1.0 and
|
||||
* M1.4 are used, and the rest are ignored.
|
||||
*/
|
||||
struct brw_reg m1_0 = suboffset(vec1(m1), 0);
|
||||
struct brw_reg m1_4 = suboffset(vec1(m1), 4);
|
||||
struct brw_reg index_0 = suboffset(vec1(index), 0);
|
||||
struct brw_reg index_4 = suboffset(vec1(index), 4);
|
||||
|
||||
default_state.mask_control = BRW_MASK_DISABLE;
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
MOV(m1_0, index_0);
|
||||
|
||||
if (index.file == BRW_IMMEDIATE_VALUE) {
|
||||
index_4.dw1.ud += second_vertex_offset;
|
||||
MOV(m1_4, index_4);
|
||||
} else {
|
||||
ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
|
||||
}
|
||||
|
||||
default_state.mask_control = BRW_MASK_ENABLE;
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, header);
|
||||
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
255, /* binding table index: stateless access */
|
||||
GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
2, /* mlen */
|
||||
1, /* rlen */
|
||||
true, /* header present */
|
||||
false); /* EOT */
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
struct brw_reg index)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
|
||||
|
||||
MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
|
||||
retype(src, BRW_REGISTER_TYPE_D));
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, header);
|
||||
gen8_set_pred_control(send, ir->predicate);
|
||||
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
255, /* binding table index: stateless access */
|
||||
GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
3, /* mlen */
|
||||
0, /* rlen */
|
||||
true, /* header present */
|
||||
false); /* EOT */
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset)
|
||||
{
|
||||
assert(index.file == BRW_IMMEDIATE_VALUE &&
|
||||
index.type == BRW_REGISTER_TYPE_UD);
|
||||
uint32_t surf_index = index.dw1.ud;
|
||||
|
||||
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, offset);
|
||||
gen8_set_sampler_message(brw, send,
|
||||
surf_index,
|
||||
0, /* The LD message ignores the sampler unit. */
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
|
||||
1, /* rlen */
|
||||
1, /* mlen */
|
||||
false, /* no header */
|
||||
BRW_SAMPLER_SIMD_MODE_SIMD4X2);
|
||||
|
||||
brw_mark_surface_used(&prog_data->base, surf_index);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg atomic_op,
|
||||
struct brw_reg surf_index)
|
||||
{
|
||||
assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
|
||||
atomic_op.type == BRW_REGISTER_TYPE_UD &&
|
||||
surf_index.file == BRW_IMMEDIATE_VALUE &&
|
||||
surf_index.type == BRW_REGISTER_TYPE_UD);
|
||||
assert((atomic_op.dw1.ud & ~0xf) == 0);
|
||||
|
||||
unsigned msg_control =
|
||||
atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
|
||||
(1 << 5); /* Return data expected */
|
||||
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
|
||||
gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
|
||||
BRW_REGISTER_TYPE_UD));
|
||||
gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
|
||||
surf_index.dw1.ud,
|
||||
HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
|
||||
msg_control,
|
||||
ir->mlen,
|
||||
1,
|
||||
ir->header_present,
|
||||
false);
|
||||
|
||||
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg surf_index)
|
||||
{
|
||||
assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
|
||||
surf_index.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
|
||||
gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
|
||||
BRW_REGISTER_TYPE_UD));
|
||||
gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
|
||||
surf_index.dw1.ud,
|
||||
HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
|
||||
0xe, /* enable only the R channel */
|
||||
ir->mlen,
|
||||
1,
|
||||
ir->header_present,
|
||||
false);
|
||||
|
||||
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src)
|
||||
{
|
||||
vec4_instruction *ir = (vec4_instruction *) instruction;
|
||||
|
||||
if (dst.width == BRW_WIDTH_4) {
|
||||
/* This happens in attribute fixups for "dual instanced" geometry
|
||||
* shaders, since they use attributes that are vec4's. Since the exec
|
||||
* width is only 4, it's essential that the caller set
|
||||
* force_writemask_all in order to make sure the instruction is executed
|
||||
* regardless of which channels are enabled.
|
||||
*/
|
||||
assert(ir->force_writemask_all);
|
||||
|
||||
/* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
|
||||
* the following register region restrictions (from Graphics BSpec:
|
||||
* 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
|
||||
* > Register Region Restrictions)
|
||||
*
|
||||
* 1. ExecSize must be greater than or equal to Width.
|
||||
*
|
||||
* 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
|
||||
* to Width * HorzStride."
|
||||
*/
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (src[i].file == BRW_GENERAL_REGISTER_FILE)
|
||||
src[i] = stride(src[i], 4, 4, 1);
|
||||
}
|
||||
}
|
||||
|
||||
switch (ir->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
MOV(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADD:
|
||||
ADD(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
MUL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MACH:
|
||||
MACH(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MAD:
|
||||
MAD(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FRC:
|
||||
FRC(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDD:
|
||||
RNDD(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDE:
|
||||
RNDE(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDZ:
|
||||
RNDZ(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
AND(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
OR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_XOR:
|
||||
XOR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_NOT:
|
||||
NOT(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ASR:
|
||||
ASR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHR:
|
||||
SHR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHL:
|
||||
SHL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
CMP(dst, ir->conditional_mod, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
SEL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DPH:
|
||||
DPH(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP4:
|
||||
DP4(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP3:
|
||||
DP3(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP2:
|
||||
DP2(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_F32TO16:
|
||||
/* Emulate the Gen7 zeroing bug. */
|
||||
MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
|
||||
MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_F16TO32:
|
||||
MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_LRP:
|
||||
LRP(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFREV:
|
||||
/* BFREV only supports UD type for src and dst. */
|
||||
BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
|
||||
retype(src[0], BRW_REGISTER_TYPE_UD));
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FBH:
|
||||
/* FBH only supports UD type for dst. */
|
||||
FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FBL:
|
||||
/* FBL only supports UD type for dst. */
|
||||
FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CBIT:
|
||||
/* CBIT only supports UD type for dst. */
|
||||
CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADDC:
|
||||
ADDC(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SUBB:
|
||||
SUBB(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFE:
|
||||
BFE(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFI1:
|
||||
BFI1(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFI2:
|
||||
BFI2(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_IF:
|
||||
IF(ir->predicate);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ELSE:
|
||||
ELSE();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ENDIF:
|
||||
ENDIF();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DO:
|
||||
DO();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BREAK:
|
||||
BREAK();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
CONTINUE();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_WHILE:
|
||||
WHILE();
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RCP:
|
||||
MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RSQ:
|
||||
MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SQRT:
|
||||
MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_EXP2:
|
||||
MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_LOG2:
|
||||
MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SIN:
|
||||
MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_COS:
|
||||
MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_POW:
|
||||
MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
/* note: src[0] is unused. */
|
||||
generate_tex(ir, dst, src[1]);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_URB_WRITE:
|
||||
generate_urb_write(ir, true);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||
generate_scratch_read(ir, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||
generate_scratch_write(ir, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||
generate_pull_constant_load(ir, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_URB_WRITE:
|
||||
generate_urb_write(ir, false);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_THREAD_END:
|
||||
generate_gs_thread_end(ir);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_WRITE_OFFSET:
|
||||
generate_gs_set_write_offset(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_VERTEX_COUNT:
|
||||
generate_gs_set_vertex_count(dst, src[0]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_DWORD_2_IMMED:
|
||||
generate_gs_set_dword_2_immed(dst, src[0]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
|
||||
generate_gs_prepare_channel_masks(dst);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_CHANNEL_MASKS:
|
||||
generate_gs_set_channel_masks(dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SHADER_TIME_ADD:
|
||||
unreachable("XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
|
||||
|
||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||
generate_untyped_atomic(ir, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||
generate_untyped_surface_read(ir, dst, src[0]);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
|
||||
unreachable("VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
|
||||
|
||||
default:
|
||||
if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
|
||||
_mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
|
||||
opcode_descs[ir->opcode].name);
|
||||
} else {
|
||||
_mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
|
||||
}
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_code(exec_list *instructions)
|
||||
{
|
||||
struct annotation_info annotation;
|
||||
memset(&annotation, 0, sizeof(annotation));
|
||||
|
||||
cfg_t *cfg = NULL;
|
||||
if (unlikely(debug_flag))
|
||||
cfg = new(mem_ctx) cfg_t(instructions);
|
||||
|
||||
foreach_in_list(vec4_instruction, ir, instructions) {
|
||||
struct brw_reg src[3], dst;
|
||||
|
||||
if (unlikely(debug_flag))
|
||||
annotate(brw, &annotation, cfg, ir, next_inst_offset);
|
||||
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
src[i] = ir->get_src(prog_data, i);
|
||||
}
|
||||
dst = ir->get_dst();
|
||||
|
||||
default_state.conditional_mod = ir->conditional_mod;
|
||||
default_state.predicate = ir->predicate;
|
||||
default_state.predicate_inverse = ir->predicate_inverse;
|
||||
default_state.saturate = ir->saturate;
|
||||
default_state.mask_control = ir->force_writemask_all;
|
||||
|
||||
const unsigned pre_emit_nr_inst = nr_inst;
|
||||
|
||||
generate_vec4_instruction(ir, dst, src);
|
||||
|
||||
if (ir->no_dd_clear || ir->no_dd_check) {
|
||||
assert(nr_inst == pre_emit_nr_inst + 1 ||
|
||||
!"no_dd_check or no_dd_clear set for IR emitting more "
|
||||
"than 1 instruction");
|
||||
|
||||
gen8_instruction *last = &store[pre_emit_nr_inst];
|
||||
gen8_set_no_dd_clear(last, ir->no_dd_clear);
|
||||
gen8_set_no_dd_check(last, ir->no_dd_check);
|
||||
}
|
||||
}
|
||||
|
||||
patch_jump_targets();
|
||||
annotation_finalize(&annotation, next_inst_offset);
|
||||
|
||||
int before_size = next_inst_offset;
|
||||
|
||||
if (unlikely(debug_flag)) {
|
||||
if (shader_prog) {
|
||||
fprintf(stderr, "Native code for %s vertex shader %d:\n",
|
||||
shader_prog->Label ? shader_prog->Label : "unnamed",
|
||||
shader_prog->Name);
|
||||
} else {
|
||||
fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
|
||||
}
|
||||
fprintf(stderr, "vec4 shader: %d instructions.\n", before_size / 16);
|
||||
|
||||
dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog);
|
||||
ralloc_free(annotation.ann);
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
gen8_vec4_generator::generate_assembly(exec_list *instructions,
|
||||
unsigned *assembly_size)
|
||||
{
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
default_state.exec_size = BRW_EXECUTE_8;
|
||||
generate_code(instructions);
|
||||
|
||||
*assembly_size = next_inst_offset;
|
||||
return (const unsigned *) store;
|
||||
}
|
||||
|
||||
} /* namespace brw */
|
||||
Loading…
Add table
Reference in a new issue