mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
i965/fs: Split final assembly code generation out of fs_visitor.
Compiling shaders requires several main steps:
1. Generating FS IR from either GLSL IR or Mesa IR
2. Optimizing the IR
3. Register allocation
4. Generating assembly code
This patch splits out step 4 into a separate class named "fs_generator".
There are several reasons for doing so:
1. Future hardware has a different instruction encoding. Splitting
this out will allow us to replace fs_generator (which relies
heavily on the brw_eu_emit.c code and struct brw_instruction) with
a new code generator that writes the new format.
2. It reduces the size of the fs_visitor monolith. (Arguably, a lot
more should be split out, but that's left for "future work.")
3. Separate namespaces allow us to make helper functions for
generating instructions in both classes: ADD() can exist in
fs_visitor and create IR, while ADD() in fs_generator can
create brw_instructions. (Patches for this are upcoming.)
Furthermore, this patch changes the order of operations slightly.
Rather than doing steps 1-4 for SIMD8, then 1-4 for SIMD16, we now:
- Do steps 1-3 for SIMD8, then repeat 1-3 for SIMD16
- Generate final assembly code for both modes together
This is because the frontend work can be done independently, but final
assembly generation needs to pack both into a single program store to
feed the GPU.
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
parent
dd1fd30047
commit
ea681a0d64
3 changed files with 156 additions and 78 deletions
|
|
@ -2025,7 +2025,6 @@ fs_visitor::setup_payload_gen6()
|
||||||
bool
|
bool
|
||||||
fs_visitor::run()
|
fs_visitor::run()
|
||||||
{
|
{
|
||||||
uint32_t prog_offset_16 = 0;
|
|
||||||
uint32_t orig_nr_params = c->prog_data.nr_params;
|
uint32_t orig_nr_params = c->prog_data.nr_params;
|
||||||
|
|
||||||
if (intel->gen >= 6)
|
if (intel->gen >= 6)
|
||||||
|
|
@ -2033,24 +2032,6 @@ fs_visitor::run()
|
||||||
else
|
else
|
||||||
setup_payload_gen4();
|
setup_payload_gen4();
|
||||||
|
|
||||||
if (dispatch_width == 16) {
|
|
||||||
/* We have to do a compaction pass now, or the one at the end of
|
|
||||||
* execution will squash down where our prog_offset start needs
|
|
||||||
* to be.
|
|
||||||
*/
|
|
||||||
brw_compact_instructions(p);
|
|
||||||
|
|
||||||
/* align to 64 byte boundary. */
|
|
||||||
while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
|
|
||||||
brw_NOP(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Save off the start of this 16-wide program in case we succeed. */
|
|
||||||
prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction);
|
|
||||||
|
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (0) {
|
if (0) {
|
||||||
emit_dummy_fs();
|
emit_dummy_fs();
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -2129,13 +2110,10 @@ fs_visitor::run()
|
||||||
if (failed)
|
if (failed)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
generate_code();
|
|
||||||
|
|
||||||
if (dispatch_width == 8) {
|
if (dispatch_width == 8) {
|
||||||
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
|
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
|
||||||
} else {
|
} else {
|
||||||
c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
|
c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
|
||||||
c->prog_data.prog_offset_16 = prog_offset_16;
|
|
||||||
|
|
||||||
/* Make sure we didn't try to sneak in an extra uniform */
|
/* Make sure we didn't try to sneak in an extra uniform */
|
||||||
assert(orig_nr_params == c->prog_data.nr_params);
|
assert(orig_nr_params == c->prog_data.nr_params);
|
||||||
|
|
@ -2192,12 +2170,15 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exec_list *simd16_instructions = NULL;
|
||||||
|
fs_visitor v2(brw, c, prog, fp, 16);
|
||||||
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
|
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
|
||||||
fs_visitor v2(brw, c, prog, fp, 16);
|
|
||||||
v2.import_uniforms(&v);
|
v2.import_uniforms(&v);
|
||||||
if (!v2.run()) {
|
if (!v2.run()) {
|
||||||
perf_debug("16-wide shader failed to compile, falling back to "
|
perf_debug("16-wide shader failed to compile, falling back to "
|
||||||
"8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
|
"8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
|
||||||
|
} else {
|
||||||
|
simd16_instructions = &v2.instructions;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2214,7 +2195,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return brw_get_program(&c->func, final_assembly_size);
|
fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE);
|
||||||
|
return g.generate_assembly(&v.instructions, simd16_instructions,
|
||||||
|
final_assembly_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
||||||
|
|
@ -180,6 +180,11 @@ public:
|
||||||
/** @} */
|
/** @} */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The fragment shader front-end.
|
||||||
|
*
|
||||||
|
* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
|
||||||
|
*/
|
||||||
class fs_visitor : public backend_visitor
|
class fs_visitor : public backend_visitor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
@ -293,40 +298,6 @@ public:
|
||||||
void push_force_sechalf();
|
void push_force_sechalf();
|
||||||
void pop_force_sechalf();
|
void pop_force_sechalf();
|
||||||
|
|
||||||
void generate_code();
|
|
||||||
void generate_fb_write(fs_inst *inst);
|
|
||||||
void generate_pixel_xy(struct brw_reg dst, bool is_x);
|
|
||||||
void generate_linterp(fs_inst *inst, struct brw_reg dst,
|
|
||||||
struct brw_reg *src);
|
|
||||||
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
|
||||||
void generate_math1_gen7(fs_inst *inst,
|
|
||||||
struct brw_reg dst,
|
|
||||||
struct brw_reg src);
|
|
||||||
void generate_math2_gen7(fs_inst *inst,
|
|
||||||
struct brw_reg dst,
|
|
||||||
struct brw_reg src0,
|
|
||||||
struct brw_reg src1);
|
|
||||||
void generate_math1_gen6(fs_inst *inst,
|
|
||||||
struct brw_reg dst,
|
|
||||||
struct brw_reg src);
|
|
||||||
void generate_math2_gen6(fs_inst *inst,
|
|
||||||
struct brw_reg dst,
|
|
||||||
struct brw_reg src0,
|
|
||||||
struct brw_reg src1);
|
|
||||||
void generate_math_gen4(fs_inst *inst,
|
|
||||||
struct brw_reg dst,
|
|
||||||
struct brw_reg src);
|
|
||||||
void generate_discard(fs_inst *inst);
|
|
||||||
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
|
||||||
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
|
||||||
bool negate_value);
|
|
||||||
void generate_spill(fs_inst *inst, struct brw_reg src);
|
|
||||||
void generate_unspill(fs_inst *inst, struct brw_reg dst);
|
|
||||||
void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
|
||||||
struct brw_reg index,
|
|
||||||
struct brw_reg offset);
|
|
||||||
void generate_mov_dispatch_to_flags();
|
|
||||||
|
|
||||||
void emit_dummy_fs();
|
void emit_dummy_fs();
|
||||||
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
|
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
|
||||||
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
||||||
|
|
@ -456,6 +427,77 @@ public:
|
||||||
int force_sechalf_stack;
|
int force_sechalf_stack;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The fragment shader code generator.
|
||||||
|
*
|
||||||
|
* Translates FS IR to actual i965 assembly code.
|
||||||
|
*/
|
||||||
|
class fs_generator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
fs_generator(struct brw_context *brw,
|
||||||
|
struct brw_wm_compile *c,
|
||||||
|
struct gl_shader_program *prog,
|
||||||
|
struct gl_fragment_program *fp,
|
||||||
|
bool dual_source_output);
|
||||||
|
~fs_generator();
|
||||||
|
|
||||||
|
const unsigned *generate_assembly(exec_list *simd8_instructions,
|
||||||
|
exec_list *simd16_instructions,
|
||||||
|
unsigned *assembly_size);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void generate_code(exec_list *instructions);
|
||||||
|
void generate_fb_write(fs_inst *inst);
|
||||||
|
void generate_pixel_xy(struct brw_reg dst, bool is_x);
|
||||||
|
void generate_linterp(fs_inst *inst, struct brw_reg dst,
|
||||||
|
struct brw_reg *src);
|
||||||
|
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
||||||
|
void generate_math1_gen7(fs_inst *inst,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg src);
|
||||||
|
void generate_math2_gen7(fs_inst *inst,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg src0,
|
||||||
|
struct brw_reg src1);
|
||||||
|
void generate_math1_gen6(fs_inst *inst,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg src);
|
||||||
|
void generate_math2_gen6(fs_inst *inst,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg src0,
|
||||||
|
struct brw_reg src1);
|
||||||
|
void generate_math_gen4(fs_inst *inst,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg src);
|
||||||
|
void generate_discard(fs_inst *inst);
|
||||||
|
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
||||||
|
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||||
|
bool negate_value);
|
||||||
|
void generate_spill(fs_inst *inst, struct brw_reg src);
|
||||||
|
void generate_unspill(fs_inst *inst, struct brw_reg dst);
|
||||||
|
void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||||
|
struct brw_reg index,
|
||||||
|
struct brw_reg offset);
|
||||||
|
void generate_mov_dispatch_to_flags();
|
||||||
|
|
||||||
|
struct brw_context *brw;
|
||||||
|
struct intel_context *intel;
|
||||||
|
struct gl_context *ctx;
|
||||||
|
|
||||||
|
struct brw_compile *p;
|
||||||
|
struct brw_wm_compile *c;
|
||||||
|
|
||||||
|
struct gl_shader_program *prog;
|
||||||
|
struct gl_shader *shader;
|
||||||
|
const struct gl_fragment_program *fp;
|
||||||
|
|
||||||
|
unsigned dispatch_width; /**< 8 or 16 */
|
||||||
|
|
||||||
|
bool dual_source_output;
|
||||||
|
void *mem_ctx;
|
||||||
|
};
|
||||||
|
|
||||||
bool brw_do_channel_expressions(struct exec_list *instructions);
|
bool brw_do_channel_expressions(struct exec_list *instructions);
|
||||||
bool brw_do_vector_splitting(struct exec_list *instructions);
|
bool brw_do_vector_splitting(struct exec_list *instructions);
|
||||||
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
|
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
|
||||||
|
|
|
||||||
|
|
@ -37,8 +37,29 @@ extern "C" {
|
||||||
#include "brw_cfg.h"
|
#include "brw_cfg.h"
|
||||||
#include "glsl/ir_print_visitor.h"
|
#include "glsl/ir_print_visitor.h"
|
||||||
|
|
||||||
|
fs_generator::fs_generator(struct brw_context *brw,
|
||||||
|
struct brw_wm_compile *c,
|
||||||
|
struct gl_shader_program *prog,
|
||||||
|
struct gl_fragment_program *fp,
|
||||||
|
bool dual_source_output)
|
||||||
|
|
||||||
|
: brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
|
||||||
|
{
|
||||||
|
p = &c->func;
|
||||||
|
intel = &brw->intel;
|
||||||
|
ctx = &intel->ctx;
|
||||||
|
|
||||||
|
shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;
|
||||||
|
|
||||||
|
mem_ctx = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_generator::~fs_generator()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_fb_write(fs_inst *inst)
|
fs_generator::generate_fb_write(fs_inst *inst)
|
||||||
{
|
{
|
||||||
bool eot = inst->eot;
|
bool eot = inst->eot;
|
||||||
struct brw_reg implied_header;
|
struct brw_reg implied_header;
|
||||||
|
|
@ -91,7 +112,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
|
||||||
implied_header = brw_null_reg();
|
implied_header = brw_null_reg();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->dual_src_output.file != BAD_FILE)
|
if (this->dual_source_output)
|
||||||
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
|
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
|
||||||
else if (dispatch_width == 16)
|
else if (dispatch_width == 16)
|
||||||
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
|
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
|
||||||
|
|
@ -119,7 +140,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
|
||||||
* interpolation.
|
* interpolation.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
|
fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
|
||||||
{
|
{
|
||||||
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
|
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
|
||||||
struct brw_reg src;
|
struct brw_reg src;
|
||||||
|
|
@ -147,7 +168,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_linterp(fs_inst *inst,
|
fs_generator::generate_linterp(fs_inst *inst,
|
||||||
struct brw_reg dst, struct brw_reg *src)
|
struct brw_reg dst, struct brw_reg *src)
|
||||||
{
|
{
|
||||||
struct brw_reg delta_x = src[0];
|
struct brw_reg delta_x = src[0];
|
||||||
|
|
@ -165,7 +186,7 @@ fs_visitor::generate_linterp(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_math1_gen7(fs_inst *inst,
|
fs_generator::generate_math1_gen7(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src0)
|
struct brw_reg src0)
|
||||||
{
|
{
|
||||||
|
|
@ -178,7 +199,7 @@ fs_visitor::generate_math1_gen7(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_math2_gen7(fs_inst *inst,
|
fs_generator::generate_math2_gen7(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src0,
|
struct brw_reg src0,
|
||||||
struct brw_reg src1)
|
struct brw_reg src1)
|
||||||
|
|
@ -188,7 +209,7 @@ fs_visitor::generate_math2_gen7(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_math1_gen6(fs_inst *inst,
|
fs_generator::generate_math1_gen6(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src0)
|
struct brw_reg src0)
|
||||||
{
|
{
|
||||||
|
|
@ -215,7 +236,7 @@ fs_visitor::generate_math1_gen6(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_math2_gen6(fs_inst *inst,
|
fs_generator::generate_math2_gen6(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src0,
|
struct brw_reg src0,
|
||||||
struct brw_reg src1)
|
struct brw_reg src1)
|
||||||
|
|
@ -235,7 +256,7 @@ fs_visitor::generate_math2_gen6(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_math_gen4(fs_inst *inst,
|
fs_generator::generate_math_gen4(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src)
|
struct brw_reg src)
|
||||||
{
|
{
|
||||||
|
|
@ -263,7 +284,7 @@ fs_visitor::generate_math_gen4(fs_inst *inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
||||||
{
|
{
|
||||||
int msg_type = -1;
|
int msg_type = -1;
|
||||||
int rlen = 4;
|
int rlen = 4;
|
||||||
|
|
@ -447,7 +468,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
||||||
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
|
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
||||||
{
|
{
|
||||||
struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
|
struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
|
||||||
BRW_REGISTER_TYPE_F,
|
BRW_REGISTER_TYPE_F,
|
||||||
|
|
@ -469,7 +490,7 @@ fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
|
||||||
* left.
|
* left.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||||
bool negate_value)
|
bool negate_value)
|
||||||
{
|
{
|
||||||
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
|
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
|
||||||
|
|
@ -491,7 +512,7 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_discard(fs_inst *inst)
|
fs_generator::generate_discard(fs_inst *inst)
|
||||||
{
|
{
|
||||||
struct brw_reg f0 = brw_flag_reg();
|
struct brw_reg f0 = brw_flag_reg();
|
||||||
|
|
||||||
|
|
@ -543,7 +564,7 @@ fs_visitor::generate_discard(fs_inst *inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
|
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
|
||||||
{
|
{
|
||||||
assert(inst->mlen != 0);
|
assert(inst->mlen != 0);
|
||||||
|
|
||||||
|
|
@ -555,7 +576,7 @@ fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
|
fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
|
||||||
{
|
{
|
||||||
assert(inst->mlen != 0);
|
assert(inst->mlen != 0);
|
||||||
|
|
||||||
|
|
@ -585,7 +606,7 @@ fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
fs_generator::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||||
struct brw_reg index,
|
struct brw_reg index,
|
||||||
struct brw_reg offset)
|
struct brw_reg offset)
|
||||||
{
|
{
|
||||||
|
|
@ -632,7 +653,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||||
* Used only on Gen6 and above.
|
* Used only on Gen6 and above.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_visitor::generate_mov_dispatch_to_flags()
|
fs_generator::generate_mov_dispatch_to_flags()
|
||||||
{
|
{
|
||||||
struct brw_reg f0 = brw_flag_reg();
|
struct brw_reg f0 = brw_flag_reg();
|
||||||
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
||||||
|
|
@ -722,7 +743,7 @@ brw_reg_from_fs_reg(fs_reg *reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::generate_code()
|
fs_generator::generate_code(exec_list *instructions)
|
||||||
{
|
{
|
||||||
int last_native_insn_offset = p->next_insn_offset;
|
int last_native_insn_offset = p->next_insn_offset;
|
||||||
const char *last_annotation_string = NULL;
|
const char *last_annotation_string = NULL;
|
||||||
|
|
@ -740,9 +761,9 @@ fs_visitor::generate_code()
|
||||||
|
|
||||||
cfg_t *cfg = NULL;
|
cfg_t *cfg = NULL;
|
||||||
if (unlikely(INTEL_DEBUG & DEBUG_WM))
|
if (unlikely(INTEL_DEBUG & DEBUG_WM))
|
||||||
cfg = new(mem_ctx) cfg_t(this);
|
cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
|
||||||
|
|
||||||
foreach_list(node, &this->instructions) {
|
foreach_list(node, instructions) {
|
||||||
fs_inst *inst = (fs_inst *)node;
|
fs_inst *inst = (fs_inst *)node;
|
||||||
struct brw_reg src[3], dst;
|
struct brw_reg src[3], dst;
|
||||||
|
|
||||||
|
|
@ -1054,3 +1075,35 @@ fs_visitor::generate_code()
|
||||||
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
|
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const unsigned *
|
||||||
|
fs_generator::generate_assembly(exec_list *simd8_instructions,
|
||||||
|
exec_list *simd16_instructions,
|
||||||
|
unsigned *assembly_size)
|
||||||
|
{
|
||||||
|
dispatch_width = 8;
|
||||||
|
generate_code(simd8_instructions);
|
||||||
|
|
||||||
|
if (simd16_instructions) {
|
||||||
|
/* We have to do a compaction pass now, or the one at the end of
|
||||||
|
* execution will squash down where our prog_offset start needs
|
||||||
|
* to be.
|
||||||
|
*/
|
||||||
|
brw_compact_instructions(p);
|
||||||
|
|
||||||
|
/* align to 64 byte boundary. */
|
||||||
|
while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) {
|
||||||
|
brw_NOP(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Save off the start of this 16-wide program */
|
||||||
|
c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
|
||||||
|
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||||
|
|
||||||
|
dispatch_width = 16;
|
||||||
|
generate_code(simd16_instructions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return brw_get_program(p, assembly_size);
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue