mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
i965: Create a new vec4 backend for Broadwell.
This replaces the old vec4_generator backend.
v2: Port to use the C-based instruction representation. Also, remove
Geometry Shader offset hacks - the visitor will handle those instead
of this code.
v3: Texturing fixes (including adding textureGather support).
v4: Pass brw_context to gen8_instruction functions as required.
v5: Add SHADER_OPCODE_TXF_MCS support; port DUAL_INSTANCED gs fixes
(caught by Eric). Simplify ADDC/SUBB handling; add comments to
gen8_set_dp_message calls (suggested by Matt).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
f8035ba036
commit
9eb568d753
5 changed files with 976 additions and 17 deletions
|
|
@ -142,4 +142,5 @@ i965_FILES = \
|
|||
gen8_disasm.c \
|
||||
gen8_generator.cpp \
|
||||
gen8_instruction.c \
|
||||
gen8_vec4_generator.cpp \
|
||||
$()
|
||||
|
|
|
|||
|
|
@ -1676,10 +1676,16 @@ brw_vs_emit(struct brw_context *brw,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, mem_ctx,
|
||||
INTEL_DEBUG & DEBUG_VS);
|
||||
const unsigned *generated =g.generate_assembly(&v.instructions,
|
||||
final_assembly_size);
|
||||
const unsigned *assembly = NULL;
|
||||
if (brw->gen >= 8) {
|
||||
gen8_vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
|
||||
mem_ctx, INTEL_DEBUG & DEBUG_VS);
|
||||
assembly = g.generate_assembly(&v.instructions, final_assembly_size);
|
||||
} else {
|
||||
vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
|
||||
mem_ctx, INTEL_DEBUG & DEBUG_VS);
|
||||
assembly = g.generate_assembly(&v.instructions, final_assembly_size);
|
||||
}
|
||||
|
||||
if (unlikely(brw->perf_debug) && shader) {
|
||||
if (shader->compiled_once) {
|
||||
|
|
@ -1692,7 +1698,7 @@ brw_vs_emit(struct brw_context *brw,
|
|||
shader->compiled_once = true;
|
||||
}
|
||||
|
||||
return generated;
|
||||
return assembly;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ extern "C" {
|
|||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#include "gen8_generator.h"
|
||||
#endif
|
||||
|
||||
#include "glsl/ir.h"
|
||||
|
|
@ -653,6 +654,66 @@ private:
|
|||
const bool debug_flag;
|
||||
};
|
||||
|
||||
/**
|
||||
* The vertex shader code generator.
|
||||
*
|
||||
* Translates VS IR to actual i965 assembly code.
|
||||
*/
|
||||
class gen8_vec4_generator : public gen8_generator
|
||||
{
|
||||
public:
|
||||
gen8_vec4_generator(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
struct brw_vec4_prog_data *prog_data,
|
||||
void *mem_ctx,
|
||||
bool debug_flag);
|
||||
~gen8_vec4_generator();
|
||||
|
||||
const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
|
||||
|
||||
private:
|
||||
void generate_code(exec_list *instructions);
|
||||
void generate_vec4_instruction(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
|
||||
void generate_tex(vec4_instruction *inst,
|
||||
struct brw_reg dst);
|
||||
|
||||
void generate_urb_write(vec4_instruction *ir, bool copy_g0);
|
||||
void generate_gs_thread_end(vec4_instruction *ir);
|
||||
void generate_gs_set_write_offset(struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
void generate_gs_set_vertex_count(struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
|
||||
void generate_gs_prepare_channel_masks(struct brw_reg dst);
|
||||
void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
|
||||
|
||||
void generate_oword_dual_block_offsets(struct brw_reg m1,
|
||||
struct brw_reg index);
|
||||
void generate_scratch_write(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
struct brw_reg index);
|
||||
void generate_scratch_read(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
void generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
|
||||
void mark_surface_used(unsigned surf_index);
|
||||
|
||||
struct brw_vec4_prog_data *prog_data;
|
||||
|
||||
const bool debug_flag;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace brw */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
|
|
|||
|
|
@ -533,6 +533,25 @@ vec4_gs_visitor::visit(ir_end_primitive *)
|
|||
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
|
||||
}
|
||||
|
||||
static const unsigned *
|
||||
generate_assembly(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
struct brw_vec4_prog_data *prog_data,
|
||||
void *mem_ctx,
|
||||
exec_list *instructions,
|
||||
unsigned *final_assembly_size)
|
||||
{
|
||||
if (brw->gen >= 8) {
|
||||
gen8_vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx,
|
||||
INTEL_DEBUG & DEBUG_GS);
|
||||
return g.generate_assembly(instructions, final_assembly_size);
|
||||
} else {
|
||||
vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx,
|
||||
INTEL_DEBUG & DEBUG_GS);
|
||||
return g.generate_assembly(instructions, final_assembly_size);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" const unsigned *
|
||||
brw_gs_emit(struct brw_context *brw,
|
||||
|
|
@ -558,12 +577,9 @@ brw_gs_emit(struct brw_context *brw,
|
|||
|
||||
vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
|
||||
if (v.run()) {
|
||||
vec4_generator g(brw, prog, &c->gp->program.Base, &c->prog_data.base,
|
||||
mem_ctx, INTEL_DEBUG & DEBUG_GS);
|
||||
const unsigned *generated =
|
||||
g.generate_assembly(&v.instructions, final_assembly_size);
|
||||
|
||||
return generated;
|
||||
return generate_assembly(brw, prog, &c->gp->program.Base,
|
||||
&c->prog_data.base, mem_ctx, &v.instructions,
|
||||
final_assembly_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -586,12 +602,8 @@ brw_gs_emit(struct brw_context *brw,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
vec4_generator g(brw, prog, &c->gp->program.Base, &c->prog_data.base,
|
||||
mem_ctx, INTEL_DEBUG & DEBUG_GS);
|
||||
const unsigned *generated =
|
||||
g.generate_assembly(&v.instructions, final_assembly_size);
|
||||
|
||||
return generated;
|
||||
return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base,
|
||||
mem_ctx, &v.instructions, final_assembly_size);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
879
src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp
Normal file
879
src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp
Normal file
|
|
@ -0,0 +1,879 @@
|
|||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_vec4.h"
|
||||
|
||||
extern "C" {
|
||||
#include "brw_eu.h"
|
||||
#include "main/macros.h"
|
||||
#include "program/prog_print.h"
|
||||
#include "program/prog_parameter.h"
|
||||
};
|
||||
|
||||
namespace brw {
|
||||
|
||||
/**
 * Construct a Gen8 vec4 generator.
 *
 * brw, shader_prog, prog and mem_ctx are forwarded to the gen8_generator
 * base class; prog_data and debug_flag are kept on this object.
 */
gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw,
                                         struct gl_shader_program *shader_prog,
                                         struct gl_program *prog,
                                         struct brw_vec4_prog_data *prog_data,
                                         void *mem_ctx,
                                         bool debug_flag)
   : gen8_generator(brw, shader_prog, prog, mem_ctx),
     prog_data(prog_data),
     debug_flag(debug_flag)
{
   /* NOTE(review): the vertex stage is looked up unconditionally, although
    * this generator is also constructed for geometry shaders -- confirm that
    * is intended.
    */
   if (shader_prog)
      shader = shader_prog->_LinkedShaders[MESA_SHADER_VERTEX];
   else
      shader = NULL;
}
|
||||
|
||||
/** Destructor; this class performs no cleanup of its own. */
gen8_vec4_generator::~gen8_vec4_generator()
{
   /* Intentionally empty. */
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::mark_surface_used(unsigned surf_index)
|
||||
{
|
||||
assert(surf_index < BRW_MAX_SURFACES);
|
||||
|
||||
prog_data->base.binding_table.size_bytes =
|
||||
MAX2(prog_data->base.binding_table.size_bytes, (surf_index + 1) * 4);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst)
|
||||
{
|
||||
int msg_type = 0;
|
||||
|
||||
switch (ir->opcode) {
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXL:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
|
||||
} else {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXD:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
|
||||
} else {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXF:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_MS:
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXS:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
|
||||
} else {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
if (ir->shadow_compare) {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
|
||||
} else {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"should not get here: invalid VS texture opcode");
|
||||
break;
|
||||
}
|
||||
|
||||
if (ir->header_present) {
|
||||
MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
|
||||
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
if (ir->texture_offset) {
|
||||
/* Set the offset bits in DWord 2. */
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(ir->texture_offset));
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t surf_index =
|
||||
prog_data->base.binding_table.texture_start + ir->sampler;
|
||||
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, inst, dst);
|
||||
gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
|
||||
gen8_set_sampler_message(brw, inst,
|
||||
surf_index,
|
||||
ir->sampler,
|
||||
msg_type,
|
||||
1,
|
||||
ir->mlen,
|
||||
ir->header_present,
|
||||
BRW_SAMPLER_SIMD_MODE_SIMD4X2);
|
||||
|
||||
mark_surface_used(surf_index);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
/* Copy g0. */
|
||||
if (vs)
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
gen8_instruction *inst;
|
||||
if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
|
||||
/* Enable Channel Masks in the URB_WRITE_OWORD message header */
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(0xff00));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
|
||||
true);
|
||||
gen8_set_dst(brw, inst, brw_null_reg());
|
||||
gen8_set_src0(brw, inst, header);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
|
||||
struct brw_reg src)
|
||||
{
|
||||
/* Move the vertex count into the second MRF for the EOT write. */
|
||||
assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
|
||||
int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
|
||||
MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
|
||||
{
|
||||
struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
gen8_instruction *inst;
|
||||
|
||||
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
|
||||
/* mlen = 2: g0 header + vertex count */
|
||||
inst = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
|
||||
gen8_set_dst(brw, inst, brw_null_reg());
|
||||
gen8_set_src0(brw, inst, src);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1)
|
||||
{
|
||||
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
|
||||
* Header: M0.3):
|
||||
*
|
||||
* Slot 0 Offset. This field, after adding to the Global Offset field
|
||||
* in the message descriptor, specifies the offset (in 256-bit units)
|
||||
* from the start of the URB entry, as referenced by URB Handle 0, at
|
||||
* which the data will be accessed.
|
||||
*
|
||||
* Similar text describes DWORD M0.4, which is slot 1 offset.
|
||||
*
|
||||
* Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
|
||||
* of the register for geometry shader invocations 0 and 1) by the
|
||||
* immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
|
||||
*
|
||||
* We can do this with the following EU instruction:
|
||||
*
|
||||
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
|
||||
*/
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
gen8_instruction *inst =
|
||||
MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
assert(src.file == BRW_IMMEDIATE_VALUE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
|
||||
{
|
||||
/* We want to left shift just DWORD 4 (the x component belonging to the
|
||||
* second geometry shader invocation) by 4 bits. So generate the
|
||||
* instruction:
|
||||
*
|
||||
* shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
|
||||
*/
|
||||
dst = suboffset(vec1(dst), 4);
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
|
||||
* Header: M0.5):
|
||||
*
|
||||
* 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
|
||||
*
|
||||
* When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
|
||||
* DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
|
||||
* Vertex 0 DATA[7]. This bit is ANDed with the corresponding
|
||||
* channel enable to determine the final channel enable. For the
|
||||
* URB_READ_OWORD & URB_READ_HWORD messages, when final channel
|
||||
* enable is 1 it indicates that Vertex 1 DATA [3] will be included
|
||||
* in the writeback message. For the URB_WRITE_OWORD &
|
||||
* URB_WRITE_HWORD messages, when final channel enable is 1 it
|
||||
* indicates that Vertex 1 DATA [3] will be written to the surface.
|
||||
*
|
||||
* 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
|
||||
* 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
|
||||
*
|
||||
* 14 Vertex 1 DATA [2] Channel Mask
|
||||
* 13 Vertex 1 DATA [1] Channel Mask
|
||||
* 12 Vertex 1 DATA [0] Channel Mask
|
||||
* 11 Vertex 0 DATA [3] Channel Mask
|
||||
* 10 Vertex 0 DATA [2] Channel Mask
|
||||
* 9 Vertex 0 DATA [1] Channel Mask
|
||||
* 8 Vertex 0 DATA [0] Channel Mask
|
||||
*
|
||||
* (This is from a section of the PRM that is agnostic to the particular
|
||||
* type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
|
||||
* geometry shader invocations 0 and 1, respectively). Since we have the
|
||||
* enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
|
||||
* and the enable flags for geometry shader invocation 1 in bits 7:0 of
|
||||
* DWORD 4, we just need to OR them together and store the result in bits
|
||||
* 15:8 of DWORD 5.
|
||||
*
|
||||
* It's easier to get the EU to do this if we think of the src and dst
|
||||
* registers as composed of 32 bytes each; then, we want to pick up the
|
||||
* contents of bytes 0 and 16 from src, OR them together, and store them in
|
||||
* byte 21.
|
||||
*
|
||||
* We can do that by the following EU instruction:
|
||||
*
|
||||
* or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
|
||||
*
|
||||
* Note: this relies on the source register having zeros in (a) bits 7:4 of
|
||||
* DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
|
||||
* source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
|
||||
* shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
|
||||
* the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
|
||||
* contain valid channel mask values (which are in the range 0x0-0xf).
|
||||
*/
|
||||
dst = retype(dst, BRW_REGISTER_TYPE_UB);
|
||||
src = retype(src, BRW_REGISTER_TYPE_UB);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
gen8_instruction *inst =
|
||||
OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
|
||||
struct brw_reg index)
|
||||
{
|
||||
int second_vertex_offset = 1;
|
||||
|
||||
m1 = retype(m1, BRW_REGISTER_TYPE_D);
|
||||
|
||||
/* Set up M1 (message payload). Only the block offsets in M1.0 and
|
||||
* M1.4 are used, and the rest are ignored.
|
||||
*/
|
||||
struct brw_reg m1_0 = suboffset(vec1(m1), 0);
|
||||
struct brw_reg m1_4 = suboffset(vec1(m1), 4);
|
||||
struct brw_reg index_0 = suboffset(vec1(index), 0);
|
||||
struct brw_reg index_4 = suboffset(vec1(index), 4);
|
||||
|
||||
default_state.mask_control = BRW_MASK_DISABLE;
|
||||
default_state.access_mode = BRW_ALIGN_1;
|
||||
|
||||
MOV(m1_0, index_0);
|
||||
|
||||
if (index.file == BRW_IMMEDIATE_VALUE) {
|
||||
index_4.dw1.ud += second_vertex_offset;
|
||||
MOV(m1_4, index_4);
|
||||
} else {
|
||||
ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
|
||||
}
|
||||
|
||||
default_state.mask_control = BRW_MASK_ENABLE;
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, header);
|
||||
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
255, /* binding table index: stateless access */
|
||||
GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
2, /* mlen */
|
||||
1, /* rlen */
|
||||
true, /* header present */
|
||||
false); /* EOT */
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
struct brw_reg index)
|
||||
{
|
||||
struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
|
||||
|
||||
MOV_RAW(header, brw_vec8_grf(0, 0));
|
||||
|
||||
generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
|
||||
|
||||
MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
|
||||
retype(src, BRW_REGISTER_TYPE_D));
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, header);
|
||||
gen8_set_pred_control(send, ir->predicate);
|
||||
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
255, /* binding table index: stateless access */
|
||||
GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
3, /* mlen */
|
||||
0, /* rlen */
|
||||
true, /* header present */
|
||||
false); /* EOT */
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset)
|
||||
{
|
||||
assert(index.file == BRW_IMMEDIATE_VALUE &&
|
||||
index.type == BRW_REGISTER_TYPE_UD);
|
||||
uint32_t surf_index = index.dw1.ud;
|
||||
|
||||
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
/* Each of the 8 channel enables is considered for whether each
|
||||
* dword is written.
|
||||
*/
|
||||
gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
|
||||
gen8_set_dst(brw, send, dst);
|
||||
gen8_set_src0(brw, send, offset);
|
||||
gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
|
||||
surf_index,
|
||||
GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
|
||||
0, /* message control */
|
||||
1, /* mlen */
|
||||
1, /* rlen */
|
||||
false, /* no header */
|
||||
false); /* EOT */
|
||||
|
||||
mark_surface_used(surf_index);
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src)
|
||||
{
|
||||
vec4_instruction *ir = (vec4_instruction *) instruction;
|
||||
|
||||
if (dst.width == BRW_WIDTH_4) {
|
||||
/* This happens in attribute fixups for "dual instanced" geometry
|
||||
* shaders, since they use attributes that are vec4's. Since the exec
|
||||
* width is only 4, it's essential that the caller set
|
||||
* force_writemask_all in order to make sure the instruction is executed
|
||||
* regardless of which channels are enabled.
|
||||
*/
|
||||
assert(ir->force_writemask_all);
|
||||
|
||||
/* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
|
||||
* the following register region restrictions (from Graphics BSpec:
|
||||
* 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
|
||||
* > Register Region Restrictions)
|
||||
*
|
||||
* 1. ExecSize must be greater than or equal to Width.
|
||||
*
|
||||
* 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
|
||||
* to Width * HorzStride."
|
||||
*/
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (src[i].file == BRW_GENERAL_REGISTER_FILE)
|
||||
src[i] = stride(src[i], 4, 4, 1);
|
||||
}
|
||||
}
|
||||
|
||||
switch (ir->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
MOV(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADD:
|
||||
ADD(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
MUL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MACH:
|
||||
MACH(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MAD:
|
||||
MAD(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FRC:
|
||||
FRC(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDD:
|
||||
RNDD(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDE:
|
||||
RNDE(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_RNDZ:
|
||||
RNDZ(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
AND(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
OR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_XOR:
|
||||
XOR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_NOT:
|
||||
NOT(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ASR:
|
||||
ASR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHR:
|
||||
SHR(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHL:
|
||||
SHL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
CMP(dst, ir->conditional_mod, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
SEL(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DPH:
|
||||
DPH(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP4:
|
||||
DP4(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP3:
|
||||
DP3(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DP2:
|
||||
DP2(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_F32TO16:
|
||||
F32TO16(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_F16TO32:
|
||||
F16TO32(dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_LRP:
|
||||
LRP(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFREV:
|
||||
/* BFREV only supports UD type for src and dst. */
|
||||
BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
|
||||
retype(src[0], BRW_REGISTER_TYPE_UD));
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FBH:
|
||||
/* FBH only supports UD type for dst. */
|
||||
FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FBL:
|
||||
/* FBL only supports UD type for dst. */
|
||||
FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CBIT:
|
||||
/* CBIT only supports UD type for dst. */
|
||||
CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADDC:
|
||||
ADDC(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SUBB:
|
||||
SUBB(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFE:
|
||||
BFE(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFI1:
|
||||
BFI1(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BFI2:
|
||||
BFI2(dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_IF:
|
||||
IF(ir->predicate);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ELSE:
|
||||
ELSE();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ENDIF:
|
||||
ENDIF();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DO:
|
||||
DO();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BREAK:
|
||||
BREAK();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
CONTINUE();
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_WHILE:
|
||||
WHILE();
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RCP:
|
||||
MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RSQ:
|
||||
MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SQRT:
|
||||
MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_EXP2:
|
||||
MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_LOG2:
|
||||
MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SIN:
|
||||
MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_COS:
|
||||
MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_POW:
|
||||
MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_MS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
generate_tex(ir, dst);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_URB_WRITE:
|
||||
generate_urb_write(ir, true);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||
generate_scratch_read(ir, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||
generate_scratch_write(ir, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||
generate_pull_constant_load(ir, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_URB_WRITE:
|
||||
generate_urb_write(ir, false);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_THREAD_END:
|
||||
generate_gs_thread_end(ir);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_WRITE_OFFSET:
|
||||
generate_gs_set_write_offset(dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_VERTEX_COUNT:
|
||||
generate_gs_set_vertex_count(dst, src[0]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_DWORD_2_IMMED:
|
||||
generate_gs_set_dword_2_immed(dst, src[0]);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
|
||||
generate_gs_prepare_channel_masks(dst);
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_CHANNEL_MASKS:
|
||||
generate_gs_set_channel_masks(dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SHADER_TIME_ADD:
|
||||
assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||
assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||
assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
|
||||
break;
|
||||
|
||||
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
|
||||
assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
|
||||
break;
|
||||
|
||||
default:
|
||||
if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
|
||||
_mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
|
||||
opcode_descs[ir->opcode].name);
|
||||
} else {
|
||||
_mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
|
||||
}
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen8_vec4_generator::generate_code(exec_list *instructions)
|
||||
{
|
||||
int last_native_inst_offset = 0;
|
||||
const char *last_annotation_string = NULL;
|
||||
const void *last_annotation_ir = NULL;
|
||||
|
||||
if (unlikely(debug_flag)) {
|
||||
if (shader) {
|
||||
printf("Native code for vertex shader %d:\n", shader_prog->Name);
|
||||
} else {
|
||||
printf("Native code for vertex program %d:\n", prog->Id);
|
||||
}
|
||||
}
|
||||
|
||||
foreach_list(node, instructions) {
|
||||
vec4_instruction *ir = (vec4_instruction *) node;
|
||||
struct brw_reg src[3], dst;
|
||||
|
||||
if (unlikely(debug_flag)) {
|
||||
if (last_annotation_ir != ir->ir) {
|
||||
last_annotation_ir = ir->ir;
|
||||
if (last_annotation_ir) {
|
||||
printf(" ");
|
||||
if (shader) {
|
||||
((ir_instruction *) last_annotation_ir)->print();
|
||||
} else {
|
||||
const prog_instruction *vpi;
|
||||
vpi = (const prog_instruction *) ir->ir;
|
||||
printf("%d: ", (int)(vpi - prog->Instructions));
|
||||
_mesa_fprint_instruction_opt(stdout, vpi, 0,
|
||||
PROG_PRINT_DEBUG, NULL);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
if (last_annotation_string != ir->annotation) {
|
||||
last_annotation_string = ir->annotation;
|
||||
if (last_annotation_string)
|
||||
printf(" %s\n", last_annotation_string);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
src[i] = ir->get_src(prog_data, i);
|
||||
}
|
||||
dst = ir->get_dst();
|
||||
|
||||
default_state.conditional_mod = ir->conditional_mod;
|
||||
default_state.predicate = ir->predicate;
|
||||
default_state.predicate_inverse = ir->predicate_inverse;
|
||||
default_state.saturate = ir->saturate;
|
||||
|
||||
const unsigned pre_emit_nr_inst = nr_inst;
|
||||
|
||||
generate_vec4_instruction(ir, dst, src);
|
||||
|
||||
if (ir->no_dd_clear || ir->no_dd_check) {
|
||||
assert(nr_inst == pre_emit_nr_inst + 1 ||
|
||||
!"no_dd_check or no_dd_clear set for IR emitting more "
|
||||
"than 1 instruction");
|
||||
|
||||
gen8_instruction *last = &store[pre_emit_nr_inst];
|
||||
gen8_set_no_dd_clear(last, ir->no_dd_clear);
|
||||
gen8_set_no_dd_check(last, ir->no_dd_check);
|
||||
}
|
||||
|
||||
if (unlikely(debug_flag)) {
|
||||
disassemble(stdout, last_native_inst_offset, next_inst_offset);
|
||||
}
|
||||
|
||||
last_native_inst_offset = next_inst_offset;
|
||||
}
|
||||
|
||||
if (unlikely(debug_flag)) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
patch_jump_targets();
|
||||
|
||||
/* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
|
||||
* emit issues, it doesn't get the jump distances into the output,
|
||||
* which is often something we want to debug. So this is here in
|
||||
* case you're doing that.
|
||||
*/
|
||||
if (0 && unlikely(debug_flag)) {
|
||||
disassemble(stdout, 0, next_inst_offset);
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
gen8_vec4_generator::generate_assembly(exec_list *instructions,
|
||||
unsigned *assembly_size)
|
||||
{
|
||||
default_state.access_mode = BRW_ALIGN_16;
|
||||
default_state.exec_size = BRW_EXECUTE_8;
|
||||
generate_code(instructions);
|
||||
*assembly_size = next_inst_offset;
|
||||
return (const unsigned *) store;
|
||||
}
|
||||
|
||||
} /* namespace brw */
|
||||
Loading…
Add table
Reference in a new issue