965: get fragment shader compiler compiling

I don't think it will actually run yet, though.
This commit is contained in:
Keith Whitwell 2007-12-13 20:38:56 +00:00
parent c605a55e9f
commit 568fcf64c7
32 changed files with 1785 additions and 2028 deletions

View file

@ -47,10 +47,6 @@ static unsigned minify( unsigned d )
return MAX2(1, d>>1);
}
/* Round `value` up to the next multiple of `alignment`.
 *
 * The mask arithmetic only produces a true multiple when `alignment`
 * is a power of two.
 */
static int align(int value, int alignment)
{
   const int low_bits = alignment - 1;

   return (value + low_bits) & ~low_bits;
}
static void

View file

@ -31,6 +31,7 @@ DRIVER_SOURCES = \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
brw_shader_info.c \
brw_state.c \
brw_state_batch.c \
brw_state_cache.c \
@ -44,6 +45,7 @@ DRIVER_SOURCES = \
brw_vs_state.c \
brw_wm.c \
brw_wm_iz.c \
brw_wm_decl.c \
brw_wm_glsl.c \
brw_wm_sampler_state.c \
brw_wm_state.c \

View file

@ -36,7 +36,7 @@
#define INTEL_BATCH_CLIPRECTS 0x2
#define BEGIN_BATCH( dwords, relocs ) \
(brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs))
brw->winsys->batch_start(brw->winsys, dwords, relocs)
#define OUT_BATCH( dword ) \
brw->winsys->batch_dword(brw->winsys, dword)
@ -50,7 +50,6 @@
*/
#define FLUSH_BATCH(fence) do { \
brw->winsys->batch_flush(brw->winsys, fence); \
brw->batch_start = NULL; \
brw->hardware_dirty = ~0; \
} while (0)

View file

@ -142,7 +142,7 @@ static void upload_cc_vp( struct brw_context *brw )
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
.brw = BRW_NEW_CONTEXT,
.brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_cc_vp

View file

@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "pipe/p_util.h"
static void upload_clip_unit( struct brw_context *brw )
@ -43,7 +43,7 @@ static void upload_clip_unit( struct brw_context *brw )
/* CACHE_NEW_CLIP_PROG */
clip.thread0.grf_reg_count =
ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1;
align(brw->clip.prog_data->total_grf, 16) / 16 - 1;
clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;

View file

@ -237,7 +237,6 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys,
brw->pci_id = pci_id;
brw->dirty = ~0;
brw->hardware_dirty = ~0;
brw->batch_start = NULL;
memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));

View file

@ -119,7 +119,6 @@
* Handles blending and (presumably) depth and stencil testing.
*/
#define BRW_FALLBACK_TEXTURE 0x1
#define BRW_MAX_CURBE (32*16)
struct brw_context;
@ -147,16 +146,13 @@ struct brw_winsys;
/* Raised for other internal events:
*/
#define BRW_NEW_URB_FENCE 0x10000
#define BRW_NEW_INPUT_DIMENSIONS 0x20000
#define BRW_NEW_PSP 0x20000
#define BRW_NEW_CURBE_OFFSETS 0x40000
#define BRW_NEW_REDUCED_PRIMITIVE 0x80000
#define BRW_NEW_PRIMITIVE 0x100000
#define BRW_NEW_CONTEXT 0x200000
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x400000
#define BRW_NEW_INPUT_VARYING 0x800000
#define BRW_NEW_PSP 0x1000000
#define BRW_NEW_SCENE 0x200000
#define BRW_NEW_SF_LINKAGE 0x400000
#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
extern int BRW_DEBUG;
#define DEBUG_TEXTURE 0x1
@ -198,23 +194,47 @@ struct brw_state_flags {
unsigned brw;
};
/* Per-shader summary information.
 * NOTE(review): presumably filled in by brw_shader_info.c; the producer
 * is not visible here - confirm.
 */
struct brw_shader_info {
int nr_regs[8]; /* one entry per register file, indexed by TGSI_FILE_*; presumably the register count for that file */
};
struct brw_vertex_program {
struct pipe_shader_state program;
unsigned id;
unsigned param_state; /* flags indicating state tracked by params */
struct brw_shader_info info;
int id;
};
struct brw_fragment_program {
struct pipe_shader_state program;
unsigned id;
unsigned param_state; /* flags indicating state tracked by params */
struct brw_shader_info info;
boolean UsesDepth;
boolean UsesKill;
boolean ComputesDepth;
int id;
};
/* Describes how each fragment program input is fed from the vertex
 * program outputs.  An instance is kept in the context's sf state
 * (brw->sf.linkage).
 */
struct pipe_setup_linkage {
struct {
unsigned vp_output:5; /* index of the vertex program output feeding this input; all-ones means no match */
unsigned interp_mode:4; /* interpolation mode taken from the fragment input declaration */
unsigned bf_vp_output:5; /* index of the back-face colour output for colour inputs; all-ones means none */
} fp_input[PIPE_MAX_SHADER_INPUTS];
unsigned fp_input_count:5; /* how many fp_input[] entries are in use; 5 bits, so at most 31 */
unsigned max_vp_output:5; /* NOTE(review): no writer is visible in this change - confirm intended use */
};
struct brw_texture {
struct pipe_texture base;
@ -248,6 +268,12 @@ struct brw_texture {
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs:
*/
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs:
*/
struct brw_wm_prog_data {
unsigned curb_read_length;
unsigned urb_read_length;
@ -256,13 +282,14 @@ struct brw_wm_prog_data {
unsigned total_grf;
unsigned total_scratch;
unsigned nr_params;
boolean error;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
/* Internally generated constants for the CURBE. These are loaded
* ahead of the data from the constant buffer.
*/
const float *param[BRW_MAX_CURBE];
const float internal_const[8];
unsigned nr_internal_consts;
unsigned max_const;
boolean error;
};
struct brw_sf_prog_data {
@ -298,19 +325,14 @@ struct brw_vs_prog_data {
unsigned inputs_read;
unsigned max_const;
/* Used for calculating urb partitions:
*/
unsigned urb_entry_size;
};
/* Size == 0 if output either not written, or always [0,0,0,1]
*/
struct brw_vs_ouput_sizes {
ubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
};
#define BRW_MAX_TEX_UNIT 8
#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
@ -374,8 +396,6 @@ struct brw_cache {
struct brw_tracked_state {
struct brw_state_flags dirty;
void (*update)( struct brw_context *brw );
void (*emit_reloc)( struct brw_context *brw );
boolean always_update;
};
@ -455,8 +475,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
struct brw_tracked_state **atoms;
unsigned nr_atoms;
} state;
@ -496,27 +514,16 @@ struct brw_context
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
struct {
struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS];
unsigned buf;
unsigned offset;
unsigned size;
unsigned wrap;
} upload;
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
*/
struct brw_vertex_info info;
int last_vb;
} vb;
unsigned *batch_start;
unsigned hardware_dirty;
unsigned dirty;
unsigned pci_id;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
@ -557,11 +564,6 @@ struct brw_context
unsigned vs_size;
unsigned total_size;
/* Dynamic tracker which changes to reflect the state referenced
* by active fp and vp program parameters:
*/
struct brw_tracked_state tracked_state;
unsigned gs_offset;
float *last_buf;
@ -595,6 +597,8 @@ struct brw_context
struct {
struct brw_sf_prog_data *prog_data;
struct pipe_setup_linkage linkage;
unsigned prog_gs_offset;
unsigned vp_gs_offset;
unsigned state_gs_offset;
@ -602,11 +606,8 @@ struct brw_context
struct {
struct brw_wm_prog_data *prog_data;
struct brw_wm_compile *compile_data;
/* Input sizes, calculated from active vertex program:
*/
unsigned input_size_masks[4];
// struct brw_wm_compiler *compile_data;
/**
@ -667,8 +668,6 @@ void brw_destroy_state(struct brw_context *brw);
* brw_tex.c
*/
void brwUpdateTextureState( struct brw_context *brw );
void brw_FrameBufferTexInit( struct brw_context *brw );
void brw_FrameBufferTexDestroy( struct brw_context *brw );
/* brw_urb.c

View file

@ -35,6 +35,9 @@
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
#include "brw_wm.h"
#include "pipe/p_state.h"
#include "pipe/p_util.h"
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
@ -43,11 +46,10 @@
static void calculate_curbe_offsets( struct brw_context *brw )
{
/* CACHE_NEW_WM_PROG */
unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16);
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram;
unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16;
unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16);
unsigned nr_clip_regs = 0;
unsigned total_regs;
@ -55,7 +57,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
/* BRW_NEW_CLIP ? */
if (brw->attribs.Transform->ClipPlanesEnabled) {
unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
nr_clip_regs = (nr_planes * 4 + 15) / 16;
nr_clip_regs = align(nr_planes * 4, 16);
}
#endif
@ -172,28 +174,18 @@ static float fixed_plane[6][4] = {
{ 1, 0, 0, 1 }
};
#if 0
/* Upload a new set of constants. Too much variability to go into the
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
static void upload_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
unsigned sz = brw->curbe.total_size;
unsigned bufsz = sz * 16 * sizeof(float);
float *buf;
unsigned i;
/* Update our own dependency flags. This works because this
* function will also be called whenever fp or vp changes.
*/
brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
if (sz == 0) {
struct brw_constant_buffer cb;
@ -220,10 +212,16 @@ static void upload_constant_buffer(struct brw_context *brw)
if (brw->curbe.wm_size) {
unsigned offset = brw->curbe.wm_start * 16;
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
/* First the constant buffer constants:
*/
/* Then any internally generated constants:
*/
for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
buf[offset + i] = brw->wm.prog_data->internal_const[i];
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
buf[offset + i] = brw->wm.prog_data->param[i][0];
assert(brw->wm.prog_data->max_const ==
brw->wm.prog_data->nr_internal_consts);
}
@ -243,34 +241,26 @@ static void upload_constant_buffer(struct brw_context *brw)
buf[offset + i * 4 + 3] = fixed_plane[i][3];
}
/* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
* clip-space:
/* Clip planes: BRW_NEW_CLIP:
*/
assert(MAX_CLIP_PLANES == 6);
for (j = 0; j < MAX_CLIP_PLANES; j++) {
if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
i++;
}
for (j = 0; j < brw->attribs.Clip.nr; j++) {
buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0];
buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1];
buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2];
buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3];
i++;
}
}
if (brw->curbe.vs_size) {
unsigned offset = brw->curbe.vs_start * 16;
unsigned nr = vp->program.Base.Parameters->NumParameters;
// unsigned offset = brw->curbe.vs_start * 16;
// unsigned nr = vp->max_const;
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
/* map the vertex constant buffer and copy to curbe: */
for (i = 0; i < nr; i++) {
buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
}
// assert(nr == 0);
assert(0);
}
if (0) {
@ -309,7 +299,12 @@ static void upload_constant_buffer(struct brw_context *brw)
/* Copy data to the buffer:
*/
dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
brw->winsys->buffer_subdata_typed(brw->winsys,
pool->buffer,
brw->curbe.gs_offset,
bufsz,
buf,
BRW_CONSTANT_BUFFER );
}
/* TODO: only emit the constant_buffer packet when necessary, ie:
@ -341,9 +336,7 @@ static void upload_constant_buffer(struct brw_context *brw)
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
BRW_BATCH_STRUCT(brw, &cb);
/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */
}
}
@ -355,9 +348,8 @@ static void upload_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
.brw = (BRW_NEW_CLIP |
BRW_NEW_CONSTANTS |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
BRW_NEW_CURBE_OFFSETS),
@ -366,4 +358,3 @@ const struct brw_tracked_state brw_constant_buffer = {
.update = upload_constant_buffer
};
#endif

View file

@ -694,6 +694,17 @@ void brw_init_compile( struct brw_compile *p );
const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
/* Emit a single-source instruction with an arbitrary opcode into
 * compile `p`, writing to `dest`.  Returns the newly emitted
 * instruction.
 */
struct brw_instruction *brw_alu1( struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src );
/* Two-source counterpart of brw_alu1: emit an instruction with an
 * arbitrary opcode, destination `dest` and sources `src0`/`src1`.
 * Returns the newly emitted instruction.
 */
struct brw_instruction *brw_alu2(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 );
/* Helpers for regular instructions:
*/
#define ALU1(OP) \

View file

@ -363,10 +363,10 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
}
static struct brw_instruction *brw_alu1( struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src )
struct brw_instruction *brw_alu1( struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(insn, dest);
@ -374,11 +374,11 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
return insn;
}
static struct brw_instruction *brw_alu2(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 )
struct brw_instruction *brw_alu2(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(insn, dest);

View file

@ -34,6 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "pipe/p_util.h"
@ -46,7 +47,7 @@ static void upload_gs_unit( struct brw_context *brw )
/* CACHE_NEW_GS_PROG */
if (brw->gs.prog_active) {
gs.thread0.grf_reg_count =
ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1;
align(brw->gs.prog_data->total_grf, 16) / 16 - 1;
gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
}

View file

@ -315,7 +315,7 @@ static void upload_pipe_control(struct brw_context *brw)
const struct brw_tracked_state brw_pipe_control = {
.dirty = {
.brw = BRW_NEW_CONTEXT,
.brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_pipe_control
@ -380,7 +380,7 @@ static void upload_invarient_state( struct brw_context *brw )
const struct brw_tracked_state brw_invarient_state = {
.dirty = {
.brw = BRW_NEW_CONTEXT,
.brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_invarient_state
@ -416,7 +416,7 @@ static void upload_state_base_address( struct brw_context *brw )
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.brw = BRW_NEW_CONTEXT,
.brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_state_base_address

View file

@ -36,9 +36,8 @@
#include "brw_util.h"
#include "brw_sf.h"
#include "brw_state.h"
#include "tgsi/util/tgsi_parse.h"
#if 0
#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
@ -46,7 +45,6 @@ static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_compile c;
const unsigned *program;
unsigned program_size;
unsigned i, idx;
memset(&c, 0, sizeof(c));
@ -55,27 +53,17 @@ static void compile_sf_prog( struct brw_context *brw,
brw_init_compile(&c.func);
c.key = *key;
c.nr_attrs = brw_count_bits(c.key.attrs);
c.nr_attrs = c.key.vp_output_count;
c.nr_attr_regs = (c.nr_attrs+1)/2;
c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
c.nr_setup_attrs = c.key.fp_input_count;
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
/* Construct map from attribute number to position in the vertex.
*/
for (i = idx = 0; i < VERT_RESULT_MAX; i++)
if (c.key.attrs & (1<<i)) {
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
c.point_attrs[i].CoordReplace =
brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
} else
c.point_attrs[i].CoordReplace = FALSE;
idx++;
}
/* Which primitive? Or all three?
*/
@ -90,21 +78,17 @@ static void compile_sf_prog( struct brw_context *brw,
break;
case SF_POINTS:
c.nr_verts = 1;
if (key->do_point_sprite)
brw_emit_point_sprite_setup( &c );
else
brw_emit_point_setup( &c );
brw_emit_point_setup( &c );
break;
case SF_UNFILLED_TRIS:
c.nr_verts = 3;
brw_emit_anyprim_setup( &c );
break;
default:
assert(0);
return;
}
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
@ -142,20 +126,15 @@ static void upload_sf_prog( struct brw_context *brw )
/* Populate the key, noting state dependencies:
*/
/* CACHE_NEW_VS_PROG */
key.attrs = brw->vs.prog_data->outputs_written;
key.vp_output_count = brw->vs.prog_data->outputs_written;
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->reduced_primitive) {
case PIPE_PRIM_TRIANGLES:
/* NOTE: We just use the edgeflag attribute as an indicator that
* unfilled triangles are active. We don't actually do the
* edgeflag testing here, it is already done in the clip
* program.
*/
if (key.attrs & (1<<VERT_RESULT_EDGE))
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
// if (key.attrs & (1<<VERT_RESULT_EDGE))
// key.primitive = SF_UNFILLED_TRIS;
// else
key.primitive = SF_TRIANGLES;
break;
case PIPE_PRIM_LINES:
key.primitive = SF_LINES;
@ -165,16 +144,15 @@ static void upload_sf_prog( struct brw_context *brw )
break;
}
/* BRW_NEW_POINT */
key.do_point_sprite = brw->attribs.Point->PointSprite;
key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
/* BRW_NEW_RASTER */
key.do_flat_shading = (brw->attribs.Raster->flatshade);
key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
/* _NEW_POLYGON */
if (key.do_twoside_color)
key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
// key.do_point_sprite = brw->attribs.Point->PointSprite;
// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
// key.do_flat_shading = (brw->attribs.Raster->flatshade);
// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
// if (key.do_twoside_color)
// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
if (!search_cache(brw, &key))
@ -184,11 +162,150 @@ static void upload_sf_prog( struct brw_context *brw )
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
.brw = (BRW_NEW_RASTER |
BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
.brw = (BRW_NEW_RASTERIZER |
BRW_NEW_REDUCED_PRIMITIVE |
BRW_NEW_VS |
BRW_NEW_FS),
.cache = 0,
},
.update = upload_sf_prog
};
#endif
/* Build a struct like the one we'd like the state tracker to pass to
* us.
*/
static void update_sf_linkage( struct brw_context *brw )
{
const struct brw_vertex_program *vs = brw->attribs.VertexProgram;
const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
struct pipe_setup_linkage state;
struct tgsi_parse_context parse;
int i, j;
int nr_vp_outputs = 0;
int done = 0;
struct {
unsigned semantic:8;
unsigned semantic_index:16;
} fp_semantic[32], vp_semantic[32];
memset(&state, 0, sizeof(state));
state.fp_input_count = 0;
/* First scan fp inputs
*/
tgsi_parse_init( &parse, fs->program.tokens );
while( !done &&
!tgsi_parse_end_of_tokens( &parse ) )
{
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
{
int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
for (i = first; i < last; i++) {
state.fp_input[i].vp_output = ~0;
state.fp_input[i].bf_vp_output = ~0;
state.fp_input[i].interp_mode =
parse.FullToken.FullDeclaration.Interpolation.Interpolate;
fp_semantic[i].semantic =
parse.FullToken.FullDeclaration.Semantic.SemanticName;
fp_semantic[i].semantic_index =
parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
}
assert(last > state.fp_input_count);
state.fp_input_count = last;
}
break;
default:
done = 1;
break;
}
}
assert(state.fp_input_count == fs->program.num_inputs);
/* Then scan vp outputs
*/
done = 0;
tgsi_parse_init( &parse, vs->program.tokens );
while( !done &&
!tgsi_parse_end_of_tokens( &parse ) )
{
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
{
int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
for (i = first; i < last; i++) {
vp_semantic[i].semantic =
parse.FullToken.FullDeclaration.Semantic.SemanticName;
vp_semantic[i].semantic_index =
parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
}
assert(last > nr_vp_outputs);
nr_vp_outputs = last;
}
break;
default:
done = 1;
break;
}
}
/* Now match based on semantic information.
*/
for (i = 0; i< state.fp_input_count; i++) {
for (j = 0; j < nr_vp_outputs; j++) {
if (fp_semantic[i].semantic == vp_semantic[j].semantic &&
fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
state.fp_input[i].vp_output = j;
}
}
if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) {
for (j = 0; j < nr_vp_outputs; j++) {
if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic &&
fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
state.fp_input[i].bf_vp_output = j;
}
}
}
}
if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) {
brw->sf.linkage = state;
brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE;
}
}
/* Tracked-state entry for the vertex/fragment linkage: lists
 * BRW_NEW_VS and BRW_NEW_FS as the dirty flags it depends on (no cache
 * flags) and names update_sf_linkage as its update callback.
 */
const struct brw_tracked_state brw_sf_linkage = {
.dirty = {
.brw = (BRW_NEW_VS |
BRW_NEW_FS),
.cache = 0,
},
.update = update_sf_linkage
};

View file

@ -42,15 +42,26 @@
#define SF_TRIANGLES 2
#define SF_UNFILLED_TRIS 3
struct brw_sf_prog_key {
unsigned attrs:32;
unsigned vp_output_count:5;
unsigned fp_input_count:5;
unsigned primitive:2;
unsigned do_twoside_color:1;
unsigned do_flat_shading:1;
unsigned frontface_ccw:1;
unsigned do_point_sprite:1;
unsigned pad:10;
int SpriteOrigin;
/* Interpolation masks;
*/
unsigned linear_mask;
unsigned persp_mask;
unsigned const_mask;
// int SpriteOrigin;
};
struct brw_sf_point_tex {

View file

@ -36,171 +36,6 @@
#include "brw_util.h"
#include "brw_sf.h"
#if 0
/* Return the vec4 GRF region that holds attribute `attr` of the vertex
 * whose data begins at register `vert`.
 *
 * The attribute's position in the vertex comes from c->attr_to_idx[].
 * Two attributes share each register: the register offset is the
 * position divided by two, and odd positions start at element 4.
 */
static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
                                    struct brw_reg vert,
                                    unsigned attr)
{
   const unsigned reg_delta = c->attr_to_idx[attr] / 2;
   const unsigned upper_half = c->attr_to_idx[attr] % 2;

   return brw_vec4_grf(vert.nr + reg_delta, upper_half * 4);
}
/* Return non-zero when bit `attr` is set in the program key's
 * attribute mask (c->key.attrs), zero otherwise.
 */
static boolean have_attr(struct brw_sf_compile *c,
                         unsigned attr)
{
   /* Shift an unsigned one: the mask is 32 bits wide and `1 << 31` on
    * a signed int is undefined behaviour.
    */
   return (c->key.attrs & (1u << attr)) ? 1 : 0;
}
/***********************************************************************
* Twoside lighting
*/
/* For each of the two colour slots, emit a MOV that overwrites the
 * vertex's front colour with its back-face colour.  A slot is skipped
 * unless both the front and the back-face attribute are present.
 */
static void copy_bfc( struct brw_sf_compile *c,
                      struct brw_reg vert )
{
   struct brw_compile *p = &c->func;
   unsigned slot;

   for (slot = 0; slot < 2; slot++) {
      if (!have_attr(c, VERT_RESULT_COL0+slot))
         continue;

      if (!have_attr(c, VERT_RESULT_BFC0+slot))
         continue;

      brw_MOV(p,
              get_vert_attr(c, vert, VERT_RESULT_COL0+slot),
              get_vert_attr(c, vert, VERT_RESULT_BFC0+slot));
   }
}
/* Emit two-sided colour selection: compare the determinant (c->det)
 * against zero and, inside an IF on the result, copy the back-face
 * colours over the front colours of every vertex of the primitive.
 * The comparison direction is chosen from key.frontface_ccw.
 */
static void do_twoside_color( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *if_insn;
unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
/* XXX: What happens if BFC isn't present? This could only happen
* for user-supplied vertex programs, as t_vp_build.c always does
* the right thing.
*/
/* Nothing to emit unless at least one front/back colour pair exists. */
if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
!(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
return;
/* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
* to get all channels active inside the IF. In the clipping code
* we run with NoMask, so it's not an option and we can use
* BRW_EXECUTE_1 for all comparisons.
*/
brw_push_insn_state(p);
brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
if_insn = brw_IF(p, BRW_EXECUTE_4);
{
/* The cases fall through, so a primitive with N vertices
* handles vert[N-1] down to vert[0].
*/
switch (c->nr_verts) {
case 3: copy_bfc(c, c->vert[2]); /* fallthrough */
case 2: copy_bfc(c, c->vert[1]); /* fallthrough */
case 1: copy_bfc(c, c->vert[0]);
}
}
brw_ENDIF(p, if_insn);
brw_pop_insn_state(p);
}
/***********************************************************************
* Flat shading
*/
#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
(1<<VERT_RESULT_COL1))
/* Emit one MOV per colour attribute (VERT_RESULT_COL0 through
 * VERT_RESULT_COL1) that is present in the key, copying it from the
 * vertex at `src` to the vertex at `dst`.
 */
static void copy_colors( struct brw_sf_compile *c,
                         struct brw_reg dst,
                         struct brw_reg src)
{
   struct brw_compile *p = &c->func;
   unsigned attr = VERT_RESULT_COL0;

   while (attr <= VERT_RESULT_COL1) {
      if (have_attr(c, attr)) {
         brw_MOV(p,
                 get_vert_attr(c, dst, attr),
                 get_vert_attr(c, src, attr));
      }
      attr++;
   }
}
/* Need to use a computed jump to copy flatshaded attributes as the
* vertices are ordered according to y-coordinate before reaching this
* point, so the PV could be anywhere.
*
* Three copy blocks are emitted back to back, one per possible
* provoking vertex.  Each copy_colors() call emits `nr` MOVs (one per
* colour attribute present), so the first two blocks are nr*2 MOVs
* plus one JMPI and the last is nr*2 MOVs.  The jump distances below
* are derived from those counts, so the emission order must not change.
* NOTE(review): distances appear to be counted in instructions - confirm.
*/
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
/* Number of colour attributes present in the key. */
unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
if (!nr)
return;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
brw_push_insn_state(p);
/* Scale pv by the size of one block (nr*2 MOVs + 1 JMPI) and jump to it. */
brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
brw_JMPI(p, ip, ip, c->pv);
/* Block 0: vert[0] supplies the colours. */
copy_colors(c, c->vert[1], c->vert[0]);
copy_colors(c, c->vert[2], c->vert[0]);
/* Skip blocks 1 and 2: (nr*2 + 1) + nr*2. */
brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
/* Block 1: vert[1] supplies the colours. */
copy_colors(c, c->vert[0], c->vert[1]);
copy_colors(c, c->vert[2], c->vert[1]);
/* Skip block 2: nr*2. */
brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
/* Block 2: vert[2] supplies the colours. */
copy_colors(c, c->vert[0], c->vert[2]);
copy_colors(c, c->vert[1], c->vert[2]);
brw_pop_insn_state(p);
}
/* Line variant of the flat-shading computed jump: two copy blocks are
 * emitted, one per possible provoking vertex.  Each copy_colors() call
 * emits `nr` MOVs, so the first block is nr MOVs plus one JMPI and the
 * second is nr MOVs; the jump distances are derived from those counts.
 */
static void do_flatshade_line( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
/* Number of colour attributes present in the key. */
unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
if (!nr)
return;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
brw_push_insn_state(p);
/* Scale pv by the size of one block (nr MOVs + 1 JMPI) and jump to it. */
brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
brw_JMPI(p, ip, ip, c->pv);
/* Block 0: vert[0] supplies the colours. */
copy_colors(c, c->vert[1], c->vert[0]);
/* Skip block 1: nr. */
brw_JMPI(p, ip, ip, brw_imm_ud(nr));
/* Block 1: vert[1] supplies the colours. */
copy_colors(c, c->vert[0], c->vert[1]);
brw_pop_insn_state(p);
}
/***********************************************************************
@ -277,9 +112,6 @@ static void copy_z_inv_w( struct brw_sf_compile *c )
static void invert_det( struct brw_sf_compile *c)
{
/* Looks like we invert all 8 elements just to get 1/det in
* position 2 !?!
*/
brw_math(&c->func,
c->inv_det,
BRW_MATH_FUNCTION_INV,
@ -302,22 +134,16 @@ static boolean calculate_masks( struct brw_sf_compile *c,
ushort *pc_linear)
{
boolean is_last_attr = (reg == c->nr_setup_regs - 1);
unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
unsigned linear_mask;
if (c->key.do_flat_shading)
linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
else
linear_mask = c->key.attrs;
*pc_persp = 0;
*pc_linear = 0;
*pc = 0xf;
if (persp_mask & (1 << c->idx_to_attr[reg*2]))
*pc_persp = 0xf;
// if (persp_mask & (1 << c->idx_to_attr[reg*2]))
// *pc_persp = 0xf;
if (linear_mask & (1 << c->idx_to_attr[reg*2]))
// if (linear_mask & (1 << c->idx_to_attr[reg*2]))
*pc_linear = 0xf;
/* Maybe only process one attribute on the final round:
@ -325,10 +151,10 @@ static boolean calculate_masks( struct brw_sf_compile *c,
if (reg*2+1 < c->nr_setup_attrs) {
*pc |= 0xf0;
if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
*pc_persp |= 0xf0;
// if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
// *pc_persp |= 0xf0;
if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
// if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
*pc_linear |= 0xf0;
}
@ -347,12 +173,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
invert_det(c);
copy_z_inv_w(c);
if (c->key.do_twoside_color)
do_twoside_color(c);
if (c->key.do_flat_shading)
do_flatshade_triangle(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
@ -433,9 +253,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
invert_det(c);
copy_z_inv_w(c);
if (c->key.do_flat_shading)
do_flatshade_line(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
@ -491,86 +308,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
}
}
/* Emit the setup program for point sprites.
 *
 * For every setup register (a pair of attributes) this emits, in order:
 * an optional multiply of the attribute by 1/w, the m1Cx/m2Cy
 * coefficients, the constant term m3C0, and a URB write.  When
 * CoordReplace is set the coefficients are built from 1/PointWidth,
 * otherwise they are zero and the attribute value is passed through
 * as a constant.
 *
 * Note that `tex` is looked up from the first attribute of each pair
 * only (idx_to_attr[2*i]).
 */
void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
unsigned i;
c->nr_verts = 1;
alloc_regs(c);
copy_z_inv_w(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
struct brw_reg a0 = offset(c->vert[0], i);
ushort pc, pc_persp, pc_linear;
/* `last` is true for the final setup register; it is passed as eot below. */
boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
/* Replaced coordinates are generated below, so skip the 1/w multiply for them. */
if (!tex->CoordReplace) {
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
}
}
if (tex->CoordReplace) {
/* Calculate 1.0/PointWidth */
brw_math(&c->func,
c->tmp,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
c->dx0,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
/* The two origins differ only in the sign applied to the m2Cy term. */
if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
} else {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
}
} else {
/* No replacement: zero both coefficients. */
brw_MOV(p, c->m1Cx, brw_imm_ud(0));
brw_MOV(p, c->m2Cy, brw_imm_ud(0));
}
{
brw_set_predicate_control_flag_value(p, pc);
if (tex->CoordReplace) {
if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
}
else
brw_MOV(p, c->m3C0, brw_imm_f(0.0));
} else {
brw_MOV(p, c->m3C0, a0); /* constant value */
}
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
/* Points setup - several simplifications as all attributes are
* constant across the face of the point (point sprites excluded!)
@ -629,68 +366,3 @@ void brw_emit_point_setup( struct brw_sf_compile *c )
}
}
}
/* Emit a setup program that picks the primitive type at run time.
 *
 * A one-hot mask (1 << primitive type read from the payload) is tested
 * against the set of triangle-class primitives, then the set of
 * line-class primitives, then the sprite-point enable bit of the
 * payload.  Each test is followed by a forward JMPI whose target is
 * patched by brw_land_fwd_jump() to just after the matching setup
 * code.  Plain point setup is emitted last as the fallback.
 *
 * NOTE(review): the JMPI presumably picks up the Z condition set on the
 * preceding AND, so a block is skipped when its test yields zero -
 * confirm against brw_set_conditionalmod()/brw_JMPI().
 */
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
struct brw_reg primmask;
struct brw_instruction *jmp;
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
alloc_regs(c);
primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
/* primmask = 1 << payload_prim */
brw_MOV(p, primmask, brw_imm_ud(1));
brw_SHL(p, primmask, primmask, payload_prim);
/* Triangle-class primitives: */
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
(1<<_3DPRIM_TRISTRIP) |
(1<<_3DPRIM_TRIFAN) |
(1<<_3DPRIM_TRISTRIP_REVERSE) |
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
brw_push_insn_state(p);
brw_emit_tri_setup( c );
brw_pop_insn_state(p);
/* note - thread killed in subroutine */
}
brw_land_fwd_jump(p, jmp);
/* Line-class primitives: */
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
(1<<_3DPRIM_LINESTRIP) |
(1<<_3DPRIM_LINELOOP) |
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
brw_push_insn_state(p);
brw_emit_line_setup( c );
brw_pop_insn_state(p);
/* note - thread killed in subroutine */
}
brw_land_fwd_jump(p, jmp);
/* Point sprites, selected by the sprite-point enable bit of the payload: */
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
brw_push_insn_state(p);
brw_emit_point_sprite_setup( c );
brw_pop_insn_state(p);
}
brw_land_fwd_jump(p, jmp);
/* Fallback: ordinary points. */
brw_emit_point_setup( c );
}
#endif

View file

@ -34,91 +34,41 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "pipe/p_util.h"
#if 0
static void upload_sf_vp(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_sf_viewport sfv;
struct intel_renderbuffer *irb =
intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
float y_scale, y_bias;
int x, y, w, h, x1, x2, y1, y2;
int draw_h = ctx->DrawBuffer->Height;
memset(&sfv, 0, sizeof(sfv));
if (ctx->DrawBuffer->Name) {
/* User-created FBO */
if (irb && !irb->RenderToTexture) {
y_scale = -1.0;
y_bias = draw_h;
} else {
y_scale = 1.0;
y_bias = 0;
}
} else {
if (brw->intel.driDrawable != NULL) {
y_scale = -1.0;
y_bias = draw_h;
} else {
y_scale = 1.0;
y_bias = 0;
}
}
/* _NEW_VIEWPORT, BRW_NEW_METAOPS */
/* BRW_NEW_VIEWPORT */
{
const float *scale = brw->attribs.Viewport.scale;
const float *trans = brw->attribs.Viewport.translate;
if (!brw->metaops.active) {
const float *v = brw->intel.ctx.Viewport._WindowMap.m;
sfv.viewport.m00 = v[MAT_SX];
sfv.viewport.m11 = v[MAT_SY] * y_scale;
sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale;
sfv.viewport.m30 = v[MAT_TX];
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale;
} else {
sfv.viewport.m00 = 1;
sfv.viewport.m11 = - 1;
sfv.viewport.m22 = 1;
sfv.viewport.m30 = 0;
sfv.viewport.m31 = brw->intel.driDrawable->h;
sfv.viewport.m32 = 0;
sfv.viewport.m00 = scale[0];
sfv.viewport.m11 = scale[1];
sfv.viewport.m22 = scale[2];
sfv.viewport.m30 = trans[0];
sfv.viewport.m31 = trans[1];
sfv.viewport.m32 = trans[2];
}
/* _NEW_SCISSOR */
x = brw->attribs.Scissor->X;
y = brw->attribs.Scissor->Y;
w = brw->attribs.Scissor->Width;
h = brw->attribs.Scissor->Height;
if (ctx->DrawBuffer->Name == 0) {
x1 = x;
y1 = draw_h - (y + h);
x2 = x + w - 1;
y2 = y1 + h - 1;
} else {
/* FBO has non-inverted coords. */
x1 = x;
y1 = y;
x2 = x + w - 1;
y2 = y + h - 1;
}
sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
sfv.scissor.xmin = brw->attribs.Scissor.minx;
sfv.scissor.xmax = brw->attribs.Scissor.maxx;
sfv.scissor.ymin = brw->attribs.Scissor.miny;
sfv.scissor.ymax = brw->attribs.Scissor.maxy;
brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
}
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
.mesa = (_NEW_VIEWPORT |
_NEW_SCISSOR),
.brw = BRW_NEW_METAOPS,
.brw = (BRW_NEW_SCISSOR |
BRW_NEW_VIEWPORT),
.cache = 0
},
.update = upload_sf_vp
@ -130,7 +80,7 @@ static void upload_sf_unit( struct brw_context *brw )
memset(&sf, 0, sizeof(sf));
/* CACHE_NEW_SF_PROG */
sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1;
sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
@ -151,19 +101,19 @@ static void upload_sf_unit( struct brw_context *brw )
/* CACHE_NEW_SF_VP */
sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
sf.sf5.viewport_transform = 1;
/* _NEW_SCISSOR */
if (brw->attribs.Scissor->Enabled)
/* BRW_NEW_RASTER */
if (brw->attribs.Raster->scissor)
sf.sf6.scissor = 1;
/* _NEW_POLYGON */
#if 0
if (brw->attribs.Polygon->FrontFace == GL_CCW)
sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
if (brw->attribs.Polygon->CullFlag) {
switch (brw->attribs.Polygon->CullFaceMode) {
case GL_FRONT:
@ -182,25 +132,24 @@ static void upload_sf_unit( struct brw_context *brw )
}
else
sf.sf6.cull_mode = BRW_CULLMODE_NONE;
#else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
sf.sf6.cull_mode = BRW_CULLMODE_NONE;
#endif
/* _NEW_LINE */
/* XXX use ctx->Const.Min/MaxLineWidth here */
sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1);
sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1);
sf.sf6.line_endcap_aa_region_width = 1;
if (brw->attribs.Line->SmoothFlag)
if (brw->attribs.Raster->line_smooth)
sf.sf6.aa_enable = 1;
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
/* _NEW_POINT */
sf.sf6.point_rast_rule = 1; /* opengl conventions */
/* XXX clamp max depends on AA vs. non-AA */
sf.sf7.sprite_point = brw->attribs.Point->PointSprite;
sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3);
sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
sf.sf7.sprite_point = brw->attribs.Raster->point_sprite;
sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3);
sf.sf7.use_point_size_state = brw->attribs.Raster->point_size_per_vertex;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
@ -220,12 +169,8 @@ static void upload_sf_unit( struct brw_context *brw )
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_LINE |
_NEW_POINT |
_NEW_SCISSOR),
.brw = (BRW_NEW_URB_FENCE |
BRW_NEW_METAOPS),
.brw = (BRW_NEW_RASTERIZER |
BRW_NEW_URB_FENCE),
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
},
@ -233,4 +178,3 @@ const struct brw_tracked_state brw_sf_unit = {
};
#endif

View file

@ -0,0 +1,49 @@
#include "brw_context.h"
#include "brw_state.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/tgsi/util/tgsi_parse.h"
void brw_shader_info(const struct tgsi_token *tokens,
struct brw_shader_info *info )
{
struct tgsi_parse_context parse;
int done = 0;
tgsi_parse_init( &parse, tokens );
while( !done &&
!tgsi_parse_end_of_tokens( &parse ) )
{
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
{
const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
unsigned last = decl->u.DeclarationRange.Last;
assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
// Broken by crazy wpos init:
//assert( info->nr_regs[decl->Declaration.File] <= last);
info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File],
last+1);
break;
}
case TGSI_TOKEN_TYPE_IMMEDIATE:
case TGSI_TOKEN_TYPE_INSTRUCTION:
default:
done = 1;
break;
}
}
tgsi_parse_free (&parse);
}

View file

@ -198,6 +198,10 @@ static void * brw_create_fs_state(struct pipe_context *pipe,
/* XXX: Do I have to duplicate the tokens as well??
*/
brw_fp->program = *shader;
brw_fp->id = brw_context(pipe)->program_id++;
brw_shader_info(shader->tokens,
&brw_fp->info);
return (void *)brw_fp;
}
@ -228,6 +232,9 @@ static void *brw_create_vs_state(struct pipe_context *pipe,
/* XXX: Do I have to duplicate the tokens as well??
*/
brw_vp->program = *shader;
brw_vp->id = brw_context(pipe)->program_id++;
brw_shader_info(shader->tokens,
&brw_vp->info);
tgsi_dump(shader->tokens, 0);

View file

@ -154,4 +154,11 @@ void brw_upload_clip_prog(struct brw_context *brw);
void brw_upload_blend_constant_color(struct brw_context *brw);
void brw_upload_wm_samplers(struct brw_context *brw);
/* brw_shader_info.c
*
* Walks the leading declaration tokens of a TGSI shader and raises
* info->nr_regs[file] to cover the highest register index declared in
* each register file.
*/
void brw_shader_info(const struct tgsi_token *tokens,
struct brw_shader_info *info );
#endif

View file

@ -178,8 +178,9 @@ unsigned brw_upload_cache( struct brw_cache *cache,
if (BRW_DEBUG & DEBUG_STATE)
printf("upload %s: %d bytes to pool buffer %p offset %x\n",
cache->name, data_size,
cache->pool->buffer,
cache->name,
data_size,
(void*)cache->pool->buffer,
offset);
/* Copy data to the buffer:

View file

@ -43,17 +43,18 @@
*/
#include "pipe/p_winsys.h"
#include "pipe/p_util.h"
#include "brw_context.h"
#include "brw_state.h"
boolean brw_pool_alloc( struct brw_mem_pool *pool,
unsigned size,
unsigned align,
unsigned alignment,
unsigned *offset_return)
{
unsigned fixup = ALIGN(pool->offset, align) - pool->offset;
unsigned fixup = align(pool->offset, alignment) - pool->offset;
size = ALIGN(size, 4);
size = align(size, 4);
if (pool->offset + fixup + size >= pool->size) {
printf("%s failed\n", __FUNCTION__);
@ -114,7 +115,7 @@ void brw_pool_check_wrap( struct brw_context *brw,
struct brw_mem_pool *pool )
{
if (pool->offset > (pool->size * 3) / 4) {
brw->state.dirty.brw |= BRW_NEW_CONTEXT;
brw->state.dirty.brw |= BRW_NEW_SCENE;
}
}

View file

@ -97,8 +97,6 @@ const struct brw_tracked_state *atoms[] =
void brw_init_state( struct brw_context *brw )
{
unsigned i;
brw_init_pools(brw);
brw_init_caches(brw);
@ -156,7 +154,7 @@ void brw_validate_state( struct brw_context *brw )
state->brw == 0)
return;
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
if (brw->state.dirty.brw & BRW_NEW_SCENE)
brw_clear_batch_cache_flush(brw);
if (BRW_DEBUG) {

View file

@ -149,10 +149,10 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
unsigned mip1_width;
if (pt->compressed) {
mip1_width = ALIGN(minify(pt->width[0]), align_w)
+ ALIGN(minify(minify(pt->width[0])), align_w);
mip1_width = align(minify(pt->width[0]), align_w)
+ align(minify(minify(pt->width[0])), align_w);
} else {
mip1_width = ALIGN(minify(pt->width[0]), align_w)
mip1_width = align(minify(pt->width[0]), align_w)
+ minify(minify(pt->width[0]));
}
@ -164,7 +164,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
/* Pitch must be a whole number of dwords, even though we
* express it in texels.
*/
tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp;
tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp;
tex->total_height = 0;
for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
@ -176,7 +176,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
if (pt->compressed)
img_height = MAX2(1, height/4);
else
img_height = ALIGN(height, align_h);
img_height = align(height, align_h);
/* Because the images are packed better, the final offset
@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
/* Layout_below: step right after second mipmap.
*/
if (level == pt->first_level + 1) {
x += ALIGN(width, align_w);
x += align(width, align_w);
}
else {
y += img_height;
@ -221,13 +221,13 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
#if 0
if (pt->compressed) {
align_w = intel_compressed_alignment(pt->internal_format);
pt->pitch = ALIGN(width, align_w);
pt->pitch = align(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else
#endif
{
tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp;
pack_y_pitch = ALIGN(pt->height[0], align_h);
tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp;
pack_y_pitch = align(pt->height[0], align_h);
}
pack_x_pitch = tex->pitch;
@ -262,8 +262,8 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
if (pt->compressed) {
pack_y_pitch = (height + 3) / 4;
if (pack_x_pitch > ALIGN(width, align_w)) {
pack_x_pitch = ALIGN(width, align_w);
if (pack_x_pitch > align(width, align_w)) {
pack_x_pitch = align(width, align_w);
pack_x_nr <<= 1;
}
} else {
@ -275,7 +275,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
if (pack_y_pitch > 2) {
pack_y_pitch >>= 1;
pack_y_pitch = ALIGN(pack_y_pitch, align_h);
pack_y_pitch = align(pack_y_pitch, align_h);
}
}
@ -305,8 +305,6 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
sizeof(struct brw_texture));
if (tex) {
struct brw_context *brw = brw_context(pipe);
memset(&tex->base + 1, 0,
sizeof(struct brw_texture) - sizeof(struct pipe_texture));

View file

@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw )
/* CACHE_NEW_VS_PROG */
vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1;
vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
vs.thread3.dispatch_grf_start_reg = 1;

View file

@ -33,7 +33,9 @@
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
#include "brw_eu.h"
#include "brw_state.h"
#include "pipe/p_util.h"
@ -41,24 +43,22 @@ static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
struct brw_wm_compile *c;
struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile);
const unsigned *program;
unsigned program_size;
c = brw->wm.compile_data;
if (c == NULL) {
brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
c = brw->wm.compile_data;
} else {
memset(c, 0, sizeof(*brw->wm.compile_data));
}
memcpy(&c->key, key, sizeof(*key));
c->key = *key;
c->fp = fp;
c->delta_xy[0] = brw_null_reg();
c->delta_xy[1] = brw_null_reg();
c->pixel_xy[0] = brw_null_reg();
c->pixel_xy[1] = brw_null_reg();
c->pixel_w = brw_null_reg();
fprintf(stderr, "XXXXXXXX FP\n");
brw_wm_glsl_emit(c);
/* get the program
@ -74,6 +74,8 @@ static void do_wm_prog( struct brw_context *brw,
program_size,
&c->prog_data,
&brw->wm.prog_data );
FREE(c);
}
@ -86,8 +88,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
(struct brw_fragment_program *)brw->attribs.FragmentProgram;
unsigned lookup = 0;
unsigned line_aa;
unsigned i;
memset(key, 0, sizeof(*key));
/* Build the index for table lookup
@ -204,7 +205,6 @@ static void brw_upload_wm_prog( struct brw_context *brw )
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.brw = (BRW_NEW_FS |
BRW_NEW_WM_INPUT_DIMENSIONS |
BRW_NEW_REDUCED_PRIMITIVE),
.cache = 0
},

View file

@ -60,87 +60,20 @@ struct brw_wm_prog_key {
unsigned aa_dest_stencil_reg:3;
unsigned dest_depth_reg:3;
unsigned nr_depth_regs:3;
unsigned projtex_mask:8;
unsigned shadowtex_mask:8;
unsigned computes_depth:1; /* could be derived from program string */
unsigned source_depth_to_render_target:1;
unsigned flat_shade:1;
unsigned runtime_check_aads_emit:1;
unsigned yuvtex_mask:8;
unsigned pad1:24;
unsigned program_string_id:32;
unsigned program_string_id;
};
/* A bit of a glossary:
*
* brw_wm_value: A computed value or program input. Values are
* constant, they are created once and are never modified. When a
* fragment program register is written or overwritten, new values are
* created fresh, preserving the rule that values are constant.
*
* brw_wm_ref: A reference to a value. Wherever a value is used by an
* instruction or as a program output, that is tracked with an
* instance of this struct. All references to a value occur after it
* is created. After the last reference, a value is dead and can be
* discarded.
*
* brw_wm_grf: Represents a physical hardware register. May be either
* empty or hold a value. Register allocation is the process of
* assigning values to grf registers. This occurs in pass2 and the
* brw_wm_grf struct is not used before that.
*
* Fragment program registers: These are time-varying constructs that
* are hard to reason about and which we translate away in pass0. A
* single fragment program register element (eg. temp[0].x) will be
* translated to one or more brw_wm_value structs, one for each time
* that temp[0].x is written to during the program.
*/
/* Used in pass2 to track register allocation.
*
* Represents one physical hardware register, which is either empty or
* holds a value.
*/
struct brw_wm_grf {
struct brw_wm_value *value; /* value held by this register; presumably NULL when empty -- confirm */
unsigned nextuse; /* NOTE(review): presumably the instruction index of the next use of `value` -- confirm against pass2 */
};
/* A computed value or program input.  Values are created once and never
* modified; overwriting a fragment program register creates a fresh
* value instead.
*/
struct brw_wm_value {
struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
struct brw_wm_ref *lastuse; /* NOTE(review): presumably the final reference to this value, chained backwards via brw_wm_ref.prevuse -- confirm */
struct brw_wm_grf *resident; /* NOTE(review): presumably the grf currently holding this value, if any -- confirm */
unsigned contributes_to_output:1;
unsigned spill_slot:16; /* if non-zero, spill immediately after calculation */
};
/* A reference to a value: one instance for each place a value is used
* by an instruction or as a program output.
*/
struct brw_wm_ref {
struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
struct brw_wm_value *value; /* the value being referenced */
struct brw_wm_ref *prevuse; /* NOTE(review): presumably the previous reference to the same value -- confirm */
unsigned unspill_reg:7; /* unspill to reg */
unsigned emitted:1;
unsigned insn:24; /* NOTE(review): presumably the index of the instruction making this reference -- confirm */
};
/* Pairs a reference with a float constant.
* NOTE(review): presumably used to share one reference among all uses
* of the same constant value -- confirm against the pass that fills
* brw_wm_compile.constref[].
*/
struct brw_wm_constref {
const struct brw_wm_ref *ref;
float constval;
};
/* One instruction in the WM compiler's internal representation:
* destination values, source references and the opcode with its
* modifiers.
*/
struct brw_wm_instruction {
struct brw_wm_value *dst[4]; /* NOTE(review): presumably one destination value per channel -- confirm */
struct brw_wm_ref *src[3][4]; /* NOTE(review): presumably [argument][channel] -- confirm */
unsigned opcode:8;
unsigned saturate:1;
unsigned writemask:4;
unsigned tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
unsigned tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
};
#define PROGRAM_INTERNAL_PARAM
#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3)
@ -151,124 +84,59 @@ struct brw_wm_instruction {
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
/* New opcodes to track internal operations required for WM unit.
* These are added early so that the registers used can be tracked,
* freed and reused like those of other instructions.
*/
#define WM_PIXELXY (TGSI_OPCODE_LAST)
#define WM_DELTAXY (TGSI_OPCODE_LAST + 1)
#define WM_PIXELW (TGSI_OPCODE_LAST + 2)
#define WM_LINTERP (TGSI_OPCODE_LAST + 3)
#define WM_PINTERP (TGSI_OPCODE_LAST + 4)
#define WM_CINTERP (TGSI_OPCODE_LAST + 5)
#define WM_WPOSXY (TGSI_OPCODE_LAST + 6)
#define WM_FB_WRITE (TGSI_OPCODE_LAST + 7)
#define MAX_WM_OPCODE (TGSI_OPCODE_LAST + 8)
#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX)
#define MAX_IFSN 32
#define MAX_LOOP_DEPTH 32
struct brw_wm_compile {
struct brw_compile func;
struct brw_wm_prog_key key;
struct brw_wm_prog_data prog_data;
struct brw_wm_prog_data prog_data; /* result */
struct brw_fragment_program *fp;
float (*env_param)[4];
enum {
START,
PASS2_DONE
} state;
/* Initial pass - translate fp instructions to fp instructions,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
*/
const struct pipe_shader_state *prog_instructions;
unsigned nr_fp_insns;
unsigned fp_temp;
unsigned fp_interp_emitted;
unsigned fp_deriv_emitted;
struct tgsi_src_register pixel_xy;
struct tgsi_src_register delta_xy;
struct tgsi_src_register pixel_w;
struct brw_wm_value vreg[BRW_WM_MAX_VREG];
unsigned nr_vreg;
struct brw_wm_value creg[BRW_WM_MAX_PARAM];
unsigned nr_creg;
struct {
struct brw_wm_value depth[4]; /* includes r0/r1 */
struct brw_wm_value input_interp[PIPE_ATTRIB_MAX];
} payload;
const struct brw_wm_ref *pass0_fp_reg[16][256][4];
struct brw_wm_ref undef_ref;
struct brw_wm_value undef_value;
struct brw_wm_ref refs[BRW_WM_MAX_REF];
unsigned nr_refs;
struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
unsigned nr_insns;
struct brw_wm_constref constref[BRW_WM_MAX_CONST];
unsigned nr_constrefs;
struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
unsigned grf_limit;
unsigned max_wm_grf;
unsigned last_scratch;
struct {
boolean inited;
struct brw_reg reg;
} wm_regs[16][256][4];
struct brw_reg stack;
struct brw_reg pixel_xy[2];
struct brw_reg delta_xy[2];
struct brw_reg pixel_w;
struct brw_reg wm_regs[8][32][4];
struct brw_reg payload_depth[4];
struct brw_reg payload_coef[16];
struct brw_reg emit_mask_reg;
struct brw_instruction *if_inst[MAX_IFSN];
int if_insn;
struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
int loop_insn;
struct brw_instruction *inst0;
struct brw_instruction *inst1;
struct brw_reg stack;
struct brw_indirect stack_index;
unsigned reg_index;
unsigned tmp_start;
unsigned tmp_index;
};
unsigned brw_wm_nr_args( unsigned opcode );
unsigned brw_wm_is_scalar_result( unsigned opcode );
void brw_wm_pass_fp( struct brw_wm_compile *c );
void brw_wm_pass0( struct brw_wm_compile *c );
void brw_wm_pass1( struct brw_wm_compile *c );
void brw_wm_pass2( struct brw_wm_compile *c );
void brw_wm_emit( struct brw_wm_compile *c );
void brw_wm_print_value( struct brw_wm_compile *c,
struct brw_wm_value *value );
void brw_wm_print_ref( struct brw_wm_compile *c,
struct brw_wm_ref *ref );
void brw_wm_print_insn( struct brw_wm_compile *c,
struct brw_wm_instruction *inst );
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
void brw_wm_lookup_iz( unsigned line_aa,
unsigned lookup,
struct brw_wm_prog_key *key );
#if 0
boolean brw_wm_is_glsl(struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_wm_compile *c);
#endif
void brw_wm_emit_decls(struct brw_wm_compile *c);
#endif

View file

@ -0,0 +1,377 @@
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/tgsi/util/tgsi_parse.h"
/* Hand out the next temporary register.
 *
 * Temporaries are numbered relative to c->tmp_start; c->tmp_index counts
 * how many have been handed out since the last release_tmps().
 * prealloc_reg() uses c->reg_index as the next unallocated GRF, so it
 * is kept at or above one past the highest temporary handed out.
 *
 * Note the pre-increment: the first temporary is tmp_start + 1, so the
 * register at tmp_start itself is never returned.
 *
 * Returns an 8-wide GRF register for the new temporary.
 */
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
   c->tmp_index++;

   /* tmp_index is relative to tmp_start while reg_index is an absolute
    * register number; convert before taking the high-water mark,
    * otherwise reg_index never grows to cover the temporaries.
    */
   c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index + 1);

   return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
}
/* Release every temporary handed out by alloc_tmp(): the relative
* temporary index is rewound to zero so the same registers are handed
* out again by subsequent alloc_tmp() calls.  c->reg_index is left
* untouched.
*/
static void release_tmps(struct brw_wm_compile *c)
{
c->tmp_index = 0;
}
/* Return non-zero when 'reg' is the architecture null register, which
 * this file uses to mark "not computed yet" for the cached pixel_xy /
 * delta_xy registers.
 */
static int is_null( struct brw_reg reg )
{
   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      return 0;

   return reg.nr == BRW_ARF_NULL;
}
static void emit_pixel_xy( struct brw_wm_compile *c )
{
if (is_null(c->pixel_xy[0])) {
struct brw_compile *p = &c->func;
struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
brw_ADD(p,
c->pixel_xy[0],
stride(suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
brw_ADD(p,
c->pixel_xy[1],
stride(suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
}
}
static void emit_delta_xy( struct brw_wm_compile *c )
{
if (is_null(c->delta_xy[0])) {
struct brw_compile *p = &c->func;
struct brw_reg r1 = brw_vec1_grf(1, 0);
emit_pixel_xy(c);
c->delta_xy[0] = alloc_tmp(c);
c->delta_xy[1] = alloc_tmp(c);
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers.
*/
brw_ADD(p,
c->delta_xy[0],
retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW),
negate(r1));
brw_ADD(p,
c->delta_xy[1],
retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW),
negate(suboffset(r1,1)));
}
}
/* Disabled (#if 0).  Would compute per-pixel w once and cache it in
* c->pixel_w: 1/w is interpolated from the wpos coefficients straight
* into message register 2 and then inverted with the math unit.
* Reads c->coef_wpos.
*/
#if 0
static void emit_pixel_w( struct brw_wm_compile *c )
{
if (is_null(c->pixel_w)) {
struct brw_compile *p = &c->func;
struct brw_reg interp_wpos = c->coef_wpos;
c->pixel_w = alloc_tmp(c);
emit_delta_xy(c);
/* Calc 1/w - just linterp wpos[3] optimized by putting the
* result straight into a message reg.
*/
struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
/* Calc w */
brw_math_16( p,
c->pixel_w,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
2,
brw_null_reg(),
BRW_MATH_PRECISION_FULL);
}
}
#endif
static void emit_cinterp(struct brw_wm_compile *c,
int idx,
int mask )
{
struct brw_compile *p = &c->func;
struct brw_reg interp[4];
struct brw_reg coef = c->payload_coef[idx];
int i;
interp[0] = brw_vec1_grf(coef.nr, 0);
interp[1] = brw_vec1_grf(coef.nr, 4);
interp[2] = brw_vec1_grf(coef.nr+1, 0);
interp[3] = brw_vec1_grf(coef.nr+1, 4);
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
brw_MOV(p, dst, suboffset(interp[i],3));
}
}
}
static void emit_linterp(struct brw_wm_compile *c,
int idx,
int mask )
{
struct brw_compile *p = &c->func;
struct brw_reg interp[4];
struct brw_reg coef = c->payload_coef[idx];
int i;
emit_delta_xy(c);
interp[0] = brw_vec1_grf(coef.nr, 0);
interp[1] = brw_vec1_grf(coef.nr, 4);
interp[2] = brw_vec1_grf(coef.nr+1, 0);
interp[3] = brw_vec1_grf(coef.nr+1, 4);
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
}
}
}
/* Disabled (#if 0).  Perspective interpolation: the same LINE/MAC pair
* as emit_linterp(), followed by a multiply with c->pixel_w.
*
* Calls get_delta_xy(), get_pixel_w() and allocate_reg(), none of which
* are defined in this file as shown; brw_wm_emit_decls() uses
* emit_linterp() for perspective inputs instead.
*/
#if 0
static void emit_pinterp(struct brw_wm_compile *c,
int idx,
int mask )
{
struct brw_compile *p = &c->func;
struct brw_reg interp[4];
struct brw_reg coef = c->payload_coef[idx];
int i;
get_delta_xy(c);
get_pixel_w(c);
interp[0] = brw_vec1_grf(coef.nr, 0);
interp[1] = brw_vec1_grf(coef.nr, 4);
interp[2] = brw_vec1_grf(coef.nr+1, 0);
interp[3] = brw_vec1_grf(coef.nr+1, 4);
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
brw_MUL(p, dst, dst, c->pixel_w);
}
}
}
#endif
/* Disabled (#if 0).  Sketch of window-position handling: wpos.xy is
* produced by a WM_WPOSXY op from the pixel coordinates, wpos.zw by a
* WM_LINTERP op.
*
* Not compilable as written: the function takes no parameters yet uses
* `c` and `idx`, and it relies on emit_op(), dst_reg(), src_reg() and
* similar helpers that are not defined in this file as shown.
*/
#if 0
static void emit_wpos( )
{
struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
struct tgsi_full_src_register deltas = get_delta_xy(c);
struct tgsi_full_src_register arg2;
unsigned opcode;
opcode = WM_LINTERP;
arg2 = src_undef();
/* Have to treat wpos.xy specially:
*/
emit_op(c,
WM_WPOSXY,
dst_mask(dst, WRITEMASK_XY),
0, 0, 0,
get_pixel_xy(c),
src_undef(),
src_undef());
dst = dst_mask(dst, WRITEMASK_ZW);
/* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
*/
emit_op(c,
WM_LINTERP,
dst,
0, 0, 0,
interp,
deltas,
arg2);
}
#endif
/* Perform register allocation.
 *
 * Assigns hardware GRF registers in this fixed order, advancing
 * c->reg_index as it goes:
 *
 *   -- payload: two registers per depth-related entry
 *      (key.nr_depth_regs of them, starting at register 0)
 *   -- the CURBE copy of the shader constants, eight scalars per
 *      register
 *   -- plane coefficients for the inputs, two registers per input
 *   -- one register for the emit mask and two for the stack
 *   -- one register per channel for every interpolated input and every
 *      output
 *
 * c->tmp_start is left pointing just past all of these.  The payload
 * layout is also recorded in c->prog_data (first_curbe_grf,
 * urb_read_length, curb_read_length, max_const).
 *
 * Use a totally static register allocation.  This will perform poorly
 * but is an easy way to get started (again).
 *
 * The destination arrays are fixed-size, so the counts taken from the
 * key and from the shader info are asserted against the array sizes
 * before being used as indices.
 */
static void prealloc_reg(struct brw_wm_compile *c)
{
   int i, j;
   int nr_curbe_regs = 0;

   /* R0, then some depth related regs:
    */
   assert(c->key.nr_depth_regs <=
          (unsigned)(sizeof(c->payload_depth) / sizeof(c->payload_depth[0])));

   for (i = 0; i < c->key.nr_depth_regs; i++) {
      c->payload_depth[i] = brw_vec8_grf(i*2, 0);
      c->reg_index += 2;
   }

   /* Then a copy of our part of the CURBE entry:
    */
   {
      int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT];
      int index = 0;

      assert(nr_constants >= 0 &&
             (unsigned)nr_constants <=
             (unsigned)(sizeof(c->wm_regs[0]) / sizeof(c->wm_regs[0][0])));

      c->prog_data.max_const = 4*nr_constants;

      /* Each constant scalar gets its own single-element register
       * reference, packed eight to a GRF.
       */
      for (i = 0; i < nr_constants; i++) {
         for (j = 0; j < 4; j++, index++)
            c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
                                                                index%8);
      }

      /* Round the constant space up to a whole pair of registers. */
      nr_curbe_regs = 2*((4*nr_constants+15)/16);
      c->reg_index += nr_curbe_regs;
   }

   /* Next we receive the plane coefficients for parameter
    * interpolation:
    */
   assert(c->fp->info.nr_regs[TGSI_FILE_INPUT] <=
          (unsigned)(sizeof(c->payload_coef) / sizeof(c->payload_coef[0])));
   assert(c->fp->info.nr_regs[TGSI_FILE_INPUT] <=
          (unsigned)(sizeof(c->wm_regs[0]) / sizeof(c->wm_regs[0][0])));

   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) {
      c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
      c->reg_index += 2;
   }

   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
   c->prog_data.urb_read_length = c->fp->program.num_inputs * 2;
   c->prog_data.curb_read_length = nr_curbe_regs;

   /* That's the end of the payload, now we can start allocating registers.
    */
   c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
   c->reg_index++;

   c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
   c->reg_index += 2;

   /* Now allocate room for the interpolated inputs and staging
    * registers for the outputs:
    */
   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++)
      for (j = 0; j < 4; j++)
         c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );

   assert(c->fp->info.nr_regs[TGSI_FILE_OUTPUT] <=
          (unsigned)(sizeof(c->wm_regs[0]) / sizeof(c->wm_regs[0][0])));

   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++)
      for (j = 0; j < 4; j++)
         c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );

   /* Beyond this we should only need registers for internal temporaries:
    */
   c->tmp_start = c->reg_index;
}
/* Need to interpolate fragment program inputs in as a preamble to the
* shader. A more sophisticated compiler would do this on demand, but
* we'll do it up front:
*/
void brw_wm_emit_decls(struct brw_wm_compile *c)
{
struct tgsi_parse_context parse;
int done = 0;
prealloc_reg(c);
tgsi_parse_init( &parse, c->fp->program.tokens );
while( !done &&
!tgsi_parse_end_of_tokens( &parse ) )
{
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
{
const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
unsigned first = decl->u.DeclarationRange.First;
unsigned last = decl->u.DeclarationRange.Last;
unsigned mask = decl->Declaration.UsageMask; /* ? */
unsigned i;
if (decl->Declaration.File != TGSI_FILE_INPUT)
break;
assert(decl->Declaration.Interpolate);
for( i = first; i <= last; i++ ) {
switch (decl->Interpolation.Interpolate) {
case TGSI_INTERPOLATE_CONSTANT:
emit_cinterp(c, i, mask);
break;
case TGSI_INTERPOLATE_LINEAR:
emit_linterp(c, i, mask);
break;
case TGSI_INTERPOLATE_PERSPECTIVE:
//emit_pinterp(c, i, mask);
emit_linterp(c, i, mask);
break;
}
}
break;
}
case TGSI_TOKEN_TYPE_IMMEDIATE:
case TGSI_TOKEN_TYPE_INSTRUCTION:
default:
done = 1;
break;
}
}
tgsi_parse_free (&parse);
release_tmps(c);
}

File diff suppressed because it is too large Load diff

View file

@ -229,12 +229,12 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp
* complicates various things. However, this is still too confusing -
* FIXME: simplify all the different new texture state flags.
*/
void brw_upload_wm_samplers(struct brw_context *brw)
static void upload_wm_samplers(struct brw_context *brw)
{
unsigned unit;
unsigned sampler_count = 0;
/* _NEW_TEXTURE */
/* BRW_NEW_SAMPLER */
for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */
const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit];
@ -262,14 +262,11 @@ void brw_upload_wm_samplers(struct brw_context *brw)
sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
}
#if 0
const struct brw_tracked_state brw_wm_samplers = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = 0,
.brw = BRW_NEW_SAMPLER,
.cache = 0
},
.update = upload_wm_samplers
};
#endif

View file

@ -34,15 +34,13 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
#include "pipe/p_util.h"
/***********************************************************************
* WM unit - fragment programs and rasterization
*/
#if 0
static void upload_wm_unit(struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_state wm;
unsigned max_threads;
unsigned per_thread;
@ -56,7 +54,7 @@ static void upload_wm_unit(struct brw_context *brw )
memset(&wm, 0, sizeof(wm));
/* CACHE_NEW_WM_PROG */
wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
@ -64,9 +62,10 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.max_threads = max_threads;
per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
per_thread = align(brw->wm.prog_data->total_scratch, 1024);
assert(per_thread <= 12 * 1024);
#if 0
if (brw->wm.prog_data->total_scratch) {
unsigned total = per_thread * (max_threads + 1);
@ -95,6 +94,7 @@ static void upload_wm_unit(struct brw_context *brw )
* so just fail for now if we hit that path.
*/
assert(brw->wm.prog_data->total_scratch == 0);
#endif
/* CACHE_NEW_SURFACE */
wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
@ -112,23 +112,20 @@ static void upload_wm_unit(struct brw_context *brw )
/* BRW_NEW_FRAGMENT_PROGRAM */
{
const struct gl_fragment_program *fp = brw->fragment_program;
const struct brw_fragment_program *fp = brw->attribs.FragmentProgram;
if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
if (fp->UsesDepth)
wm.wm5.program_uses_depth = 1; /* as far as we can tell */
if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
if (fp->ComputesDepth)
wm.wm5.program_computes_depth = 1;
/* _NEW_COLOR */
/* BRW_NEW_ALPHA_TEST */
if (fp->UsesKill ||
brw->attribs.Color->AlphaEnabled)
brw->attribs.AlphaTest->enabled)
wm.wm5.program_uses_killpixel = 1;
if (brw_wm_is_glsl(fp))
wm.wm5.enable_8_pix = 1;
else
wm.wm5.enable_16_pix = 1;
wm.wm5.enable_8_pix = 1;
}
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
@ -138,11 +135,11 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.line_aa_region_width = 0;
wm.wm5.line_endcap_aa_region_width = 1;
/* _NEW_POLYGONSTIPPLE */
if (brw->attribs.Polygon->StippleFlag)
/* BRW_NEW_RASTERIZER */
if (brw->attribs.Raster->poly_stipple_enable)
wm.wm5.polygon_stipple = 1;
/* _NEW_POLYGON */
#if 0
if (brw->attribs.Polygon->OffsetFill) {
wm.wm5.depth_offset = 1;
/* Something weird going on with legacy_global_depth_bias,
@ -156,13 +153,13 @@ static void upload_wm_unit(struct brw_context *brw )
*/
wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
}
#endif
/* _NEW_LINE */
if (brw->attribs.Line->StippleFlag) {
if (brw->attribs.Raster->line_stipple_enable) {
wm.wm5.line_stipple = 1;
}
if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm)
if (BRW_DEBUG & DEBUG_STATS)
wm.wm4.stats_enable = 1;
brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
@ -183,14 +180,10 @@ static void upload_wm_unit(struct brw_context *brw )
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_POLYGONSTIPPLE |
_NEW_LINE |
_NEW_COLOR),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_LOCK),
.brw = (BRW_NEW_RASTERIZER |
BRW_NEW_ALPHA_TEST |
BRW_NEW_FS |
BRW_NEW_CURBE_OFFSETS),
.cache = (CACHE_NEW_SURFACE |
CACHE_NEW_WM_PROG |
@ -199,4 +192,3 @@ const struct brw_tracked_state brw_wm_unit = {
.update = upload_wm_unit
};
#endif

View file

@ -360,6 +360,11 @@ static INLINE float LOG2(float val)
#define CEILF(x) ((float) ceil(x))
#endif
/* Round 'value' up to the next multiple of 'alignment'.
 *
 * The rounding is done with a bit mask, so the result is only a
 * multiple of 'alignment' when 'alignment' is a power of two.
 */
static INLINE int align(int value, int alignment)
{
   const int low_bits = alignment - 1;

   return (value + low_bits) & ~low_bits;
}
/* Convenient...
*/
extern void _mesa_printf(const char *str, ...);

View file

@ -52,10 +52,6 @@ static unsigned minify( unsigned d )
return MAX2(1, d>>1);
}
/* Round 'value' up to the next multiple of 'alignment'.
 *
 * The rounding is done with a bit mask, so the result is only a
 * multiple of 'alignment' when 'alignment' is a power of two.
 */
static int align(int value, int alignment)
{
   const int low_bits = alignment - 1;

   return (value + low_bits) & ~low_bits;
}
static void