Merge branch 'gallium-tex-surfaces' into gallium-0.1

Conflicts:

	src/gallium/drivers/i915simple/i915_context.h
This commit is contained in:
Jakob Bornecrantz 2008-05-30 13:46:18 +02:00
commit 3869c3c87a
38 changed files with 1153 additions and 440 deletions

View file

@ -1,12 +1,39 @@
#!/usr/bin/env python
##########################################################################
#
# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sub license, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice (including the
# next paragraph) shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
##########################################################################
import sys
import optparse
import re
import struct
__version__ = '0.1'
__version__ = '0.1'
verbose = False
@ -16,7 +43,7 @@ class ParseError(Exception):
class MsvcDemangler:
# http://www.kegel.com/mangle.html
# http://www.kegel.com/mangle.html
def __init__(self, symbol):
self._symbol = symbol

View file

@ -37,6 +37,7 @@ C_SOURCES = \
draw_vs_varient.c \
draw_vs_aos.c \
draw_vs_aos_io.c \
draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c

View file

@ -15,7 +15,7 @@ draw = env.ConvenienceLibrary(
'draw_pipe_stipple.c',
'draw_pipe_twoside.c',
'draw_pipe_unfilled.c',
'draw_pipe_util.c',
'draw_pipe_util.c',
'draw_pipe_validate.c',
'draw_pipe_vbuf.c',
'draw_pipe_wide_line.c',
@ -25,15 +25,21 @@ draw = env.ConvenienceLibrary(
'draw_pt_emit.c',
'draw_pt_fetch.c',
'draw_pt_fetch_emit.c',
'draw_pt_fetch_shade_emit.c',
'draw_pt_fetch_shade_pipeline.c',
'draw_pt_post_vs.c',
'draw_pt_util.c',
'draw_pt_varray.c',
'draw_pt_vcache.c',
'draw_vertex.c',
'draw_vs.c',
'draw_vs_aos.c',
'draw_vs_aos_io.c',
'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
'draw_vs_sse.c',
'draw_vs_varient.c'
])
auxiliaries.insert(0, draw)

View file

@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw,
viewport->translate[1] == 0.0f &&
viewport->translate[2] == 0.0f &&
viewport->translate[3] == 0.0f);
draw_vs_set_viewport( draw, viewport );
}
@ -215,9 +217,11 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
void
draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer)
const void *buffer,
unsigned size )
{
draw->pt.user.constants = buffer;
draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
}
@ -345,14 +349,30 @@ void draw_set_edgeflags( struct draw_context *draw,
* \param elements the element buffer ptr
*/
void
draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize, void *elements )
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
unsigned min_index,
unsigned max_index,
void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
draw->pt.user.min_index = min_index;
draw->pt.user.max_index = max_index;
}
/**
 * Record a mapped element (index) buffer for which the caller supplies
 * no index range.
 *
 * \param draw      the draw context to update
 * \param eltSize   stored as-is in draw->pt.user.eltSize
 * \param elements  the element buffer ptr
 */
void
draw_set_mapped_element_buffer( struct draw_context *draw,
                                unsigned eltSize,
                                void *elements )
{
   /* No range information available: store the widest possible range. */
   draw->pt.user.min_index = 0;
   draw->pt.user.max_index = 0xffffffff;

   draw->pt.user.eltSize = eltSize;
   draw->pt.user.elts = elements;
}
/* Revamp me please:
*/

View file

@ -118,14 +118,23 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
void
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
unsigned min_index,
unsigned max_index,
void *elements );
void draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize, void *elements );
unsigned eltSize,
void *elements );
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
void draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer);
const void *buffer,
unsigned size );
void draw_set_edgeflags( struct draw_context *draw,
const unsigned *edgeflag );

View file

@ -175,6 +175,22 @@ reset_stipple_counter(struct draw_stage *stage)
stage->next->reset_stipple_counter( stage->next );
}
static void
stipple_reset_point(struct draw_stage *stage, struct prim_header *header)
{
struct stipple_stage *stipple = stipple_stage(stage);
stipple->counter = 0;
stage->next->point(stage->next, header);
}
static void
stipple_reset_tri(struct draw_stage *stage, struct prim_header *header)
{
struct stipple_stage *stipple = stipple_stage(stage);
stipple->counter = 0;
stage->next->tri(stage->next, header);
}
static void
stipple_first_line(struct draw_stage *stage,
@ -220,9 +236,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.draw = draw;
stipple->stage.next = NULL;
stipple->stage.point = draw_pipe_passthrough_point;
stipple->stage.point = stipple_reset_point;
stipple->stage.line = stipple_first_line;
stipple->stage.tri = draw_pipe_passthrough_tri;
stipple->stage.tri = stipple_reset_tri;
stipple->stage.reset_stipple_counter = reset_stipple_counter;
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;

View file

@ -145,7 +145,7 @@ emit_vertex( struct vbuf_stage *vbuf,
vertex->vertex_id = vbuf->nr_vertices++;
}
return vertex->vertex_id;
return (ushort)vertex->vertex_id;
}

View file

@ -147,6 +147,8 @@ struct draw_context
const void *elts;
/** bytes per index (0, 1, 2 or 4) */
unsigned eltSize;
unsigned min_index;
unsigned max_index;
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
@ -155,7 +157,8 @@ struct draw_context
const void *constants;
} user;
boolean test_fse;
boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
boolean no_fse; /* disable FSE even when it is correct */
} pt;
struct {
@ -183,6 +186,16 @@ struct draw_context
*/
struct gallivm_cpu_engine *engine;
/* Here's another one:
*/
struct aos_machine *aos_machine;
const float (*aligned_constants)[4];
const float (*aligned_constant_storage)[4];
unsigned const_storage_size;
struct translate *fetch;
struct translate_cache *fetch_cache;
@ -215,6 +228,12 @@ struct draw_context
boolean draw_vs_init( struct draw_context *draw );
void draw_vs_destroy( struct draw_context *draw );
void draw_vs_set_viewport( struct draw_context *,
const struct pipe_viewport_state * );
void draw_vs_set_constants( struct draw_context *,
const float (*constants)[4],
unsigned size );

View file

@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
else if (opt == PT_SHADE && draw->pt.test_fse)
else if (opt == PT_SHADE && !draw->pt.no_fse)
middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
@ -105,6 +105,7 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
draw->pt.no_fse = GETENV("DRAW_NO_FSE") != NULL;
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
@ -118,12 +119,9 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
if (draw->pt.test_fse) {
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
}
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)

View file

@ -96,6 +96,15 @@ struct draw_pt_middle_end {
unsigned start,
unsigned count);
/* Transform all vertices in a linear range and then draw them with
* the supplied element list.
*/
void (*run_linear_elts)( struct draw_pt_middle_end *,
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count );
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
};

View file

@ -311,6 +311,53 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
}
/* run_linear_elts hook of the fetch/emit middle end.
 *
 * Flushes the backend, obtains a hardware vertex buffer for `count`
 * vertices from the render backend, lets the translate object fill it
 * (start/count are forwarded to translate->run), then passes
 * draw_elts/draw_count to the backend draw hook and releases the
 * vertex buffer again.
 *
 * If the backend cannot provide a vertex buffer the function asserts
 * and returns without drawing.
 */
static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
void *hw_verts;
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
/* Both the stride and the vertex count are narrowed to ushort for the
 * allocation call.
 */
hw_verts = draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count );
if (!hw_verts) {
assert(0);
return;
}
/* Single routine to fetch vertices and emit HW verts.
*/
feme->translate->run( feme->translate,
start,
count,
hw_verts );
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
draw->render->draw( draw->render,
draw_elts,
draw_count );
/* Done -- hand the vertex buffer back to the backend.
*/
draw->render->release_vertices( draw->render,
hw_verts,
feme->translate->key.output_stride,
count );
}
static void fetch_emit_finish( struct draw_pt_middle_end *middle )
{
@ -343,6 +390,7 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
fetch_emit->base.run_linear = fetch_emit_run_linear;
fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts;
fetch_emit->base.finish = fetch_emit_finish;
fetch_emit->base.destroy = fetch_emit_destroy;

View file

@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
draw->pt.vertex_buffer[buf].pitch );
}
fse->active->set_constants( fse->active,
(const float (*)[4])draw->pt.user.constants );
fse->active->set_viewport( fse->active,
&draw->viewport );
//return TRUE;
}
@ -316,6 +310,54 @@ fse_run(struct draw_pt_middle_end *middle,
}
/* run_linear_elts hook of the fetch/shade/emit middle end.
 *
 * Flushes the backend, obtains a hardware vertex buffer from the render
 * backend, lets the active varient fill it (start/count are forwarded
 * to run_linear), then passes draw_elts/draw_count to the backend draw
 * hook and releases the vertex buffer.
 *
 * If the backend cannot provide a vertex buffer the function asserts
 * and returns without drawing.
 */
static void fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
/* The allocation is sized from count passed through align(count, 4). */
unsigned alloc_count = align(count, 4);
char *hw_verts;
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
hw_verts = draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)alloc_count );
if (!hw_verts) {
assert(0);
return;
}
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
* or for some other reason not required...
*/
fse->active->run_linear( fse->active,
start, count,
hw_verts );
draw->render->draw( draw->render,
draw_elts,
draw_count );
/* NOTE(review): the buffer was allocated with alloc_count but is
 * released with count -- confirm release_vertices expects the number
 * of vertices used rather than the number allocated.
 */
draw->render->release_vertices( draw->render,
hw_verts,
fse->key.output_stride,
count );
}
static void fse_finish( struct draw_pt_middle_end *middle )
{
}
@ -336,6 +378,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
fse->base.prepare = fse_prepare;
fse->base.run = fse_run;
fse->base.run_linear = fse_run_linear;
fse->base.run_linear_elts = fse_run_linear_elts;
fse->base.finish = fse_finish;
fse->base.destroy = fse_destroy;
fse->draw = draw;

View file

@ -81,9 +81,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
draw->bypass_clipping,
draw->identity_viewport,
draw->rasterizer->gl_rasterization_rules );
(boolean)draw->bypass_clipping,
(boolean)draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules );
if (!(opt & PT_PIPELINE))
@ -98,7 +98,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
@ -251,6 +250,84 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
/* run_linear_elts hook of the general fetch/pipeline middle end.
 *
 * Allocates a temporary array of pipeline vertices, fetches `count`
 * vertices into it (start/count are forwarded to
 * draw_pt_fetch_run_linear), optionally runs the vertex shader and the
 * post-VS step, and then sends the result together with
 * draw_elts/draw_count either through the draw pipeline or straight to
 * the emit path.  The temporary array is freed before returning.
 *
 * If the temporary array cannot be allocated the function asserts and
 * returns without drawing.
 */
static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vs.vertex_shader;
/* Local copy: PT_PIPELINE may be OR'ed in below without touching fpme->opt. */
unsigned opt = fpme->opt;
/* Storage is sized from count passed through align_int(count, 4). */
unsigned alloc_count = align_int( count, 4 );
struct vertex_header *pipeline_verts =
(struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
if (!pipeline_verts) {
/* Not much we can do here - just skip the rendering.
*/
assert(0);
return;
}
/* Fetch into our vertex buffer
*/
draw_pt_fetch_run_linear( fpme->fetch,
start,
count,
(char *)pipeline_verts );
/* Run the shader, note that this overwrites the data[] parts of
* the pipeline verts. If there is no shader, ie a bypass shader,
* then the inputs == outputs, and are already in the correct
* place.
*/
if (opt & PT_SHADE)
{
shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
(const float (*)[4])draw->pt.user.constants,
count,
fpme->vertex_size,
fpme->vertex_size);
}
/* A non-zero result from the post-VS step forces the pipeline path. */
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
count,
fpme->vertex_size ))
{
opt |= PT_PIPELINE;
}
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
draw_pipeline_run( fpme->draw,
fpme->prim,
pipeline_verts,
count,
fpme->vertex_size,
draw_elts,
draw_count );
}
else {
draw_pt_emit( fpme->emit,
(const float (*)[4])pipeline_verts->data,
count,
fpme->vertex_size,
draw_elts,
draw_count );
}
FREE(pipeline_verts);
}
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
{
/* nothing to do */
@ -282,6 +359,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
fpme->base.prepare = fetch_pipeline_prepare;
fpme->base.run = fetch_pipeline_run;
fpme->base.run_linear = fetch_pipeline_linear_run;
fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
fpme->base.finish = fetch_pipeline_finish;
fpme->base.destroy = fetch_pipeline_destroy;

View file

@ -10,7 +10,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
unsigned i, j, flags;
unsigned i, j;
ushort flags;
unsigned first, incr;
varray->fetch_start = start;
@ -200,9 +201,9 @@ static void FUNC(struct draw_pt_front_end *frontend,
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (j = 0; j + first <= count; j += i) {

View file

@ -36,8 +36,8 @@
#include "draw/draw_pt.h"
#define CACHE_MAX 32
#define FETCH_MAX 128
#define CACHE_MAX 1024
#define FETCH_MAX 4096
#define DRAW_MAX (16*1024)
struct vcache_frontend {
@ -201,7 +201,128 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
/**
 * Convert `count` 32-bit indices to ushort, adding `delta` to each one.
 *
 * The sum is computed in unsigned arithmetic and then truncated to
 * ushort, so a negative delta rebases the indices downwards.
 */
static void translate_uint_elts( const unsigned *src,
                                 unsigned count,
                                 int delta,
                                 ushort *dest )
{
   const unsigned *end = src + count;

   while (src != end)
      *dest++ = (ushort)(*src++ + delta);
}
/**
 * Copy `count` 16-bit indices, adding `delta` to each one and
 * truncating the result back to ushort.
 */
static void translate_ushort_elts( const ushort *src,
                                   unsigned count,
                                   int delta,
                                   ushort *dest )
{
   const ushort *end = src + count;

   while (src != end)
      *dest++ = (ushort)(*src++ + delta);
}
/**
 * Widen `count` 8-bit indices to ushort, adding `delta` to each one
 * and truncating the result to ushort.
 */
static void translate_ubyte_elts( const ubyte *src,
                                  unsigned count,
                                  int delta,
                                  ushort *dest )
{
   const ubyte *end = src + count;

   while (src != end)
      *dest++ = (ushort)(*src++ + delta);
}
#if 0
/* Disabled helper: maps the current element size (1, 2 or 4 bytes) to
 * the matching single-channel pipe format, PIPE_FORMAT_NONE otherwise.
 *
 * NOTE(review): the body reads draw->pt.user.eltSize, but `draw` is not
 * a parameter of this function and get_elt is never used -- this would
 * need a draw context argument before it could be enabled.
 */
static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
{
switch (draw->pt.user.eltSize) {
case 1: return PIPE_FORMAT_R8_UNORM;
case 2: return PIPE_FORMAT_R16_UNORM;
case 4: return PIPE_FORMAT_R32_UNORM;
default: return PIPE_FORMAT_NONE;
}
}
#endif
/**
 * Front-end run hook that tries a "fetch a linear range, draw with the
 * application's indices" fast path before falling back to vcache_run().
 *
 * The fast path is taken only when the index range recorded in
 * draw->pt.user is known (max_index != 0xffffffff), the number of
 * vertices in that range is below FETCH_MAX, and the range is not
 * larger than the number of indices being drawn.  In that case the
 * indices are rebased to start at zero and narrowed to ushort (reusing
 * the caller's buffer directly when it is already ushort and
 * min_index is 0), and the middle end's run_linear_elts hook is called
 * with [min_index, fetch_count].
 *
 * \param frontend    the vcache front end
 * \param get_elt     element accessor, only used by the fallback path
 * \param elts        the application's index buffer
 * \param draw_count  number of indices in elts
 */
static void vcache_check_run( struct draw_pt_front_end *frontend,
                              pt_elt_func get_elt,
                              const void *elts,
                              unsigned draw_count )
{
   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
   struct draw_context *draw = vcache->draw;
   unsigned min_index = draw->pt.user.min_index;
   unsigned max_index = draw->pt.user.max_index;
   unsigned index_size = draw->pt.user.eltSize;
   /* Unsigned arithmetic: wraps for max_index == 0xffffffff, which is
    * rejected below before fetch_count is used.
    */
   unsigned fetch_count = max_index + 1 - min_index;
   const ushort *transformed_elts;
   ushort *storage = NULL;

   if (0) debug_printf("fetch_count %d draw_count %d\n", fetch_count, draw_count);

   if (max_index == 0xffffffff ||
       fetch_count >= FETCH_MAX ||
       fetch_count > draw_count) {
      if (0) debug_printf("fail\n");
      goto fail;
   }

   if (min_index == 0 &&
       index_size == 2)
   {
      /* Already zero-based ushort indices: use them in place. */
      transformed_elts = (const ushort *)elts;
   }
   else
   {
      storage = MALLOC( draw_count * sizeof(ushort) );
      if (!storage)
         goto fail;

      /* Rebase every index by -min_index while narrowing to ushort. */
      switch(index_size) {
      case 1:
         translate_ubyte_elts( (const ubyte *)elts,
                               draw_count,
                               0 - (int)min_index,
                               storage );
         break;

      case 2:
         translate_ushort_elts( (const ushort *)elts,
                                draw_count,
                                0 - (int)min_index,
                                storage );
         break;

      case 4:
         translate_uint_elts( (const uint *)elts,
                              draw_count,
                              0 - (int)min_index,
                              storage );
         break;

      default:
         /* Unknown index size: nothing is drawn, but the scratch
          * buffer must not be leaked on the way out.
          */
         assert(0);
         FREE(storage);
         return;
      }
      transformed_elts = storage;
   }

   vcache->middle->run_linear_elts( vcache->middle,
                                    min_index, /* start */
                                    fetch_count,
                                    transformed_elts,
                                    draw_count );

   /* storage is NULL when the caller's indices were used directly. */
   FREE(storage);
   return;

 fail:
   vcache_run( frontend, get_elt, elts, draw_count );
}
@ -219,7 +340,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
}
else
{
vcache->base.run = vcache_run;
vcache->base.run = vcache_check_run;
}
vcache->input_prim = prim;

View file

@ -10,7 +10,8 @@ static void FUNC( struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
unsigned i, flags;
unsigned i;
ushort flags;
switch (vcache->input_prim) {
@ -138,9 +139,9 @@ static void FUNC( struct draw_pt_front_end *frontend,
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;

View file

@ -41,6 +41,36 @@
/**
 * Install a new vertex-shader constant buffer.
 *
 * If the caller's buffer is not 16-byte aligned, its contents are
 * copied into a 16-byte aligned buffer owned by the draw context, and
 * that copy is used instead.  The owned buffer is grown on demand and
 * kept for later calls.
 *
 * \param draw       the draw context
 * \param constants  the constant buffer
 * \param size       passed as the byte count to align_malloc/memcpy
 */
void draw_vs_set_constants( struct draw_context *draw,
                            const float (*constants)[4],
                            unsigned size )
{
   /* Only the low four address bits matter for the alignment test. */
   if (((size_t)constants) & 0xf) {
      if (size > draw->vs.const_storage_size) {
         if (draw->vs.aligned_constant_storage)
            align_free( (void *)draw->vs.aligned_constant_storage );

         draw->vs.aligned_constant_storage = align_malloc( size, 16 );

         /* Remember the capacity so the buffer is reused instead of
          * being freed and reallocated on every call; zero it when the
          * allocation failed so the next call retries.
          */
         draw->vs.const_storage_size =
            draw->vs.aligned_constant_storage ? size : 0;
      }

      if (draw->vs.aligned_constant_storage) {
         memcpy( (void *)draw->vs.aligned_constant_storage,
                 constants,
                 size );
         constants = draw->vs.aligned_constant_storage;
      }
      /* else: allocation failed -- fall through with the caller's
       * (unaligned) buffer rather than copying through a NULL pointer.
       * NOTE(review): confirm the consumers tolerate an unaligned
       * pointer in this out-of-memory case.
       */
   }

   draw->vs.aligned_constants = constants;
   draw_vs_aos_machine_constants( draw->vs.aos_machine, constants );
}
/**
 * Forward a new viewport state to the draw context's AOS machine.
 */
void draw_vs_set_viewport( struct draw_context *draw,
                           const struct pipe_viewport_state *viewport )
{
   struct aos_machine *machine = draw->vs.aos_machine;

   draw_vs_aos_machine_viewport( machine, viewport );
}
struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
@ -83,6 +113,13 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
unsigned i;
for (i = 0; i < dvs->nr_varients; i++)
dvs->varient[i]->destroy( dvs->varient[i] );
dvs->nr_varients = 0;
dvs->delete( dvs );
}
@ -110,6 +147,10 @@ draw_vs_init( struct draw_context *draw )
draw->vs.fetch_cache = translate_cache_create();
if (!draw->vs.fetch_cache)
return FALSE;
draw->vs.aos_machine = draw_vs_aos_machine();
if (!draw->vs.aos_machine)
return FALSE;
return TRUE;
}
@ -129,6 +170,12 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.emit_cache)
translate_cache_destroy(draw->vs.emit_cache);
if (draw->vs.aos_machine)
draw_vs_aos_machine_destroy(draw->vs.aos_machine);
if (draw->vs.aligned_constant_storage)
align_free(draw->vs.aligned_constant_storage);
tgsi_exec_machine_free_data(&draw->vs.machine);
}
@ -153,10 +200,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs,
if (varient == NULL)
return NULL;
/* Add it to our list:
/* Add it to our list, could be smarter:
*/
assert(vs->nr_varients < Elements(vs->varient));
vs->varient[vs->nr_varients++] = varient;
if (vs->nr_varients < Elements(vs->varient)) {
vs->varient[vs->nr_varients++] = varient;
}
else {
vs->last_varient++;
vs->last_varient %= Elements(vs->varient);
vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
vs->varient[vs->last_varient] = varient;
}
/* Done
*/

View file

@ -70,16 +70,6 @@ struct draw_vs_varient_key {
struct draw_vs_varient;
typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
const unsigned *elts,
unsigned count,
void *output_buffer);
typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
unsigned start,
unsigned count,
void *output_buffer);
struct draw_vs_varient {
struct draw_vs_varient_key key;
@ -91,12 +81,6 @@ struct draw_vs_varient {
const void *ptr,
unsigned stride );
void (*set_constants)( struct draw_vs_varient *,
const float (*constants)[4] );
void (*set_viewport)( struct draw_vs_varient *,
const struct pipe_viewport_state * );
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
unsigned count,
@ -123,10 +107,15 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
/* Extracted from shader:
*/
const float (*immediates)[4];
/*
*/
struct draw_vs_varient *varient[16];
unsigned nr_varients;
unsigned last_varient;
struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
const struct draw_vs_varient_key *key );
@ -213,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key
}
struct aos_machine *draw_vs_aos_machine( void );
void draw_vs_aos_machine_destroy( struct aos_machine *machine );
void draw_vs_aos_machine_constants( struct aos_machine *machine,
const float (*constants)[4] );
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
const struct pipe_viewport_state *viewport );
#define MAX_TGSI_VERTICES 4

View file

@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
a.disp == b.disp);
}
/**
 * Return the temp_EBP register set up for the pointer selected by
 * `value` (X86_IMMEDIATES, X86_CONSTANTS or X86_ATTRIBS).
 *
 * cp->ebp remembers which selector temp_EBP was last set up for; code
 * is only emitted when a different selector is requested.
 */
struct x86_reg aos_get_x86( struct aos_compilation *cp,
                            unsigned value )
{
   unsigned offset;

   /* temp_EBP was already set up for this selector: nothing to emit. */
   if (cp->ebp == value)
      return cp->temp_EBP;

   /* Pick the aos_machine field that corresponds to the selector. */
   switch (value) {
   case X86_IMMEDIATES:
      offset = Offset(struct aos_machine, immediates);
      break;
   case X86_CONSTANTS:
      offset = Offset(struct aos_machine, constants);
      break;
   case X86_ATTRIBS:
      offset = Offset(struct aos_machine, attrib);
      break;
   default:
      assert(0);
      offset = 0;
   }

   /* Emit a mov of that field (relative to machine_EDX) into temp_EBP. */
   x86_mov(cp->func, cp->temp_EBP,
           x86_make_disp(cp->machine_EDX, offset));

   cp->ebp = value;
   return cp->temp_EBP;
}
static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
unsigned file,
@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
case TGSI_FILE_TEMPORARY:
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
case TGSI_FILE_IMMEDIATE:
return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
case TGSI_FILE_CONSTANT:
return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
case AOS_FILE_INTERNAL:
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
case TGSI_FILE_IMMEDIATE:
return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));
case TGSI_FILE_CONSTANT:
return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));
default:
ERROR(cp, "unknown reg file");
return x86_make_reg(0,0);
@ -118,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
#define X87_CW_ROUND_MASK (3<<10)
#define X87_CW_INFINITY (1<<12)
static void do_populate_lut( struct shine_tab *tab,
float unclamped_exponent )
{
const float epsilon = 1.0F / 256.0F;
float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
unsigned i;
tab->exponent = unclamped_exponent; /* for later comparison */
tab->values[0] = 0;
if (exponent == 0) {
for (i = 1; i < 258; i++) {
tab->values[i] = 1.0;
}
}
else {
for (i = 1; i < 258; i++) {
tab->values[i] = powf((float)i * epsilon, exponent);
}
}
}
static void init_internals( struct aos_machine *machine )
{
unsigned i;
float inv = 1.0f/255.0f;
float f255 = 255.0f;
ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
*(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
X87_CW_EXCEPTION_DENORM_OP |
X87_CW_EXCEPTION_ZERO_DIVIDE |
X87_CW_EXCEPTION_OVERFLOW |
X87_CW_EXCEPTION_UNDERFLOW |
X87_CW_EXCEPTION_PRECISION |
(1<<6) |
X87_CW_ROUND_NEAREST |
X87_CW_PRECISION_DOUBLE_EXT);
assert(machine->fpu_rnd_nearest == 0x37f);
machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
X87_CW_EXCEPTION_DENORM_OP |
X87_CW_EXCEPTION_ZERO_DIVIDE |
X87_CW_EXCEPTION_OVERFLOW |
X87_CW_EXCEPTION_UNDERFLOW |
X87_CW_EXCEPTION_PRECISION |
(1<<6) |
X87_CW_ROUND_DOWN |
X87_CW_PRECISION_DOUBLE_EXT);
for (i = 0; i < MAX_SHINE_TAB; i++)
do_populate_lut( &machine->shine_tab[i], 1.0f );
}
static void spill( struct aos_compilation *cp, unsigned idx )
@ -1189,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
static PIPE_CDECL void do_lit( struct aos_machine *machine,
float *result,
const float *in,
unsigned count )
{
if (in[0] > 0)
{
if (in[1] <= 0.0)
{
result[0] = 1.0F;
result[1] = in[0];
result[2] = 1.0;
result[3] = 1.0F;
}
else
{
const float epsilon = 1.0F / 256.0F;
float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
result[0] = 1.0F;
result[1] = in[0];
result[2] = powf(in[1], exponent);
result[3] = 1.0;
}
}
else
{
result[0] = 1.0F;
result[1] = 0.0;
result[2] = 0.0;
result[3] = 1.0F;
}
}
static PIPE_CDECL void do_lit_lut( struct aos_machine *machine,
float *result,
const float *in,
unsigned count )
{
if (in[0] > 0)
{
if (in[1] <= 0.0)
{
result[0] = 1.0F;
result[1] = in[0];
result[2] = 1.0;
result[3] = 1.0F;
return;
}
if (machine->lit_info[count].shine_tab->exponent != in[3]) {
machine->lit_info[count].func = do_lit;
goto no_luck;
}
if (in[1] <= 1.0)
{
const float *tab = machine->lit_info[count].shine_tab->values;
float f = in[1] * 256;
int k = (int)f;
float frac = f - (float)k;
result[0] = 1.0F;
result[1] = in[0];
result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
result[3] = 1.0;
return;
}
no_luck:
{
const float epsilon = 1.0F / 256.0F;
float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
result[0] = 1.0F;
result[1] = in[0];
result[2] = powf(in[1], exponent);
result[3] = 1.0;
}
}
else
{
result[0] = 1.0F;
result[1] = 0.0;
result[2] = 0.0;
result[3] = 1.0F;
}
}
static void PIPE_CDECL populate_lut( struct aos_machine *machine,
float *result,
const float *in,
unsigned count )
{
unsigned i, tab;
/* Search for an existing table for this value. Note that without
* static analysis we don't really know if in[3] will be constant,
* but it usually is...
*/
for (tab = 0; tab < 4; tab++) {
if (machine->shine_tab[tab].exponent == in[3]) {
goto found;
}
}
for (tab = 0, i = 1; i < 4; i++) {
if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
tab = i;
}
if (machine->shine_tab[tab].last_used == machine->now) {
/* No unused tables (this is not a ffvertex program...). Just
* call pow each time:
*/
machine->lit_info[count].func = do_lit;
machine->lit_info[count].func( machine, result, in, count );
return;
}
else {
do_populate_lut( &machine->shine_tab[tab], in[3] );
}
found:
machine->shine_tab[tab].last_used = machine->now;
machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
machine->lit_info[count].func = do_lit_lut;
machine->lit_info[count].func( machine, result, in, count );
}
@ -1382,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
Offset(struct lit_info, func)));
}
else {
x86_mov_reg_imm( cp->func, ecx, (int)do_lit );
x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
}
x86_call( cp->func, ecx );
@ -1403,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
#if 0
static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
@ -1464,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
return TRUE;
}
#endif
@ -1533,11 +1372,20 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
/* Thin wrapper around powf().
 * Gives generated code a cdecl entry point that takes and returns
 * plain floats.
 */
static float PIPE_CDECL _powerf( float x, float y )
{
   const float result = powf( x, y );

   return result;
}
/* Really not sufficient -- need to check for conditions that could
* generate inf/nan values, which will slow things down hugely.
*/
static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
#if 0
x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */
x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */
x87_fyl2x(cp->func); /* a1*log2(a0) */
@ -1545,6 +1393,42 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
#else
uint i;
/* For absolute correctness, need to spill/invalidate all XMM regs
* too.
*/
for (i = 0; i < 8; i++) {
if (cp->xmm[i].dirty)
spill(cp, i);
aos_release_xmm_reg(cp, i);
}
/* Push caller-save (ie scratch) regs.
*/
x86_cdecl_caller_push_regs( cp->func );
x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) );
x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) );
x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf );
x86_call( cp->func, cp->tmp_EAX );
x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) );
x86_cdecl_caller_pop_regs( cp->func );
/* Note retval on x87 stack:
*/
cp->func->x87_stack++;
x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
#endif
return TRUE;
}
@ -1865,6 +1749,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
}
#if 0
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@ -1877,6 +1762,7 @@ static boolean note_immediate( struct aos_compilation *cp,
return TRUE;
}
#endif
@ -1912,7 +1798,7 @@ static void find_last_write_outputs( struct aos_compilation *cp )
}
#define ARG_VARIENT 1
#define ARG_MACHINE 1
#define ARG_START_ELTS 2
#define ARG_COUNT 3
#define ARG_OUTBUF 4
@ -1939,6 +1825,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
cp.stack_ESP = x86_make_reg( file_REG32, reg_SP );
x86_init_func(cp.func);
@ -1946,11 +1834,12 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_push(cp.func, cp.idx_EBX);
x86_push(cp.func, cp.count_ESI);
x86_push(cp.func, cp.temp_EBP);
/* Load arguments into regs:
*/
x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT));
x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
@ -1962,11 +1851,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
fixup = x86_jcc_forward(cp.func, cc_E);
/* Dig out the machine pointer from inside the varient arg
*/
x86_mov(cp.func, cp.machine_EDX,
x86_make_disp(cp.machine_EDX,
Offset( struct draw_vs_varient_aos_sse, machine )));
save_fpu_state( &cp );
set_fpu_round_nearest( &cp );
@ -1988,8 +1872,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
#if 0
if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
goto fail;
#endif
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
@ -2072,6 +1958,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
if (cp.func->need_emms)
mmx_emms(cp.func);
x86_pop(cp.func, cp.temp_EBP);
x86_pop(cp.func, cp.count_ESI);
x86_pop(cp.func, cp.idx_EBX);
@ -2098,93 +1985,65 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
for (i = 0; i < vaos->base.key.nr_inputs; i++) {
if (vaos->base.key.element[i].in.buffer == buf) {
vaos->machine->attrib[i].input_ptr = ((char *)ptr +
vaos->base.key.element[i].in.offset);
vaos->machine->attrib[i].input_stride = stride;
vaos->attrib[i].input_ptr = ((char *)ptr +
vaos->base.key.element[i].in.offset);
vaos->attrib[i].input_stride = stride;
}
}
}
static void vaos_destroy( struct draw_vs_varient *varient )
static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
if (vaos->machine)
align_free( vaos->machine );
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
machine->attrib = vaos->attrib;
x86_release_func( &vaos->func[0] );
x86_release_func( &vaos->func[1] );
FREE(vaos);
}
static void vaos_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
vaos->gen_run_elts( varient,
vaos->gen_run_elts( machine,
elts,
count,
output_buffer );
}
static void vaos_run_linear( struct draw_vs_varient *varient,
unsigned start,
unsigned count,
void *output_buffer )
static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
unsigned start,
unsigned count,
void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
vaos->gen_run_linear( varient,
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
machine->attrib = vaos->attrib;
vaos->gen_run_linear( machine,
start,
count,
output_buffer );
}
static void vaos_set_constants( struct draw_vs_varient *varient,
const float (*constants)[4] )
static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
memcpy(vaos->machine->constant,
constants,
(vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
FREE( vaos->attrib );
#if 0
unsigned i;
for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
debug_printf("state %d: %f %f %f %f\n",
i,
constants[i][0],
constants[i][1],
constants[i][2],
constants[i][3]);
#endif
x86_release_func( &vaos->func[0] );
x86_release_func( &vaos->func[1] );
{
unsigned i;
for (i = 0; i < MAX_LIT_INFO; i++) {
vaos->machine->lit_info[i].func = populate_lut;
vaos->machine->now++;
}
}
}
static void vaos_set_viewport( struct draw_vs_varient *varient,
const struct pipe_viewport_state *viewport )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float));
memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
FREE(vaos);
}
@ -2200,19 +2059,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.key = *key;
vaos->base.vs = vs;
vaos->base.set_input = vaos_set_buffer;
vaos->base.set_constants = vaos_set_constants;
vaos->base.set_viewport = vaos_set_viewport;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
vaos->draw = vs->draw;
vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
if (!vaos->machine)
vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
if (!vaos->attrib)
goto fail;
memset(vaos->machine, 0, sizeof(struct aos_machine));
init_internals(vaos->machine);
tgsi_dump(vs->state.tokens, 0);
@ -2222,19 +2077,19 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!build_vertex_program( vaos, FALSE ))
goto fail;
vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]);
vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
if (!vaos->gen_run_linear)
goto fail;
vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]);
vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
if (!vaos->gen_run_elts)
goto fail;
return &vaos->base;
fail:
if (vaos->machine)
align_free( vaos->machine );
if (vaos && vaos->attrib)
FREE(vaos->attrib);
if (vaos)
x86_release_func( &vaos->func[0] );

View file

@ -60,10 +60,16 @@ struct x86_function;
#define FPU_RND_NEAREST 2
struct aos_machine;
typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
float *result,
const float *in,
unsigned count );
/* LIT implementation that does not use the shininess lookup tables; when
 * a power has to be evaluated it calls powf() directly.  Has the same
 * signature as lit_func so it can be stored in a lit_info slot.
 */
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
float *result,
const float *in,
unsigned count );
struct shine_tab {
float exponent;
float values[258];
@ -78,6 +84,14 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
/* Location of one vertex input attribute.  The generated fetch code
 * computes an element's address as input_ptr + element * input_stride.
 */
struct aos_attrib {
const void *input_ptr;
unsigned input_stride;
};
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@ -86,8 +100,6 @@ struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
float scale[4]; /* viewport */
@ -105,12 +117,10 @@ struct aos_machine {
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
struct {
const void *input_ptr;
unsigned input_stride;
const float (*immediates)[4]; /* points to shader data */
const float (*constants)[4]; /* points to draw data */
unsigned output_offset;
} attrib[PIPE_MAX_ATTRIBS];
const struct aos_attrib *attrib; /* points to ? */
};
@ -132,6 +142,7 @@ struct aos_compilation {
unsigned last_used;
} xmm[8];
unsigned ebp; /* one of X86_* */
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
@ -148,6 +159,8 @@ struct aos_compilation {
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
struct x86_reg temp_EBP;
struct x86_reg stack_ESP;
};
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
@ -192,25 +205,34 @@ do { \
} while (0)
#define X86_NULL 0
#define X86_IMMEDIATES 1
#define X86_CONSTANTS 2
#define X86_ATTRIBS 3
struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned value );
/* Signature of the run-time generated entry point that processes an
 * indexed list of vertices.  It receives the shared aos_machine, the
 * element (index) array, the number of vertices and the output buffer.
 */
typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
const unsigned *elts,
unsigned count,
void *output_buffer);
/* Signature of the run-time generated entry point that processes a
 * contiguous run of vertices.  It receives the shared aos_machine, the
 * first vertex index, the number of vertices and the output buffer.
 */
typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
unsigned start,
unsigned count,
void *output_buffer);
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
#if 0
struct {
const void *ptr;
unsigned stride;
} attrib[PIPE_MAX_ATTRIBS];
#endif
struct aos_attrib *attrib;
struct aos_machine *machine; /* XXX: temporarily unshared */
vsv_run_linear_func gen_run_linear;
vsv_run_elts_func gen_run_elts;
vaos_run_linear_func gen_run_linear;
vaos_run_elts_func gen_run_elts;
struct x86_function func[2];

View file

@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
static void get_src_ptr( struct x86_function *func,
static void get_src_ptr( struct aos_compilation *cp,
struct x86_reg src,
struct x86_reg machine,
struct x86_reg elt,
unsigned a )
{
struct x86_reg input_ptr =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_ptr));
struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ),
a * sizeof(struct aos_attrib));
struct x86_reg input_stride =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_stride));
struct x86_reg input_ptr = x86_make_disp(attrib,
Offset(struct aos_attrib, input_ptr));
struct x86_reg input_stride = x86_make_disp(attrib,
Offset(struct aos_attrib, input_stride));
/* Calculate pointer to current attrib:
*/
x86_mov(func, src, input_stride);
x86_imul(func, src, elt);
x86_add(func, src, input_ptr);
x86_mov(cp->func, src, input_stride);
x86_imul(cp->func, src, elt);
x86_add(cp->func, src, input_ptr);
}
@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,
/* Figure out source pointer address:
*/
get_src_ptr(cp->func,
get_src_ptr(cp,
src,
cp->machine_EDX,
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
idx);

View file

@ -0,0 +1,297 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
#include "tgsi/util/tgsi_util.h"
#include "tgsi/exec/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "draw_vertex.h"
#include "rtasm/rtasm_x86sse.h"
/* Bit fields of the x87 FPU control word.  They are combined in
 * draw_vs_aos_machine() to build the control words that the generated
 * code loads.
 */

/* Bits 0..5: one bit per floating point exception.
 * NOTE(review): per the Intel SDM a set bit masks (disables) the
 * corresponding exception -- confirm against the manual.
 */
#define X87_CW_EXCEPTION_INV_OP (1<<0)
#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
#define X87_CW_EXCEPTION_PRECISION (1<<5)
/* Bits 8..9: precision control field. */
#define X87_CW_PRECISION_SINGLE (0<<8)
#define X87_CW_PRECISION_RESERVED (1<<8)
#define X87_CW_PRECISION_DOUBLE (2<<8)
#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
#define X87_CW_PRECISION_MASK (3<<8)
/* Bits 10..11: rounding control field. */
#define X87_CW_ROUND_NEAREST (0<<10)
#define X87_CW_ROUND_DOWN (1<<10)
#define X87_CW_ROUND_UP (2<<10)
#define X87_CW_ROUND_ZERO (3<<10)
#define X87_CW_ROUND_MASK (3<<10)
/* Bit 12: infinity control. */
#define X87_CW_INFINITY (1<<12)
/* Write a LIT result vector: { 1, diffuse, specular, 1 }. */
static void lit_store( float *result, float diffuse, float specular )
{
   result[0] = 1.0F;
   result[1] = diffuse;
   result[2] = specular;
   result[3] = 1.0F;
}

/* Specular term for a strictly positive base: base ^ clamp(exponent),
 * with the exponent clamped to +/-(128 - 1/256).
 */
static float lit_specular_pow( float base, float unclamped_exponent )
{
   const float epsilon = 1.0F / 256.0F;
   const float exponent = CLAMP(unclamped_exponent,
                                -(128.0F - epsilon),
                                (128.0F - epsilon));

   return powf(base, exponent);
}

/* Specular term for a non-positive base.  LIT treats such a base as
 * zero, so the result is 0 ^ exponent: 1 when the exponent is exactly
 * zero and 0 otherwise.  Returning 0 for negative exponents as well
 * keeps infinities out of the vertex pipeline.
 */
static float lit_specular_zero_base( float unclamped_exponent )
{
   return (unclamped_exponent == 0.0F) ? 1.0F : 0.0F;
}

/**
 * LIT implementation that evaluates the specular power with powf()
 * instead of a lookup table.
 *
 *    in[0] >  0:  result = { 1, in[0], max(in[1],0) ^ clamp(in[3]), 1 }
 *    otherwise:   result = { 1, 0, 0, 1 }
 *
 * All inputs are read before anything is written, so \p result may
 * alias \p in.
 *
 * \param machine  unused; present to match lit_func
 * \param result   receives four floats
 * \param in       source vector; elements 0, 1 and 3 are read
 * \param count    unused; present to match lit_func
 */
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
                            float *result,
                            const float *in,
                            unsigned count )
{
   const float x = in[0];
   const float y = in[1];
   const float w = in[3];

   (void) machine;
   (void) count;

   if (!(x > 0)) {
      lit_store(result, 0.0F, 0.0F);
      return;
   }

   if (y <= 0.0)
      lit_store(result, x, lit_specular_zero_base(w));
   else
      lit_store(result, x, lit_specular_pow(y, w));
}

/**
 * LIT implementation that uses the shininess table attached to
 * machine->lit_info[count].
 *
 * The table is only valid for the exponent it was built with.  If in[3]
 * differs, the slot is switched to aos_do_lit for subsequent calls and
 * this call evaluates powf() directly.  Bases above 1.0 are outside the
 * table's range and also go through powf().
 *
 * \param machine  machine holding the lit_info slots
 * \param result   receives four floats; may alias \p in
 * \param in       source vector; elements 0, 1 and 3 are read
 * \param count    index of the lit_info slot to use
 */
static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
                                   float *result,
                                   const float *in,
                                   unsigned count )
{
   const float x = in[0];
   const float y = in[1];
   const float w = in[3];
   float specular;

   if (!(x > 0)) {
      lit_store(result, 0.0F, 0.0F);
      return;
   }

   if (y <= 0.0) {
      /* Checked before the exponent comparison so that a non-positive
       * base never demotes the slot to aos_do_lit.
       */
      specular = lit_specular_zero_base(w);
   }
   else if (machine->lit_info[count].shine_tab->exponent != w) {
      /* Exponent is not constant for this slot: stop using the table. */
      machine->lit_info[count].func = aos_do_lit;
      specular = lit_specular_pow(y, w);
   }
   else if (y <= 1.0) {
      /* Linear interpolation between neighbouring table entries; the
       * table is sampled at multiples of 1/256.
       */
      const float *tab = machine->lit_info[count].shine_tab->values;
      const float f = y * 256;
      const int k = (int)f;
      const float frac = f - (float)k;

      specular = tab[k] + frac*(tab[k+1]-tab[k]);
   }
   else {
      specular = lit_specular_pow(y, w);
   }

   lit_store(result, x, specular);
}
/**
 * Fill a shininess table with samples of x ^ exponent at x = i / 256.
 *
 * The exponent used for the samples is clamped to +/-(128 - 1/256), but
 * the unclamped value is what is recorded in tab->exponent, because
 * that is what callers later compare against.  Entry 0 is always 0;
 * entries 1..257 are 1.0 when the clamped exponent is zero.
 */
static void do_populate_lut( struct shine_tab *tab,
                             float unclamped_exponent )
{
   const float step = 1.0F / 256.0F;
   const float exponent = CLAMP(unclamped_exponent,
                                -(128.0F - step),
                                (128.0F - step));
   unsigned idx;

   tab->exponent = unclamped_exponent; /* for later comparison */
   tab->values[0] = 0;

   for (idx = 1; idx < 258; idx++) {
      if (exponent == 0)
         tab->values[idx] = 1.0;
      else
         tab->values[idx] = powf((float)idx * step, exponent);
   }
}
/**
 * First-use LIT handler for a lit_info slot: choose (or build) a
 * shininess table for the exponent in[3], rebind the slot to the
 * matching implementation and evaluate this call through it.
 *
 * Without static analysis it is not known whether in[3] is constant,
 * but it usually is.  If every table has already been claimed during
 * the current value of machine->now, the slot falls back to aos_do_lit,
 * which calls powf() each time.
 *
 * \param machine  machine holding the tables and lit_info slots
 * \param result   receives four floats
 * \param in       source vector; in[3] selects the table
 * \param count    index of the lit_info slot being populated
 */
static void PIPE_CDECL populate_lut( struct aos_machine *machine,
                                     float *result,
                                     const float *in,
                                     unsigned count )
{
   unsigned i, tab;

   /* Search for an existing table built for this exponent. */
   for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
      if (machine->shine_tab[tab].exponent == in[3]) {
         goto found;
      }
   }

   /* None found: pick the least recently used table for recycling. */
   for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
      if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
         tab = i;
   }

   if (machine->shine_tab[tab].last_used == machine->now) {
      /* No unused tables (this is not a ffvertex program...).  Just
       * call pow each time:
       */
      machine->lit_info[count].func = aos_do_lit;
      machine->lit_info[count].func( machine, result, in, count );
      return;
   }

   do_populate_lut( &machine->shine_tab[tab], in[3] );

found:
   machine->shine_tab[tab].last_used = machine->now;
   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
   machine->lit_info[count].func = do_lit_lut;
   machine->lit_info[count].func( machine, result, in, count );
}
/**
 * Point the machine at a new constant buffer.
 *
 * Every LIT slot is reset to populate_lut so that its lookup table is
 * chosen again on next use.
 *
 * NOTE(review): machine->now is advanced once per slot rather than once
 * per call; a single increment looks sufficient -- confirm before
 * changing.
 */
void draw_vs_aos_machine_constants( struct aos_machine *machine,
                                    const float (*constants)[4] )
{
   unsigned slot = 0;

   machine->constants = constants;

   while (slot < MAX_LIT_INFO) {
      machine->lit_info[slot].func = populate_lut;
      machine->now++;
      slot++;
   }
}
/**
 * Copy the viewport transform (four scale and four translate floats)
 * into the machine.
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   const unsigned nbytes = 4 * sizeof(float);

   memcpy(machine->translate, viewport->translate, nbytes);
   memcpy(machine->scale, viewport->scale, nbytes);
}
/**
 * Release a machine obtained from draw_vs_aos_machine().  The storage
 * was allocated with align_malloc(), so it is returned with
 * align_free().
 */
void
draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   align_free( machine );
}
/**
 * Allocate and initialise the scratch machine used by the generated
 * vertex programs.
 *
 * The machine is 16-byte aligned and zero-filled.  This function then
 * sets up the internal constant vectors, the two x87 control words
 * (round-to-nearest and round-down) and the shininess tables, which
 * start out populated for exponent 1.0.
 *
 * \return the new machine, or NULL if allocation fails.  Release it
 *         with draw_vs_aos_machine_destroy().
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   /* Control word bits shared by both rounding modes: all six exception
    * bits, bit 6, and extended precision.
    */
   const unsigned cw_common = (X87_CW_EXCEPTION_INV_OP |
                               X87_CW_EXCEPTION_DENORM_OP |
                               X87_CW_EXCEPTION_ZERO_DIVIDE |
                               X87_CW_EXCEPTION_OVERFLOW |
                               X87_CW_EXCEPTION_UNDERFLOW |
                               X87_CW_EXCEPTION_PRECISION |
                               (1<<6) |
                               X87_CW_PRECISION_DOUBLE_EXT);
   const unsigned all_bits = 0xffffffff;
   const float inv = 1.0f/255.0f;
   const float f255 = 255.0f;
   struct aos_machine *machine;
   unsigned i;

   machine = align_malloc(sizeof(struct aos_machine), 16);
   if (!machine)
      return NULL;

   memset(machine, 0, sizeof(*machine));

   ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);

   /* The last element of IMM_SWZ is a bit mask, not a number.  Store
    * the bit pattern with memcpy rather than through a type-punned
    * pointer so the write does not run afoul of strict aliasing.
    */
   memcpy(&machine->internal[IMM_SWZ][3], &all_bits, sizeof all_bits);

   ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
   ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
   ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
   ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
   ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
   ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);

   machine->fpu_rnd_nearest = cw_common | X87_CW_ROUND_NEAREST;
   assert(machine->fpu_rnd_nearest == 0x37f);

   machine->fpu_rnd_neg_inf = cw_common | X87_CW_ROUND_DOWN;
   assert(machine->fpu_rnd_neg_inf == 0x77f);

   for (i = 0; i < MAX_SHINE_TAB; i++)
      do_populate_lut( &machine->shine_tab[i], 1.0f );

   return machine;
}

View file

@ -70,8 +70,6 @@ struct draw_sse_vertex_shader {
codegen_function func;
struct tgsi_exec_machine *machine;
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@ -109,7 +107,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
shader->immediates,
(float (*)[4])shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@ -132,6 +130,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
x86_release_func( &shader->sse2_program );
align_free(shader->base.immediates);
FREE( (void*) shader->base.state.tokens );
FREE( shader );
}
@ -163,12 +163,18 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);
vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
&vs->sse2_program, vs->immediates, TRUE ))
&vs->sse2_program,
(float (*)[4])vs->base.immediates,
TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );

View file

@ -44,8 +44,6 @@
struct draw_vs_varient_generic {
struct draw_vs_varient base;
struct pipe_viewport_state viewport;
struct draw_vertex_shader *shader;
struct draw_context *draw;
@ -57,21 +55,11 @@ struct draw_vs_varient_generic {
*/
struct translate *fetch;
struct translate *emit;
const float (*constants)[4];
};
static void vsvg_set_constants( struct draw_vs_varient *varient,
const float (*constants)[4] )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
vsvg->constants = constants;
}
static void vsvg_set_input( struct draw_vs_varient *varient,
unsigned buffer,
@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
const float *scale = vsvg->viewport.scale;
const float *trans = vsvg->viewport.translate;
const float *scale = vsvg->base.vs->draw->viewport.scale;
const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
const float *scale = vsvg->viewport.scale;
const float *trans = vsvg->viewport.translate;
const float *scale = vsvg->base.vs->draw->viewport.scale;
const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
}
static void vsvg_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
void *output_buffer)
static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
void *output_buffer)
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@ -150,7 +138,7 @@ static void vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
vsvg->constants,
(const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@ -186,10 +174,10 @@ static void vsvg_run_elts( struct draw_vs_varient *varient,
}
static void vsvg_run_linear( struct draw_vs_varient *varient,
unsigned start,
unsigned count,
void *output_buffer )
static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
unsigned start,
unsigned count,
void *output_buffer )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@ -206,7 +194,7 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
vsvg->constants,
(const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@ -245,13 +233,6 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
static void vsvg_set_viewport( struct draw_vs_varient *varient,
const struct pipe_viewport_state *viewport )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
vsvg->viewport = *viewport;
}
static void vsvg_destroy( struct draw_vs_varient *varient )
{
@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.key = *key;
vsvg->base.vs = vs;
vsvg->base.set_input = vsvg_set_input;
vsvg->base.set_constants = vsvg_set_constants;
vsvg->base.set_viewport = vsvg_set_viewport;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;

View file

@ -30,13 +30,14 @@
* Implementation of malloc-based buffers to store data that can't be processed
* by the hardware.
*
* \author José Fonseca <jrfonseca@tungstengraphics.com>
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
struct malloc_buffer
@ -125,3 +126,33 @@ pb_malloc_buffer_create(size_t size,
return &buf->base;
}
/**
 * Buffer-creation callback of the malloc buffer manager.
 *
 * The manager keeps no state of its own, so \p mgr is ignored and the
 * request is forwarded to pb_malloc_buffer_create().
 */
static struct pb_buffer *
pb_malloc_buffer_create_buffer(struct pb_manager *mgr,
                               size_t size,
                               const struct pb_desc *desc)
{
   struct pb_buffer *buf;

   (void) mgr;

   buf = pb_malloc_buffer_create(size, desc);
   return buf;
}
/**
 * Destroy callback of the malloc buffer manager.
 *
 * The only instance is the statically allocated pb_malloc_bufmgr, so
 * there is nothing to release.
 */
static void
pb_malloc_bufmgr_destroy(struct pb_manager *mgr)
{
   (void) mgr;   /* No-op */
}
/* The single, statically allocated malloc buffer manager.  It is handed
 * out by pb_malloc_bufmgr_create(); its members are initialised
 * positionally with the create-buffer and destroy callbacks above.
 */
static struct pb_manager
pb_malloc_bufmgr = {
pb_malloc_buffer_create_buffer,
pb_malloc_bufmgr_destroy
};
/**
 * Return the malloc buffer manager.
 *
 * Nothing is allocated: every call returns a pointer to the same static
 * object.
 */
struct pb_manager *
pb_malloc_bufmgr_create(void)
{
   struct pb_manager *mgr = &pb_malloc_bufmgr;

   return mgr;
}

View file

@ -79,6 +79,15 @@ struct pb_manager
};
/**
* Malloc buffer provider.
*
* Simple wrapper around pb_malloc_buffer_create for convenience.
*/
struct pb_manager *
pb_malloc_bufmgr_create(void);
/**
* Static buffer pool sub-allocator.
*

View file

@ -312,8 +312,8 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr,
struct list_head *list;
/* check size */
assert(size == mgr->bufSize);
if(size != mgr->bufSize)
assert(size <= mgr->bufSize);
if(size > mgr->bufSize)
return NULL;
/* check if we can provide the requested alignment */

View file

@ -330,7 +330,7 @@ struct x86_reg x86_make_disp( struct x86_reg reg,
else
reg.disp += disp;
if (reg.disp == 0)
if (reg.disp == 0 && reg.idx != reg_BP)
reg.mod = mod_INDIRECT;
else if (reg.disp <= 127 && reg.disp >= -128)
reg.mod = mod_DISP8;

View file

@ -198,7 +198,7 @@ get_coef(
static void
emit_retw(
struct x86_function *func,
unsigned size )
unsigned short size )
{
x86_retw( func, size );
}

View file

@ -754,7 +754,7 @@ tgsi_dump_instruction(
}
for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
if( !first_reg ) {
CHR( ',' );
@ -812,7 +812,7 @@ tgsi_dump_instruction(
}
for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
if( !first_reg ) {
CHR( ',' );

View file

@ -310,7 +310,7 @@ static void get_src_ptr( struct translate_sse *p,
static void emit_swizzle( struct translate_sse *p,
struct x86_reg dest,
struct x86_reg src,
unsigned shuffle )
unsigned char shuffle )
{
sse_shufps(p->func, dest, src, shuffle);
}
@ -535,7 +535,7 @@ static void translate_sse_release( struct translate *translate )
FREE(p);
}
static void translate_sse_run_elts( struct translate *translate,
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
void *output_buffer )
@ -548,7 +548,7 @@ static void translate_sse_run_elts( struct translate *translate,
output_buffer );
}
static void translate_sse_run( struct translate *translate,
static void PIPE_CDECL translate_sse_run( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer )

View file

@ -86,7 +86,9 @@ i915_draw_elements( struct pipe_context *pipe,
draw_set_mapped_constant_buffer(draw,
i915->current.constants[PIPE_SHADER_VERTEX]);
i915->current.constants[PIPE_SHADER_VERTEX],
( i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
4 * sizeof(float) ));
/* draw! */
draw_arrays(i915->draw, prim, start, count);

View file

@ -179,6 +179,7 @@ softpipe_create( struct pipe_screen *screen,
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
@ -227,11 +228,13 @@ softpipe_create( struct pipe_screen *screen,
if (GETENV( "SP_NO_RAST" ) != NULL)
softpipe->no_rast = TRUE;
if (GETENV( "SP_VBUF" ) != NULL) {
sp_init_vbuf(softpipe);
if (GETENV( "SP_NO_VBUF" ) != NULL) {
/* Deprecated path -- vbuf is the intended interface to the draw module:
*/
draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
}
else {
draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
sp_init_vbuf(softpipe);
}
/* plug in AA line/point stages */

View file

@ -54,7 +54,8 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
}
draw_set_mapped_constant_buffer(sp->draw,
sp->mapped_constants[PIPE_SHADER_VERTEX]);
sp->mapped_constants[PIPE_SHADER_VERTEX],
sp->constants[i].size);
}
static void
@ -68,7 +69,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
*/
draw_flush(sp->draw);
draw_set_mapped_constant_buffer(sp->draw, NULL);
draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
for (i = 0; i < 2; i++) {
if (sp->constants[i].size)
@ -108,11 +109,14 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
*
* XXX should the element buffer be specified/bound with a separate function?
*/
boolean
softpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned min_index,
unsigned max_index,
unsigned mode, unsigned start, unsigned count)
{
struct softpipe_context *sp = softpipe_context(pipe);
struct draw_context *draw = sp->draw;
@ -141,11 +145,14 @@ softpipe_draw_elements(struct pipe_context *pipe,
void *mapped_indexes
= pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
draw_set_mapped_element_buffer_range(draw, indexSize,
min_index,
max_index,
mapped_indexes);
}
else {
/* no index/element buffer */
draw_set_mapped_element_buffer(draw, 0, NULL);
draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
}
@ -171,6 +178,19 @@ softpipe_draw_elements(struct pipe_context *pipe,
return TRUE;
}
/**
 * Indexed draw without caller-supplied index bounds.
 *
 * Forwards to softpipe_draw_range_elements() with the widest possible
 * index range, [0, 0xffffffff].
 */
boolean
softpipe_draw_elements(struct pipe_context *pipe,
                       struct pipe_buffer *indexBuffer,
                       unsigned indexSize,
                       unsigned mode, unsigned start, unsigned count)
{
   const unsigned min_index = 0;
   const unsigned max_index = 0xffffffff;

   return softpipe_draw_range_elements( pipe, indexBuffer,
                                        indexSize,
                                        min_index, max_index,
                                        mode, start, count );
}
void
softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)

View file

@ -171,6 +171,13 @@ boolean softpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
boolean
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned min_index,
unsigned max_index,
unsigned mode, unsigned start, unsigned count);
void
softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);

View file

@ -76,6 +76,20 @@ struct pipe_context {
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
/* XXX: this is (probably) a temporary entrypoint, as the range
* information should be available from the vertex_buffer state.
* Using this to quickly evaluate a specialized path in the draw
* module.
*/
boolean (*draw_range_elements)( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned minIndex,
unsigned maxIndex,
unsigned mode,
unsigned start,
unsigned count);
/*@}*/

View file

@ -567,8 +567,9 @@ void
st_flush_bitmap_cache(struct st_context *st)
{
if (!st->bitmap.cache->empty) {
struct bitmap_cache *cache = st->bitmap.cache;
if (st->ctx->DrawBuffer) {
struct bitmap_cache *cache = st->bitmap.cache;
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = pipe->screen;
@ -592,10 +593,11 @@ st_flush_bitmap_cache(struct st_context *st)
st->ctx->Current.RasterPos[2],
BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
cache->texture);
/* release/free the texture */
pipe_texture_reference(&cache->texture, NULL);
}
/* release/free the texture */
pipe_texture_reference(&cache->texture, NULL);
reset_cache(st);
}
}

View file

@ -365,14 +365,33 @@ st_draw_vbo(GLcontext *ctx,
}
/* draw */
for (i = 0; i < nr_prims; i++) {
if (nr_prims == 1 && pipe->draw_range_elements != NULL) {
i = 0;
/* XXX: exercise temporary path to pass min/max directly
* through to driver & draw module. These interfaces still
* need a bit of work...
*/
setup_edgeflags(ctx, prims[i].mode,
prims[i].start + indexOffset, prims[i].count,
arrays[VERT_ATTRIB_EDGEFLAG]);
pipe->draw_elements(pipe, indexBuf, indexSize,
prims[i].mode,
prims[i].start + indexOffset, prims[i].count);
pipe->draw_range_elements(pipe, indexBuf, indexSize,
min_index,
max_index,
prims[i].mode,
prims[i].start + indexOffset, prims[i].count);
}
else {
for (i = 0; i < nr_prims; i++) {
setup_edgeflags(ctx, prims[i].mode,
prims[i].start + indexOffset, prims[i].count,
arrays[VERT_ATTRIB_EDGEFLAG]);
pipe->draw_elements(pipe, indexBuf, indexSize,
prims[i].mode,
prims[i].start + indexOffset, prims[i].count);
}
}
pipe_reference_buffer(pipe, &indexBuf, NULL);
@ -577,9 +596,10 @@ st_feedback_draw_vbo(GLcontext *ctx,
/* map constant buffers */
mapped_constants = pipe_buffer_map(pipe,
st->state.constants[PIPE_SHADER_VERTEX].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_constant_buffer(st->draw, mapped_constants);
st->state.constants[PIPE_SHADER_VERTEX].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_constant_buffer(st->draw, mapped_constants,
st->state.constants[PIPE_SHADER_VERTEX].buffer->size);
/* draw here */