st/nine: Initial ProcessVertices support

For now only VS 3 support is implemented.

This enables The Sims 2 to work.

Signed-off-by: Axel Davy <axel.davy@ens.fr>
This commit is contained in:
Axel Davy 2016-09-19 19:00:23 +02:00
parent 3bf02d383f
commit b9639c661f
11 changed files with 735 additions and 69 deletions

View file

@ -152,6 +152,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
list_inithead(&This->managed_textures);
This->screen = pScreen;
This->screen_sw = pCTX->ref;
This->caps = *pCaps;
This->d3d9 = pD3D9;
This->params = *pCreationParameters;
@ -195,9 +196,13 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->pipe = This->screen->context_create(This->screen, NULL, 0);
if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
This->pipe_sw = This->screen_sw->context_create(This->screen_sw, NULL, 0);
if (!This->pipe_sw) { return E_OUTOFMEMORY; }
This->cso = cso_create_context(This->pipe);
if (!This->cso) { return E_OUTOFMEMORY; } /* also a guess */
This->cso_sw = cso_create_context(This->pipe_sw);
if (!This->cso_sw) { return E_OUTOFMEMORY; }
/* Create first, it messes up our state. */
This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
@ -426,10 +431,14 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS);
This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS);
if (!This->driver_caps.user_vbufs)
This->vertex_uploader = u_upload_create(This->pipe, 65536,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
This->vertex_sw_uploader = u_upload_create(This->pipe_sw, 65536,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
if (!This->driver_caps.user_ibufs)
This->index_uploader = u_upload_create(This->pipe, 128 * 1024,
PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
@ -439,6 +448,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
}
This->constbuf_sw_uploader = u_upload_create(This->pipe_sw, 128 * 1024,
PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS);
This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
@ -457,6 +469,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->update = &This->state;
nine_update_state(This);
nine_state_init_sw(This);
ID3DPresentGroup_Release(This->present);
return D3D_OK;
@ -473,6 +487,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
if (This->pipe && This->cso)
nine_pipe_context_clear(This);
nine_ff_fini(This);
nine_state_destroy_sw(This);
nine_state_clear(&This->state, TRUE);
if (This->vertex_uploader)
@ -481,6 +496,10 @@ NineDevice9_dtor( struct NineDevice9 *This )
u_upload_destroy(This->index_uploader);
if (This->constbuf_uploader)
u_upload_destroy(This->constbuf_uploader);
if (This->vertex_sw_uploader)
u_upload_destroy(This->vertex_sw_uploader);
if (This->constbuf_sw_uploader)
u_upload_destroy(This->constbuf_sw_uploader);
nine_bind(&This->record, NULL);
@ -502,13 +521,11 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->swapchains);
}
/* state stuff */
if (This->pipe) {
if (This->cso) {
cso_destroy_context(This->cso);
}
if (This->pipe->destroy) { This->pipe->destroy(This->pipe); }
}
/* Destroy cso first */
if (This->cso) { cso_destroy_context(This->cso); }
if (This->cso_sw) { cso_destroy_context(This->cso_sw); }
if (This->pipe && This->pipe->destroy) { This->pipe->destroy(This->pipe); }
if (This->pipe_sw && This->pipe_sw->destroy) { This->pipe_sw->destroy(This->pipe_sw); }
if (This->present) { ID3DPresentGroup_Release(This->present); }
if (This->d3d9) { IDirect3D9_Release(This->d3d9); }
@ -3166,9 +3183,6 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
return D3D_OK;
}
/* TODO: Write to pDestBuffer directly if vertex declaration contains
* only f32 formats.
*/
HRESULT NINE_WINAPI
NineDevice9_ProcessVertices( struct NineDevice9 *This,
UINT SrcStartIndex,
@ -3178,33 +3192,69 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pVertexDecl,
DWORD Flags )
{
struct pipe_screen *screen = This->screen;
struct pipe_screen *screen_sw = This->screen_sw;
struct pipe_context *pipe_sw = This->pipe_sw;
struct NineVertexDeclaration9 *vdecl = NineVertexDeclaration9(pVertexDecl);
struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
struct NineVertexShader9 *vs;
struct pipe_resource *resource;
struct pipe_transfer *transfer = NULL;
struct pipe_stream_output_info so;
struct pipe_stream_output_target *target;
struct pipe_draw_info draw;
struct pipe_box box;
unsigned offsets[1] = {0};
HRESULT hr;
unsigned buffer_offset, buffer_size;
unsigned buffer_size;
void *map;
DBG("This=%p SrcStartIndex=%u DestIndex=%u VertexCount=%u "
"pDestBuffer=%p pVertexDecl=%p Flags=%d\n",
This, SrcStartIndex, DestIndex, VertexCount, pDestBuffer,
pVertexDecl, Flags);
if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
STUB(D3DERR_INVALIDCALL);
if (!screen_sw->get_param(screen_sw, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) {
DBG("ProcessVertices not supported\n");
return D3DERR_INVALIDCALL;
}
nine_update_state(This);
/* TODO: Create shader with stream output. */
STUB(D3DERR_INVALIDCALL);
struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
vs = This->state.programmable_vs ? This->state.vs : This->ff.vs;
/* Note: version is 0 for ff */
user_assert(vdecl || (vs->byte_code.version < 0x30 && dst->desc.FVF),
D3DERR_INVALIDCALL);
if (!vdecl) {
DWORD FVF = dst->desc.FVF;
vdecl = util_hash_table_get(This->ff.ht_fvf, &FVF);
if (!vdecl) {
hr = NineVertexDeclaration9_new_from_fvf(This, FVF, &vdecl);
if (FAILED(hr))
return hr;
vdecl->fvf = FVF;
util_hash_table_set(This->ff.ht_fvf, &vdecl->fvf, vdecl);
NineUnknown_ConvertRefToBind(NineUnknown(vdecl));
}
}
vs = This->state.vs ? This->state.vs : This->ff.vs;
/* Flags: Can be 0 or D3DPV_DONOTCOPYDATA, and/or lock flags
* D3DPV_DONOTCOPYDATA -> Has effect only for ff. In particular
* if not set, everything from src will be used, and dst
* must match exactly the ff vs outputs.
* TODO: Handle all the checks, etc for ff */
user_assert(vdecl->position_t || This->state.programmable_vs,
D3DERR_INVALIDCALL);
buffer_size = VertexCount * vs->so->stride[0];
if (1) {
/* TODO: Support vs < 3 and ff */
user_assert(vs->byte_code.version == 0x30,
D3DERR_INVALIDCALL);
/* TODO: Not hardcode the constant buffers for swvp */
user_assert(This->may_swvp,
D3DERR_INVALIDCALL);
nine_state_prepare_draw_sw(This, vdecl, SrcStartIndex, VertexCount, &so);
buffer_size = VertexCount * so.stride[0] * 4;
{
struct pipe_resource templ;
memset(&templ, 0, sizeof(templ));
@ -3217,49 +3267,50 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
templ.height0 = templ.depth0 = templ.array_size = 1;
templ.last_level = templ.nr_samples = 0;
resource = This->screen->resource_create(This->screen, &templ);
resource = screen_sw->resource_create(screen_sw, &templ);
if (!resource)
return E_OUTOFMEMORY;
buffer_offset = 0;
} else {
/* SO matches vertex declaration */
resource = NineVertexBuffer9_GetResource(dst);
buffer_offset = DestIndex * vs->so->stride[0];
}
target = This->pipe->create_stream_output_target(This->pipe, resource,
buffer_offset,
buffer_size);
target = pipe_sw->create_stream_output_target(pipe_sw, resource,
0, buffer_size);
if (!target) {
pipe_resource_reference(&resource, NULL);
return D3DERR_DRIVERINTERNALERROR;
}
if (!vdecl) {
hr = NineVertexDeclaration9_new_from_fvf(This, dst->desc.FVF, &vdecl);
if (FAILED(hr))
goto out;
}
init_draw_info(&draw, This, D3DPT_POINTLIST, VertexCount);
draw.instance_count = 1;
draw.indexed = FALSE;
draw.start = SrcStartIndex;
draw.start = 0;
draw.index_bias = 0;
draw.min_index = SrcStartIndex;
draw.max_index = SrcStartIndex + VertexCount - 1;
draw.min_index = 0;
draw.max_index = VertexCount - 1;
This->pipe->set_stream_output_targets(This->pipe, 1, &target, 0);
This->pipe->draw_vbo(This->pipe, &draw);
This->pipe->set_stream_output_targets(This->pipe, 0, NULL, 0);
This->pipe->stream_output_target_destroy(This->pipe, target);
pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets);
pipe_sw->draw_vbo(pipe_sw, &draw);
pipe_sw->set_stream_output_targets(pipe_sw, 0, NULL, 0);
pipe_sw->stream_output_target_destroy(pipe_sw, target);
u_box_1d(0, VertexCount * so.stride[0] * 4, &box);
map = pipe_sw->transfer_map(pipe_sw, resource, 0, PIPE_TRANSFER_READ, &box,
&transfer);
if (!map) {
hr = D3DERR_DRIVERINTERNALERROR;
goto out;
}
hr = NineVertexDeclaration9_ConvertStreamOutput(vdecl,
dst, DestIndex, VertexCount,
resource, vs->so);
map, &so);
if (transfer)
pipe_sw->transfer_unmap(pipe_sw, transfer);
out:
nine_state_after_draw_sw(This);
pipe_resource_reference(&resource, NULL);
if (!pVertexDecl)
NineUnknown_Release(NineUnknown(vdecl));
return hr;
}

View file

@ -52,8 +52,11 @@ struct NineDevice9
/* G3D context */
struct pipe_screen *screen;
struct pipe_screen *screen_sw;
struct pipe_context *pipe;
struct pipe_context *pipe_sw;
struct cso_context *cso;
struct cso_context *cso_sw;
/* creation parameters */
D3DCAPS9 caps;
@ -115,6 +118,8 @@ struct NineDevice9
boolean user_vbufs;
boolean user_ibufs;
boolean user_cbufs;
boolean user_sw_vbufs;
boolean user_sw_cbufs;
boolean window_space_position_support;
boolean vs_integer;
boolean ps_integer;
@ -128,6 +133,8 @@ struct NineDevice9
struct u_upload_mgr *vertex_uploader;
struct u_upload_mgr *index_uploader;
struct u_upload_mgr *constbuf_uploader;
struct u_upload_mgr *vertex_sw_uploader;
struct u_upload_mgr *constbuf_sw_uploader;
unsigned constbuf_alignment;
struct nine_range_pool range_pool;

View file

@ -26,6 +26,7 @@
#include "device9.h"
#include "nine_debug.h"
#include "nine_state.h"
#include "vertexdeclaration9.h"
#include "util/macros.h"
#include "util/u_memory.h"
@ -467,6 +468,7 @@ struct shader_translator
struct {
struct ureg_dst *r;
struct ureg_dst oPos;
struct ureg_dst oPos_out; /* the real output when doing streamout */
struct ureg_dst oFog;
struct ureg_dst oPts;
struct ureg_dst oCol[4];
@ -511,6 +513,9 @@ struct shader_translator
boolean indirect_const_access;
boolean failure;
struct nine_vs_output_info output_info[16];
int num_outputs;
struct nine_shader_info *info;
int16_t op_info_map[D3DSIO_BREAKP + 1];
@ -536,6 +541,17 @@ sm1_instruction_check(const struct sm1_instruction *insn)
}
}
/* Append one declared vertex shader output to tx->output_info so that stream
 * output information can later be derived from it.
 *
 * Usage / UsageIndex: D3D declaration usage and usage index of the output.
 * mask:               write mask of the output register.
 * output_index:       index of the shader output register.
 *
 * tx->output_info has a fixed capacity; when it is already full the output is
 * not recorded and the translation is flagged through tx->failure instead of
 * writing past the end of the array.
 * NOTE(review): assumes the caller checks tx->failure after parsing - confirm.
 */
static void
nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
                    int mask, int output_index)
{
    const int capacity =
        (int)(sizeof(tx->output_info) / sizeof(tx->output_info[0]));
    struct nine_vs_output_info *slot;

    if (tx->num_outputs < 0 || tx->num_outputs >= capacity) {
        tx->failure = TRUE;
        return;
    }

    slot = &tx->output_info[tx->num_outputs];
    slot->output_semantic = Usage;
    slot->output_semantic_index = UsageIndex;
    slot->mask = mask;
    slot->output_index = output_index;
    tx->num_outputs++;
}
static boolean
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
@ -2137,6 +2153,12 @@ DECL_SPECIAL(DCL)
assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
tx->regs.oPos = tx->regs.o[sem.reg.idx];
}
if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
@ -3348,6 +3370,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
info->version = (tx->version.major << 4) | tx->version.minor;
tx->num_outputs = 0;
create_op_info_map(tx);
}
@ -3361,6 +3385,26 @@ tx_dtor(struct shader_translator *tx)
FREE(tx);
}
/* CONST[0].xyz = width/2, -height/2, zmax-zmin
* CONST[1].xyz = x+width/2, y+height/2, zmin */
static void
shader_add_vs_viewport_transform(struct shader_translator *tx)
{
struct ureg_program *ureg = tx->ureg;
struct ureg_src c0 = NINE_CONSTANT_SRC(0);
struct ureg_src c1 = NINE_CONSTANT_SRC(1);
/* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
c0 = ureg_src_dimension(c0, 4);
c1 = ureg_src_dimension(c1, 4);
/* TODO: find out when we need to apply the viewport transformation or not.
* Likely will be XYZ vs XYZRHW in vdecl_out
* ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
* ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
*/
ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
}
static void
shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
{
@ -3412,10 +3456,10 @@ shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
}
#define GET_CAP(n) device->screen->get_param( \
device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
device->screen, info->type, PIPE_SHADER_CAP_##n)
#define GET_CAP(n) screen->get_param( \
screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param( \
screen, info->type, PIPE_SHADER_CAP_##n)
HRESULT
nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
@ -3423,6 +3467,8 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
struct shader_translator *tx;
HRESULT hr = D3D_OK;
const unsigned processor = info->type;
struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
struct pipe_context *pipe = info->process_vertices ? device->pipe_sw : device->pipe;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
@ -3535,6 +3581,9 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
info->point_size = TRUE;
}
if (info->process_vertices)
shader_add_vs_viewport_transform(tx);
ureg_END(tx->ureg);
/* record local constants */
@ -3627,6 +3676,9 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
}
if (info->process_vertices)
ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
unsigned count;
const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
@ -3634,7 +3686,14 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
ureg_free_tokens(toks);
}
info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
if (info->process_vertices) {
NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
tx->output_info,
tx->num_outputs,
&(info->so));
info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
} else
info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
if (!info->cso) {
hr = D3DERR_DRIVERINTERNALERROR;
FREE(info->lconstf.data);

View file

@ -26,10 +26,12 @@
#include "d3d9types.h"
#include "d3d9caps.h"
#include "nine_defines.h"
#include "nine_helpers.h"
#include "pipe/p_state.h" /* PIPE_MAX_ATTRIBS */
#include "util/u_memory.h"
struct NineDevice9;
struct NineVertexDeclaration9;
struct nine_lconstf /* NOTE: both pointers should be FREE'd by the user */
{
@ -78,6 +80,18 @@ struct nine_shader_info
uint8_t bumpenvmat_needed;
boolean swvp_on;
boolean process_vertices;
struct NineVertexDeclaration9 *vdecl_out;
struct pipe_stream_output_info so;
};
/* Description of one vertex shader output, recorded while translating the
 * shader and later matched against a vertex declaration to build the
 * stream output layout. */
struct nine_vs_output_info
{
BYTE output_semantic; /* D3D declaration usage of the output */
int output_semantic_index; /* usage index going with output_semantic */
int mask; /* write mask of the output register */
int output_index; /* index of the shader output register */
};
static inline void
@ -147,4 +161,65 @@ nine_shader_variants_free(struct nine_shader_variant *list)
}
}
/* Singly linked list of stream output shader variants, keyed by the output
 * vertex declaration. The list head is embedded in its owner; a head whose
 * vdecl is NULL means the list is empty. */
struct nine_shader_variant_so
{
struct nine_shader_variant_so *next; /* next variant, NULL at the tail */
struct NineVertexDeclaration9 *vdecl; /* key; set and cleared with nine_bind */
struct pipe_stream_output_info so; /* stream output layout of this variant */
void *cso; /* shader handle of this variant */
};
/* Look up the stream output variant built for @vdecl.
 *
 * On a hit, the variant's stream output info is copied to *so and its shader
 * handle is returned. On a miss, NULL is returned and *so is left untouched. */
static inline void *
nine_shader_variant_so_get(struct nine_shader_variant_so *list,
                           struct NineVertexDeclaration9 *vdecl,
                           struct pipe_stream_output_info *so)
{
    struct nine_shader_variant_so *node = list;

    for (;;) {
        if (node->vdecl == vdecl) {
            *so = node->so;
            return node->cso;
        }
        if (!node->next)
            return NULL;
        node = node->next;
    }
}
/* Register a new stream output variant (@so, @cso) for @vdecl.
 *
 * The embedded list head is used for the first variant (its vdecl is NULL
 * while the list is empty); further variants are appended in freshly
 * allocated nodes. The vertex declaration is bound with nine_bind to the
 * node that stores it.
 *
 * Returns TRUE on success, FALSE if the node allocation failed (in which
 * case the list is left unchanged). */
static inline boolean
nine_shader_variant_so_add(struct nine_shader_variant_so *list,
                           struct NineVertexDeclaration9 *vdecl,
                           struct pipe_stream_output_info *so, void *cso)
{
    struct nine_shader_variant_so *node;

    if (list->vdecl == NULL) { /* first shader */
        list->next = NULL;
        nine_bind(&list->vdecl, vdecl);
        list->so = *so;
        list->cso = cso;
        return TRUE;
    }

    /* Walk to the tail. */
    while (list->next) {
        assert(list->vdecl != vdecl);
        list = list->next;
    }

    node = MALLOC_STRUCT(nine_shader_variant_so);
    if (!node)
        return FALSE;

    node->next = NULL;
    /* The key must be bound on the NEW node, not on the current tail:
     * the tail keeps its own vdecl. Start from NULL because the node is not
     * assumed to be zero-initialized and nine_bind looks at the old value
     * (presumably to release it - confirm against nine_helpers.h). */
    node->vdecl = NULL;
    nine_bind(&node->vdecl, vdecl);
    node->so = *so;
    node->cso = cso;

    list->next = node;
    return TRUE;
}
/* Release a stream output variant list.
 *
 * Every node after the head is unlinked, has its vertex declaration unbound
 * and is freed. The head itself is embedded in its owner and is not freed;
 * only its vertex declaration is unbound. The shader handles (cso) are not
 * touched here. */
static inline void
nine_shader_variants_so_free(struct nine_shader_variant_so *list)
{
    struct nine_shader_variant_so *node = list->next;

    while (node) {
        struct nine_shader_variant_so *following = node->next;

        list->next = following;
        nine_bind(&node->vdecl, NULL);
        FREE(node);
        node = following;
    }

    if (list->vdecl)
        nine_bind(&list->vdecl, NULL);
}
#endif /* _NINE_SHADER_H_ */

View file

@ -26,6 +26,7 @@
#include "buffer9.h"
#include "indexbuffer9.h"
#include "surface9.h"
#include "vertexbuffer9.h"
#include "vertexdeclaration9.h"
#include "vertexshader9.h"
#include "pixelshader9.h"
@ -36,6 +37,8 @@
#include "cso_cache/cso_context.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
#include "util/u_box.h"
#include "util/u_simple_shaders.h"
#define DBG_CHANNEL DBG_DEVICE
@ -1356,6 +1359,367 @@ nine_state_clear(struct nine_state *state, const boolean device)
}
}
void
nine_state_init_sw(struct NineDevice9 *device)
{
struct pipe_context *pipe_sw = device->pipe_sw;
struct pipe_rasterizer_state rast;
struct pipe_blend_state blend;
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_framebuffer_state fb;
/* Only used with Streamout */
memset(&rast, 0, sizeof(rast));
rast.rasterizer_discard = true;
rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
cso_set_rasterizer(device->cso_sw, &rast);
/* dummy settings */
memset(&blend, 0, sizeof(blend));
memset(&dsa, 0, sizeof(dsa));
memset(&fb, 0, sizeof(fb));
cso_set_blend(device->cso_sw, &blend);
cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
cso_set_framebuffer(device->cso_sw, &fb);
cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
}
/* Build the vertex element layout for the software context (device->cso_sw)
 * from the currently bound vertex declaration and vertex shader.
 *
 * Every shader input is looked up in the vertex declaration; inputs without
 * a matching element are pointed at a "dummy vbo" stream, chosen among the
 * streams not referenced by the matched elements. The function also updates
 * state->stream_usage_mask and the dummy vbo bookkeeping of the device state.
 *
 * There is duplication with update_vertex_elements.
 * TODO: Share the code */
static void
update_vertex_elements_sw(struct NineDevice9 *device)
{
    struct nine_state *state = &device->state;
    const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
    const struct NineVertexShader9 *vs;
    unsigned n, b, i;
    int index;
    /* Must be explicitly signed: -1 marks "no matching element", and plain
     * char is unsigned on some ABIs, which would turn the marker into 255. */
    signed char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
    char used_streams[device->caps.MaxStreams];
    int dummy_vbo_stream = -1;
    BOOL need_dummy_vbo = FALSE;
    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

    state->stream_usage_mask = 0;
    memset(vdecl_index_map, -1, sizeof(vdecl_index_map));
    memset(used_streams, 0, device->caps.MaxStreams);
    vs = state->programmable_vs ? device->state.vs : device->ff.vs;

    assert(vs->num_inputs <= sizeof(vdecl_index_map));

    if (vdecl) {
        for (n = 0; n < vs->num_inputs; ++n) {
            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
                n, vs->input_map[n].ndecl, vdecl);

            for (i = 0; i < vdecl->nelems; i++) {
                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
                    vdecl_index_map[n] = i;
                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
                    break;
                }
            }
            if (vdecl_index_map[n] < 0)
                need_dummy_vbo = TRUE;
        }
    } else {
        /* No vertex declaration. Likely will never happen in practice,
         * but we need not crash on this */
        need_dummy_vbo = TRUE;
    }

    if (need_dummy_vbo) {
        /* Pick the first stream that no matched element uses. */
        for (i = 0; i < device->caps.MaxStreams; i++ ) {
            if (!used_streams[i]) {
                dummy_vbo_stream = i;
                break;
            }
        }
    }
    /* there are fewer vertex shader inputs than stream slots,
     * so if we need a slot for the dummy vbo, we should have found one */
    assert (!need_dummy_vbo || dummy_vbo_stream != -1);

    for (n = 0; n < vs->num_inputs; ++n) {
        index = vdecl_index_map[n];
        if (index >= 0) {
            ve[n] = vdecl->elems[index];
            b = ve[n].vertex_buffer_index;
            state->stream_usage_mask |= 1 << b;
            /* XXX wine just uses 1 here: */
            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
        } else {
            /* if the vertex declaration is incomplete compared to what the
             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
             * This is not specified by the spec, but is the behaviour
             * observed on Windows */
            ve[n].vertex_buffer_index = dummy_vbo_stream;
            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
            ve[n].src_offset = 0;
            ve[n].instance_divisor = 0;
        }
    }

    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
        /* The dummy vbo moved: flag both the old and the new slot. */
        if (state->dummy_vbo_bound_at >= 0)
            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
        if (dummy_vbo_stream >= 0) {
            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
            state->vbo_bound_done = FALSE;
        }
        state->dummy_vbo_bound_at = dummy_vbo_stream;
    }

    cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve);
}
/* Make the first four vertex streams available to the software context.
 *
 * For each bound stream, the range holding @num_vertices vertices starting
 * at @start_vertice is mapped for reading on the hardware context; the
 * transfer is stored in state->transfers_so[i] (nine_state_after_draw_sw
 * unmaps it). The mapped data is then either bound directly as a user vertex
 * buffer or, when the software driver lacks user vertex buffer support,
 * copied through device->vertex_sw_uploader first.
 *
 * Streams without a buffer, and streams whose mapping failed, are unbound on
 * the software context. */
static void
update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices)
{
    struct pipe_context *pipe = device->pipe;
    struct pipe_context *pipe_sw = device->pipe_sw;
    struct nine_state *state = &device->state;
    struct pipe_vertex_buffer vtxbuf;
    uint32_t mask = 0xf;
    unsigned i;

    DBG("mask=%x\n", mask);

    assert (state->dummy_vbo_bound_at < 0);
    /* TODO: handle dummy_vbo_bound_at */

    for (i = 0; mask; mask >>= 1, ++i) {
        struct pipe_resource *buf;
        struct pipe_box box;

        if (!(mask & 1))
            continue;

        if (!state->vtxbuf[i].buffer) {
            pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
            continue;
        }

        vtxbuf = state->vtxbuf[i];

        DBG("Locking %p (offset %d, length %d)\n", vtxbuf.buffer,
            vtxbuf.buffer_offset, num_vertices * vtxbuf.stride);

        u_box_1d(vtxbuf.buffer_offset + start_vertice * vtxbuf.stride,
                 num_vertices * vtxbuf.stride, &box);
        buf = vtxbuf.buffer;
        vtxbuf.user_buffer = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box,
                                                &(state->transfers_so[i]));
        /* vtxbuf is a by-value copy: this does not drop the state's
         * reference on the hardware buffer. */
        vtxbuf.buffer = NULL;

        if (!vtxbuf.user_buffer) {
            /* Mapping failed: there is nothing to read from and nothing to
             * unmap later, so leave the slot unbound instead of handing a
             * NULL source pointer to the uploader or the driver. */
            state->transfers_so[i] = NULL;
            pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
            continue;
        }

        if (!device->driver_caps.user_sw_vbufs) {
            u_upload_data(device->vertex_sw_uploader,
                          0,
                          box.width,
                          16,
                          vtxbuf.user_buffer,
                          &(vtxbuf.buffer_offset),
                          &(vtxbuf.buffer));
            u_upload_unmap(device->vertex_sw_uploader);
            vtxbuf.user_buffer = NULL;
        }

        pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf);
        /* Drop the local reference on the uploaded buffer, if any. */
        if (vtxbuf.buffer)
            pipe_resource_reference(&vtxbuf.buffer, NULL);
    }
}
static void
update_vs_constants_sw(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
struct pipe_context *pipe_sw = device->pipe_sw;
DBG("updating\n");
{
struct pipe_constant_buffer cb;
const void *buf;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = 4096 * sizeof(float[4]);
cb.user_buffer = state->vs_const_f_swvp;
if (state->vs->lconstf.ranges) {
const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
const struct nine_range *r = lconstf->ranges;
unsigned n = 0;
float *dst = device->state.vs_lconstf_temp;
float *src = (float *)cb.user_buffer;
memcpy(dst, src, 8192 * sizeof(float[4]));
while (r) {
unsigned p = r->bgn;
unsigned c = r->end - r->bgn;
memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
n += c;
r = r->next;
}
cb.user_buffer = dst;
}
buf = cb.user_buffer;
if (!device->driver_caps.user_sw_cbufs) {
u_upload_data(device->constbuf_sw_uploader,
0,
cb.buffer_size,
16,
cb.user_buffer,
&(cb.buffer_offset),
&(cb.buffer));
u_upload_unmap(device->constbuf_sw_uploader);
cb.user_buffer = NULL;
}
pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, &cb);
if (cb.buffer)
pipe_resource_reference(&cb.buffer, NULL);
cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]);
if (!device->driver_caps.user_sw_cbufs) {
u_upload_data(device->constbuf_sw_uploader,
0,
cb.buffer_size,
16,
cb.user_buffer,
&(cb.buffer_offset),
&(cb.buffer));
u_upload_unmap(device->constbuf_sw_uploader);
cb.user_buffer = NULL;
}
pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, &cb);
if (cb.buffer)
pipe_resource_reference(&cb.buffer, NULL);
}
{
struct pipe_constant_buffer cb;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = 2048 * sizeof(float[4]);
cb.user_buffer = state->vs_const_i;
if (!device->driver_caps.user_sw_cbufs) {
u_upload_data(device->constbuf_sw_uploader,
0,
cb.buffer_size,
16,
cb.user_buffer,
&(cb.buffer_offset),
&(cb.buffer));
u_upload_unmap(device->constbuf_sw_uploader);
cb.user_buffer = NULL;
}
pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, &cb);
if (cb.buffer)
pipe_resource_reference(&cb.buffer, NULL);
}
{
struct pipe_constant_buffer cb;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = 512 * sizeof(float[4]);
cb.user_buffer = state->vs_const_b;
if (!device->driver_caps.user_sw_cbufs) {
u_upload_data(device->constbuf_sw_uploader,
0,
cb.buffer_size,
16,
cb.user_buffer,
&(cb.buffer_offset),
&(cb.buffer));
u_upload_unmap(device->constbuf_sw_uploader);
cb.user_buffer = NULL;
}
pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, &cb);
if (cb.buffer)
pipe_resource_reference(&cb.buffer, NULL);
}
{
struct pipe_constant_buffer cb;
const D3DVIEWPORT9 *vport = &device->state.viewport;
float viewport_data[8] = {(float)vport->Width * 0.5f,
(float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
(float)vport->Width * 0.5f + (float)vport->X,
(float)vport->Height * 0.5f + (float)vport->Y,
vport->MinZ, 0.f};
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = 2 * sizeof(float[4]);
cb.user_buffer = viewport_data;
{
u_upload_data(device->constbuf_sw_uploader,
0,
cb.buffer_size,
16,
cb.user_buffer,
&(cb.buffer_offset),
&(cb.buffer));
u_upload_unmap(device->constbuf_sw_uploader);
cb.user_buffer = NULL;
}
pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, &cb);
if (cb.buffer)
pipe_resource_reference(&cb.buffer, NULL);
}
}
/* Prepare the software context for a ProcessVertices draw.
 *
 * Binds the stream output variant of the current vertex shader matching
 * @vdecl_out (its stream output layout is returned through @so), then sets
 * up vertex elements, vertex buffers for the @num_vertices vertices starting
 * at @start_vertice, and vertex shader constants.
 *
 * Only programmable vertex shaders are supported (asserted). */
void
nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
                           int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
{
    struct nine_state *state = &device->state;
    struct NineVertexShader9 *vs;

    if (state->programmable_vs)
        vs = device->state.vs;
    else
        vs = device->ff.vs;

    assert(state->programmable_vs);

    DBG("Preparing draw\n");

    cso_set_vertex_shader_handle(
        device->cso_sw,
        NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));

    update_vertex_elements_sw(device);
    update_vertex_buffers_sw(device, start_vertice, num_vertices);
    update_vs_constants_sw(device);

    DBG("Preparation succeeded\n");
}
/* Undo the vertex buffer setup done for a ProcessVertices draw.
 *
 * For each of the four stream slots, the vertex buffer is unbound from the
 * software context and the read transfer stored in state->transfers_so (if
 * any) is unmapped on the hardware context and cleared. */
void
nine_state_after_draw_sw(struct NineDevice9 *device)
{
    struct pipe_context *pipe = device->pipe;
    struct pipe_context *pipe_sw = device->pipe_sw;
    struct pipe_transfer **transfers = device->state.transfers_so;
    unsigned slot;

    for (slot = 0; slot < 4; ++slot) {
        pipe_sw->set_vertex_buffers(pipe_sw, slot, 1, NULL);

        if (transfers[slot])
            pipe->transfer_unmap(pipe, transfers[slot]);
        transfers[slot] = NULL;
    }
}
/* Counterpart of nine_state_init_sw. Currently does nothing. */
void
nine_state_destroy_sw(struct NineDevice9 *device)
{
    /* Everything destroyed with cso */
    (void) device;
}
/*
static const DWORD nine_render_states_pixel[] =
{

View file

@ -242,6 +242,9 @@ struct nine_state
struct pipe_constant_buffer cb_vs_ff;
struct pipe_constant_buffer cb_ps_ff;
} pipe;
/* sw */
struct pipe_transfer *transfers_so[4];
};
/* map D3DRS -> NINE_STATE_x
@ -263,6 +266,15 @@ void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *,
boolean is_reset);
void nine_state_clear(struct nine_state *, const boolean device);
/* Software context helpers, used for ProcessVertices. */

/* Set the fixed (dummy / stream output only) state of the software context. */
void nine_state_init_sw(struct NineDevice9 *device);
/* Bind the stream output shader variant for vdecl_out, plus vertex elements,
 * vertex buffers (num_vertices vertices starting at start_vertice) and
 * vertex shader constants on the software context. The stream output layout
 * of the bound variant is returned through so. */
void nine_state_prepare_draw_sw(struct NineDevice9 *device,
struct NineVertexDeclaration9 *vdecl_out,
int start_vertice,
int num_vertices,
struct pipe_stream_output_info *so);
/* Unbind the vertex buffers of the software context and unmap the source
 * transfers taken by nine_state_prepare_draw_sw. */
void nine_state_after_draw_sw(struct NineDevice9 *device);
/* Currently a no-op. */
void nine_state_destroy_sw(struct NineDevice9 *device);
/* If @alloc is FALSE, the return value may be a const identity matrix.
* Therefore, do not modify if you set alloc to FALSE !
*/

View file

@ -59,6 +59,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
info.sampler_ps1xtypes = 0x0;
info.fog_enable = 0;
info.projected = 0;
info.process_vertices = false;
hr = nine_translate_shader(device, &info);
if (FAILED(hr))
@ -162,6 +163,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
info.force_color_in_centroid = key >> 34 & 1;
info.projected = (key >> 48) & 0xffff;
info.process_vertices = false;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))

View file

@ -24,12 +24,12 @@
#include "vertexbuffer9.h"
#include "device9.h"
#include "nine_helpers.h"
#include "nine_shader.h"
#include "pipe/p_format.h"
#include "pipe/p_context.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_box.h"
#include "translate/translate.h"
#define DBG_CHANNEL DBG_VERTEXDECLARATION
@ -409,6 +409,53 @@ NineVertexDeclaration9_new_from_fvf( struct NineDevice9 *pDevice,
NINE_DEVICE_CHILD_NEW(VertexDeclaration9, ppOut, /* args */ pDevice, elems);
}
void
NineVertexDeclaration9_FillStreamOutputInfo(
struct NineVertexDeclaration9 *This,
struct nine_vs_output_info *ShaderOutputsInfo,
unsigned numOutputs,
struct pipe_stream_output_info *so )
{
unsigned so_outputs = 0;
int i, j;
memset(so, 0, sizeof(struct pipe_stream_output_info));
for (i = 0; i < numOutputs; i++) {
BYTE output_semantic = ShaderOutputsInfo[i].output_semantic;
unsigned output_semantic_index = ShaderOutputsInfo[i].output_semantic_index;
for (j = 0; j < This->nelems; j++) {
if ((This->decls[j].Usage == output_semantic ||
(output_semantic == D3DDECLUSAGE_POSITION &&
This->decls[j].Usage == D3DDECLUSAGE_POSITIONT)) &&
This->decls[j].UsageIndex == output_semantic_index) {
DBG("Matching %s %d: o%d -> %d\n",
nine_declusage_name(nine_d3d9_to_nine_declusage(This->decls[j].Usage, 0)),
This->decls[j].UsageIndex, i, j);
so->output[so_outputs].register_index = ShaderOutputsInfo[i].output_index;
so->output[so_outputs].start_component = 0;
if (ShaderOutputsInfo[i].mask & 8)
so->output[so_outputs].num_components = 4;
else if (ShaderOutputsInfo[i].mask & 4)
so->output[so_outputs].num_components = 3;
else if (ShaderOutputsInfo[i].mask & 2)
so->output[so_outputs].num_components = 2;
else
so->output[so_outputs].num_components = 1;
so->output[so_outputs].output_buffer = 0;
so->output[so_outputs].dst_offset = so_outputs * sizeof(float[4])/4;
so->output[so_outputs].stream = 0;
so_outputs++;
break;
}
}
}
so->num_outputs = so_outputs;
so->stride[0] = so_outputs * sizeof(float[4])/4;
}
/* ProcessVertices runs stream output into a temporary buffer to capture
* all outputs.
* Now we have to convert them to the format and order set by the vertex
@ -422,17 +469,13 @@ NineVertexDeclaration9_ConvertStreamOutput(
struct NineVertexBuffer9 *pDstBuf,
UINT DestIndex,
UINT VertexCount,
struct pipe_resource *pSrcBuf,
void *pSrcBuf,
const struct pipe_stream_output_info *so )
{
struct pipe_context *pipe = This->base.device->pipe;
struct pipe_transfer *transfer = NULL;
struct translate *translate;
struct translate_key transkey;
struct pipe_box box;
HRESULT hr;
unsigned i;
void *src_map;
void *dst_map;
DBG("This=%p pDstBuf=%p DestIndex=%u VertexCount=%u pSrcBuf=%p so=%p\n",
@ -477,20 +520,12 @@ NineVertexDeclaration9_ConvertStreamOutput(
if (FAILED(hr))
goto out;
src_map = pipe->transfer_map(pipe, pSrcBuf, 0, PIPE_TRANSFER_READ, &box,
&transfer);
if (!src_map) {
hr = D3DERR_DRIVERINTERNALERROR;
goto out;
}
translate->set_buffer(translate, 0, src_map, so->stride[0], ~0);
translate->set_buffer(translate, 0, pSrcBuf, so->stride[0] * 4, ~0);
translate->run(translate, 0, VertexCount, 0, 0, dst_map);
NineVertexBuffer9_Unlock(pDstBuf);
out:
if (transfer)
pipe->transfer_unmap(pipe, transfer);
translate->release(translate); /* TODO: cache these */
return hr;
}

View file

@ -31,6 +31,7 @@ struct pipe_vertex_element;
struct pipe_stream_output_info;
struct NineDevice9;
struct NineVertexBuffer9;
struct nine_vs_output_info;
struct NineVertexDeclaration9
{
@ -78,6 +79,13 @@ NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This,
D3DVERTEXELEMENT9 *pElement,
UINT *pNumElements );
/* Fill a stream output description for use with ProcessVertices.
 *
 * ShaderOutputsInfo holds numOutputs entries describing the vertex shader
 * outputs. For each output that is captured, one entry of so->output is
 * written targeting output buffer 0 and stream 0, starting at component 0,
 * with the component count derived from the highest bit set in the output's
 * mask. Captured outputs are laid out back to back, each one reserving a
 * full float[4] worth of space; so->num_outputs and so->stride[0] are set
 * from the number of captured outputs.
 *
 * NOTE(review): how shader outputs are matched against This (the output
 * vertex declaration) is decided in the definition; confirm there which
 * outputs end up captured. */
void
NineVertexDeclaration9_FillStreamOutputInfo(
struct NineVertexDeclaration9 *This,
struct nine_vs_output_info *ShaderOutputsInfo,
unsigned numOutputs,
struct pipe_stream_output_info *so );
/* Convert stream output data to the vertex declaration's format. */
HRESULT
NineVertexDeclaration9_ConvertStreamOutput(
@ -85,7 +93,7 @@ NineVertexDeclaration9_ConvertStreamOutput(
struct NineVertexBuffer9 *pDstBuf,
UINT DestIndex,
UINT VertexCount,
struct pipe_resource *pSrcBuf,
void *pSrcBuf,
const struct pipe_stream_output_info *so );
#endif /* _NINE_VERTEXDECLARATION9_H_ */

View file

@ -23,10 +23,12 @@
#include "nine_helpers.h"
#include "nine_shader.h"
#include "vertexdeclaration9.h"
#include "vertexshader9.h"
#include "device9.h"
#include "pipe/p_context.h"
#include "cso_cache/cso_context.h"
#define DBG_CHANNEL DBG_VERTEXSHADER
@ -64,6 +66,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
info.point_size_min = 0;
info.point_size_max = 0;
info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
info.process_vertices = false;
hr = nine_translate_shader(device, &info);
if (hr == D3DERR_INVALIDCALL &&
@ -109,6 +112,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
if (This->base.device) {
struct pipe_context *pipe = This->base.device->pipe;
struct nine_shader_variant *var = &This->variant;
struct nine_shader_variant_so *var_so = &This->variant_so;
do {
if (var->cso) {
@ -119,6 +123,13 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
var = var->next;
} while (var);
while (var_so && var_so->vdecl) {
if (var_so->cso) {
cso_delete_vertex_shader(This->base.device->cso_sw, var_so->cso );
}
var_so = var_so->next;
}
if (This->ff_cso) {
if (This->ff_cso == This->base.device->state.cso.vs)
pipe->bind_vs_state(pipe, NULL);
@ -126,6 +137,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
}
}
nine_shader_variants_free(&This->variant);
nine_shader_variants_so_free(&This->variant_so);
FREE((void *)This->byte_code.tokens); /* const_cast */
@ -178,6 +190,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
info.swvp_on = device->swvp;
info.process_vertices = false;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
@ -192,6 +205,38 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
return cso;
}
void *
NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *This,
struct NineVertexDeclaration9 *vdecl_out,
struct pipe_stream_output_info *so )
{
struct nine_shader_info info;
HRESULT hr;
void *cso;
cso = nine_shader_variant_so_get(&This->variant_so, vdecl_out, so);
if (cso)
return cso;
info.type = PIPE_SHADER_VERTEX;
info.const_i_base = 0;
info.const_b_base = 0;
info.byte_code = This->byte_code.tokens;
info.sampler_mask_shadow = 0;
info.fog_enable = false;
info.point_size_min = 0;
info.point_size_max = 0;
info.swvp_on = true;
info.vdecl_out = vdecl_out;
info.process_vertices = true;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
return NULL;
*so = info.so;
nine_shader_variant_so_add(&This->variant_so, vdecl_out, so, info.cso);
return info.cso;
}
IDirect3DVertexShader9Vtbl NineVertexShader9_vtable = {
(void *)NineUnknown_QueryInterface,
(void *)NineUnknown_AddRef,

View file

@ -31,6 +31,8 @@
#include "nine_shader.h"
#include "nine_state.h"
struct NineVertexDeclaration9;
struct NineVertexShader9
{
struct NineUnknown base;
@ -57,8 +59,6 @@ struct NineVertexShader9
struct nine_lconstf lconstf;
const struct pipe_stream_output_info *so;
uint64_t ff_key[3];
void *ff_cso;
@ -66,6 +66,9 @@ struct NineVertexShader9
void *last_cso;
uint64_t next_key;
/* so */
struct nine_shader_variant_so variant_so;
};
static inline struct NineVertexShader9 *
NineVertexShader9( void *data )
@ -107,6 +110,11 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
void *
NineVertexShader9_GetVariant( struct NineVertexShader9 *vs );
/* Get the shader variant used by ProcessVertices for the output vertex
 * declaration vdecl_out.
 *
 * A previously built variant is returned from the shader's stream output
 * variant list when available. Otherwise the shader is translated with
 * process_vertices enabled, *so receives the stream output info produced by
 * the translation, and the result is added to the variant list.
 *
 * Returns the CSO, or NULL if the shader translation failed.
 * NOTE(review): whether *so is also filled in on a cache hit depends on
 * nine_shader_variant_so_get; confirm there. */
void *
NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *vs,
struct NineVertexDeclaration9 *vdecl_out,
struct pipe_stream_output_info *so );
/*** public ***/
HRESULT