[g3dvl] start implementing zscan and quantification

Not 100% complete, but at least a good start.
This commit is contained in:
Christian König 2011-04-23 03:37:05 +02:00
parent b7acf83d52
commit f0819a22f3
7 changed files with 849 additions and 111 deletions

View file

@ -152,6 +152,7 @@ C_SOURCES = \
vl/vl_mpeg12_decoder.c \
vl/vl_compositor.c \
vl/vl_csc.c \
vl/vl_zscan.c \
vl/vl_idct.c \
vl/vl_mc.c \
vl/vl_vertex_buffers.c \

View file

@ -44,6 +44,14 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
{ { 0x01, 0x01 }, { 0x01, 0x01 } }
};
/* Candidate texture formats for the zscan source buffer. init_zscan() hands
 * this list to find_first_supported_format(); presumably the first entry the
 * driver supports is chosen -- confirm against that helper. */
static const enum pipe_format const_zscan_source_formats[] = {
PIPE_FORMAT_R16_SNORM,
PIPE_FORMAT_R16_SSCALED
};
/* Number of entries in const_zscan_source_formats. */
static const unsigned num_zscan_source_formats =
sizeof(const_zscan_source_formats) / sizeof(enum pipe_format);
static const enum pipe_format const_idct_source_formats[] = {
PIPE_FORMAT_R16G16B16A16_SNORM,
PIPE_FORMAT_R16G16B16A16_SSCALED
@ -79,10 +87,8 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
assert(ctx && buffer);
if (ctx->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
else
sampler_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
sampler_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
assert(sampler_views);
for (i = 0; i < VL_MAX_PLANES; ++i) {
@ -112,21 +118,17 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
unsigned x, unsigned y, short *block,
bool intra, enum pipe_mpeg12_dct_type type)
{
unsigned tex_pitch;
short *texels;
unsigned i;
unsigned idx;
assert(buffer);
assert(block);
vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
idx = vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
texels = buffer->texels[plane] + idx * BLOCK_WIDTH * BLOCK_HEIGHT;
for (i = 0; i < BLOCK_HEIGHT; ++i)
memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
memcpy(texels, block, BLOCK_WIDTH * BLOCK_HEIGHT * sizeof(short));
}
static void
@ -178,6 +180,144 @@ unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
}
}
/**
 * Create the per-buffer zscan source texture and initialise one zscan
 * buffer per plane.
 *
 * The zscan pass samples from zscan_source and renders into the surfaces
 * of idct_source (entrypoint <= IDCT) or mc_source (otherwise), so the
 * chosen destination buffer must already exist when this is called.
 *
 * Returns true on success. On failure everything created here is released
 * again and false is returned.
 */
static bool
init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
{
   struct vl_mpeg12_decoder *dec;
   struct pipe_sampler_view **src_views;
   struct pipe_surface **dst_surfaces;
   enum pipe_format formats[3];
   unsigned plane;

   assert(buffer);

   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;

   /* all three planes share the same single channel source format */
   formats[0] = dec->zscan_source_format;
   formats[1] = dec->zscan_source_format;
   formats[2] = dec->zscan_source_format;

   /* one row holds blocks_per_line blocks, each stored as
    * BLOCK_WIDTH * BLOCK_HEIGHT consecutive texels */
   buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                               dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
                                               dec->max_blocks / dec->blocks_per_line,
                                               1, PIPE_VIDEO_CHROMA_FORMAT_444,
                                               formats, PIPE_USAGE_STATIC);
   if (!buffer->zscan_source)
      return false;

   src_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
   if (!src_views)
      goto destroy_source;

   dst_surfaces = (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
      ? buffer->idct_source->get_surfaces(buffer->idct_source)
      : buffer->mc_source->get_surfaces(buffer->mc_source);
   if (!dst_surfaces)
      goto destroy_source;

   for (plane = 0; plane < VL_MAX_PLANES; ++plane) {
      /* plane 0 is luma, the remaining planes use the chroma zscan */
      struct vl_zscan *zscan = (plane == 0) ? &dec->zscan_y : &dec->zscan_c;

      if (!vl_zscan_init_buffer(zscan, &buffer->zscan[plane],
                                src_views[plane], dst_surfaces[plane]))
         goto undo_planes;
   }

   return true;

undo_planes:
   /* tear down the planes that were set up, last one first */
   while (plane > 0)
      vl_zscan_cleanup_buffer(&buffer->zscan[--plane]);

destroy_source:
   buffer->zscan_source->destroy(buffer->zscan_source);
   return false;
}
/**
 * Release the per-plane zscan buffers and then destroy the zscan source
 * texture that init_zscan_buffer() created.
 */
static void
cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
{
   unsigned plane = 0;

   assert(buffer);

   while (plane < VL_MAX_PLANES) {
      vl_zscan_cleanup_buffer(&buffer->zscan[plane]);
      ++plane;
   }

   buffer->zscan_source->destroy(buffer->zscan_source);
}
/**
 * Create the IDCT source and intermediate video buffers and initialise one
 * IDCT buffer per plane, rendering into the surfaces of mc_source.
 *
 * Plane 0 uses dec->idct_y, planes 1 and 2 use dec->idct_c.
 *
 * Returns true on success. On failure everything created here is released
 * again and false is returned.
 */
static bool
init_idct_buffer(struct vl_mpeg12_buffer *buffer)
{
   enum pipe_format formats[3];

   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
   struct pipe_surface **idct_surfaces;

   struct vl_mpeg12_decoder *dec;

   unsigned i;

   assert(buffer);

   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;

   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                              dec->base.width / 4, dec->base.height, 1,
                                              dec->base.chroma_format,
                                              formats, PIPE_USAGE_STATIC);
   if (!buffer->idct_source)
      goto error_source;

   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                    dec->base.width / dec->nr_of_idct_render_targets,
                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
                                                    dec->base.chroma_format,
                                                    formats, PIPE_USAGE_STATIC);
   if (!buffer->idct_intermediate)
      goto error_intermediate;

   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
   if (!idct_source_sv)
      goto error_source_sv;

   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
   if (!idct_intermediate_sv)
      goto error_intermediate_sv;

   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
   if (!idct_surfaces)
      goto error_surfaces;

   for (i = 0; i < 3; ++i)
      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                               &buffer->idct[i], idct_source_sv[i],
                               idct_intermediate_sv[i], idct_surfaces[i]))
         goto error_plane;

   return true;

error_plane:
   /* Undo planes [0, i): idct[i - 1] is being cleaned up, so the luma idct
    * belongs to i == 1 (plane 0) and the chroma idct to everything else. */
   for (; i > 0; --i)
      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_y : &dec->idct_c, &buffer->idct[i - 1]);

error_surfaces:
error_intermediate_sv:
error_source_sv:
   buffer->idct_intermediate->destroy(buffer->idct_intermediate);

error_intermediate:
   buffer->idct_source->destroy(buffer->idct_source);

error_source:
   return false;
}
static void
cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
{
@ -187,11 +327,11 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
assert(dec);
buf->idct_source->destroy(buf->idct_source);
buf->idct_intermediate->destroy(buf->idct_intermediate);
vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
buf->idct_source->destroy(buf->idct_source);
buf->idct_intermediate->destroy(buf->idct_intermediate);
}
static void
@ -206,6 +346,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
assert(dec);
cleanup_zscan_buffer(buf);
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
cleanup_idct_buffer(buf);
@ -310,6 +452,9 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
vl_idct_cleanup(&dec->idct_c);
}
vl_zscan_cleanup(&dec->zscan_y);
vl_zscan_cleanup(&dec->zscan_c);
dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
@ -319,76 +464,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
FREE(dec);
}
/**
 * Create the IDCT source and intermediate video buffers and initialise one
 * IDCT buffer per plane, rendering into the surfaces of mc_source.
 *
 * Plane 0 uses dec->idct_y, planes 1 and 2 use dec->idct_c.
 *
 * Returns true on success. On failure everything created here is released
 * again and false is returned.
 */
static bool
init_idct_buffer(struct vl_mpeg12_buffer *buffer)
{
   enum pipe_format formats[3];

   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
   struct pipe_surface **idct_surfaces;

   struct vl_mpeg12_decoder *dec;

   unsigned i;

   assert(buffer);

   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;

   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                              dec->base.width / 4, dec->base.height, 1,
                                              dec->base.chroma_format,
                                              formats, PIPE_USAGE_STREAM);
   if (!buffer->idct_source)
      goto error_source;

   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                    dec->base.width / dec->nr_of_idct_render_targets,
                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
                                                    dec->base.chroma_format,
                                                    formats, PIPE_USAGE_STATIC);
   if (!buffer->idct_intermediate)
      goto error_intermediate;

   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
   if (!idct_source_sv)
      goto error_source_sv;

   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
   if (!idct_intermediate_sv)
      goto error_intermediate_sv;

   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
   if (!idct_surfaces)
      goto error_surfaces;

   for (i = 0; i < 3; ++i)
      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                               &buffer->idct[i], idct_source_sv[i],
                               idct_intermediate_sv[i], idct_surfaces[i]))
         goto error_plane;

   return true;

error_plane:
   /* Undo planes [0, i): idct[i - 1] is being cleaned up, so the luma idct
    * belongs to i == 1 (plane 0) and the chroma idct to everything else. */
   for (; i > 0; --i)
      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_y : &dec->idct_c, &buffer->idct[i - 1]);

error_surfaces:
error_intermediate_sv:
error_source_sv:
   buffer->idct_intermediate->destroy(buffer->idct_intermediate);

error_intermediate:
   buffer->idct_source->destroy(buffer->idct_source);

error_source:
   return false;
}
static struct pipe_video_decode_buffer *
vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
{
@ -426,10 +501,6 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
if (!buffer->mc_source)
goto error_mc_source;
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
if (!init_idct_buffer(buffer))
goto error_idct;
mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
if (!mc_source_sv)
goto error_mc_source_sv;
@ -443,8 +514,18 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2]))
goto error_mc_cr;
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
if (!init_idct_buffer(buffer))
goto error_idct;
if (!init_zscan_buffer(buffer))
goto error_zscan;
return &buffer->base;
error_zscan:
// TODO Cleanup error handling
error_mc_cr:
vl_mc_cleanup_buffer(&buffer->mc[1]);
@ -517,6 +598,8 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
vl_zscan_render(&buf->zscan[i] , num_instances);
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
@ -590,9 +673,47 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
}
static bool
init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
init_zscan(struct vl_mpeg12_decoder *dec)
{
struct pipe_sampler_view *layout;
unsigned num_channels;
assert(dec);
dec->blocks_per_line = 4;
dec->max_blocks =
(dec->base.width * dec->base.height) /
(BLOCK_WIDTH * BLOCK_HEIGHT);
dec->zscan_source_format = find_first_supported_format(dec, const_zscan_source_formats,
num_zscan_source_formats, PIPE_TEXTURE_2D);
if (dec->zscan_source_format == PIPE_FORMAT_NONE)
return false;
layout = vl_zscan_linear(dec->pipe, dec->blocks_per_line);
num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
dec->blocks_per_line, dec->max_blocks, num_channels))
return false;
vl_zscan_set_layout(&dec->zscan_y, layout);
if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
dec->blocks_per_line, dec->max_blocks, num_channels))
return false;
vl_zscan_set_layout(&dec->zscan_c, layout);
return true;
}
static bool
init_idct(struct vl_mpeg12_decoder *dec)
{
unsigned chroma_width, chroma_height;
struct pipe_sampler_view *matrix, *transpose;
float matrix_scale, transpose_scale;
@ -645,22 +766,11 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
} else
pipe_sampler_view_reference(&transpose, matrix);
if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
dec->nr_of_idct_render_targets, matrix, transpose))
goto error_y;
if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
chroma_width = buffer_width / 2;
chroma_height = buffer_height / 2;
} else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
chroma_width = buffer_width;
chroma_height = buffer_height / 2;
} else {
chroma_width = buffer_width;
chroma_height = buffer_height;
}
if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
dec->nr_of_idct_render_targets, matrix, transpose))
goto error_c;
@ -736,8 +846,22 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
if (dec->mc_source_format == PIPE_FORMAT_NONE)
return NULL;
if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
dec->chroma_width = dec->base.width / 2;
dec->chroma_height = dec->base.height / 2;
} else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
dec->chroma_width = dec->base.width;
dec->chroma_height = dec->base.height / 2;
} else {
dec->chroma_width = dec->base.width;
dec->chroma_height = dec->base.height;
}
if (!init_zscan(dec))
return NULL; // TODO error handling
if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
if (!init_idct(dec, dec->base.width, dec->base.height))
if (!init_idct(dec))
goto error_idct;
if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
mc_scale = SCALE_FACTOR_SSCALED;

View file

@ -30,6 +30,7 @@
#include <pipe/p_video_context.h>
#include "vl_zscan.h"
#include "vl_idct.h"
#include "vl_mc.h"
@ -44,9 +45,15 @@ struct vl_mpeg12_decoder
struct pipe_video_decoder base;
struct pipe_context *pipe;
unsigned chroma_width, chroma_height;
unsigned blocks_per_line;
unsigned max_blocks;
const unsigned (*empty_block_mask)[3][2][2];
unsigned nr_of_idct_render_targets;
enum pipe_format zscan_source_format;
enum pipe_format idct_source_format;
enum pipe_format idct_intermediate_format;
enum pipe_format mc_source_format;
@ -57,6 +64,7 @@ struct vl_mpeg12_decoder
void *ves_ycbcr;
void *ves_mv;
struct vl_zscan zscan_y, zscan_c;
struct vl_idct idct_y, idct_c;
struct vl_mc mc_y, mc_c;
@ -69,10 +77,12 @@ struct vl_mpeg12_buffer
struct vl_vertex_buffer vertex_stream;
struct pipe_video_buffer *zscan_source;
struct pipe_video_buffer *idct_source;
struct pipe_video_buffer *idct_intermediate;
struct pipe_video_buffer *mc_source;
struct vl_zscan_buffer zscan[VL_MAX_PLANES];
struct vl_idct_buffer idct[VL_MAX_PLANES];
struct vl_mc_buffer mc[VL_MAX_PLANES];

View file

@ -301,9 +301,10 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
}
void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
unsigned component, unsigned x, unsigned y,
bool intra, enum pipe_mpeg12_dct_type type)
unsigned
vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
unsigned component, unsigned x, unsigned y,
bool intra, enum pipe_mpeg12_dct_type type)
{
struct vl_ycbcr_vertex_stream *stream;
@ -316,7 +317,7 @@ void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
stream->intra = intra;
stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD;
buffer->ycbcr[component].num_instances++;
return buffer->ycbcr[component].num_instances++;
}
unsigned

View file

@ -84,9 +84,9 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
unsigned component, unsigned x, unsigned y,
bool intra, enum pipe_mpeg12_dct_type type);
unsigned vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
unsigned component, unsigned x, unsigned y,
bool intra, enum pipe_mpeg12_dct_type type);
struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);

View file

@ -0,0 +1,492 @@
/**************************************************************************
*
* Copyright 2011 Christian König
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <assert.h>
#include <pipe/p_screen.h>
#include <pipe/p_context.h>
#include <util/u_draw.h>
#include <util/u_sampler.h>
#include <util/u_inlines.h>
#include <tgsi/tgsi_ureg.h>
#include <vl/vl_defines.h>
#include <vl/vl_types.h>
#include "vl_zscan.h"
#include "vl_vertex_buffers.h"
/* Output slots shared by the vertex and fragment shader. VS_O_VTEX is the
 * base index: channel i uses generic slot VS_O_VTEX + i. */
enum VS_OUTPUT
{
VS_O_VPOS,
VS_O_VTEX
};
static void *
create_vert_shader(struct vl_zscan *zscan)
{
struct ureg_program *shader;
struct ureg_src scale, instance;
struct ureg_src vrect, vpos;
struct ureg_dst tmp;
struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
unsigned i;
shader = ureg_create(TGSI_PROCESSOR_VERTEX);
if (!shader)
return NULL;
scale = ureg_imm2f(shader,
(float)BLOCK_WIDTH / zscan->buffer_width,
(float)BLOCK_HEIGHT / zscan->buffer_height);
instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
tmp = ureg_DECL_temporary(shader);
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
for (i = 0; i < zscan->num_channels; ++i)
o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
/*
* o_vpos.xy = (vpos + vrect) * scale
* o_vpos.zw = 1.0f
*
* tmp.xy = InstanceID / blocks_per_line
* tmp.x = frac(tmp.x)
* tmp.y = floor(tmp.y)
*
* o_vtex.x = vrect.x / blocks_per_line + tmp.x
* o_vtex.y = vrect.y
* o_vtex.z = tmp.z * blocks_per_line / blocks_total
*/
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp));
ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
for (i = 0; i < zscan->num_channels; ++i) {
if (i > 0)
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp),
ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH)));
ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
}
ureg_release_temporary(shader, tmp);
ureg_END(shader);
return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
/*
 * Build the zscan fragment shader.
 *
 * For every output channel the shader looks up an address in the scan
 * layout texture (sampler 1), combines it with the row coordinate passed in
 * vtex.z and uses the result to fetch the coefficient from the source
 * texture (sampler 0). Sampler 2 (quant) is declared but not used yet.
 *
 * Returns the driver shader handle, or NULL if the ureg program could not
 * be created.
 */
static void *
create_frag_shader(struct vl_zscan *zscan)
{
struct ureg_program *shader;
struct ureg_src vtex[zscan->num_channels];
struct ureg_src src, scan, quant;
struct ureg_dst tmp[zscan->num_channels];
struct ureg_dst fragment;
unsigned i;
shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!shader)
return NULL;
for (i = 0; i < zscan->num_channels; ++i)
vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
src = ureg_DECL_sampler(shader, 0);
scan = ureg_DECL_sampler(shader, 1);
quant = ureg_DECL_sampler(shader, 2);
for (i = 0; i < zscan->num_channels; ++i)
tmp[i] = ureg_DECL_temporary(shader);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
/*
 * for each channel i:
 *    tmp[i].x = tex(scan, vtex[i])
 *    tmp[i].y = vtex[i].z
 *    tmp[i] = tex(src, tmp[i])
 *    fragment.<component i> = tmp[i] * 1.0f  (placeholder for quant)
 */
for (i = 0; i < zscan->num_channels; ++i)
ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
for (i = 0; i < zscan->num_channels; ++i)
ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
for (i = 0; i < zscan->num_channels; ++i)
ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
// TODO: Fetch quant and use it
/* NOTE(review): for i > 0 the write-masked MUL reads component i of tmp[i];
 * this presumably relies on the source sampler view replicating the single
 * channel to all components -- confirm against vl_video_buffer. */
for (i = 0; i < zscan->num_channels; ++i)
ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
for (i = 0; i < zscan->num_channels; ++i)
ureg_release_temporary(shader, tmp[i]);
ureg_END(shader);
return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
/**
 * Create the vertex and fragment shader of a zscan object.
 *
 * Returns true when both shaders exist. If the fragment shader cannot be
 * created the vertex shader is deleted again and false is returned.
 */
static bool
init_shaders(struct vl_zscan *zscan)
{
   assert(zscan);

   zscan->vs = create_vert_shader(zscan);
   if (!zscan->vs)
      return false;

   zscan->fs = create_frag_shader(zscan);
   if (zscan->fs)
      return true;

   /* fragment shader failed, roll back the vertex shader */
   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
   return false;
}
static void
cleanup_shaders(struct vl_zscan *zscan)
{
assert(zscan);
zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
}
/**
 * Create the fixed function state used by the zscan pass: a default
 * rasterizer state, a blend state with blending disabled and all colour
 * channels writable, and three identical nearest-filter sampler states.
 *
 * Returns true on success. On failure every state object created so far
 * is deleted again and false is returned.
 */
static bool
init_state(struct vl_zscan *zscan)
{
   struct pipe_blend_state blend;
   struct pipe_rasterizer_state rs_state;
   struct pipe_sampler_state sampler;
   unsigned i;

   assert(zscan);

   memset(&rs_state, 0, sizeof(rs_state));
   rs_state.gl_rasterization_rules = false;
   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
   if (!zscan->rs_state)
      goto error_rs_state;

   memset(&blend, 0, sizeof blend);

   blend.independent_blend_enable = 0;
   blend.rt[0].blend_enable = 0;
   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
   blend.logicop_enable = 0;
   blend.logicop_func = PIPE_LOGICOP_CLEAR;
   /* Needed to allow color writes to FB, even if blending disabled */
   blend.rt[0].colormask = PIPE_MASK_RGBA;
   blend.dither = 0;
   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
   if (!zscan->blend)
      goto error_blend;

   for (i = 0; i < 3; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
      if (!zscan->samplers[i])
         goto error_samplers;
   }

   return true;

error_samplers:
   /* Only samplers [0, i) exist: samplers[i] is the one that failed and
    * the entries after it were never written. */
   while (i > 0)
      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[--i]);

   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);

error_blend:
   /* the blend state does not exist here, only the rasterizer state does */
   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);

error_rs_state:
   return false;
}
static void
cleanup_state(struct vl_zscan *zscan)
{
unsigned i;
assert(zscan);
for (i = 0; i < 3; ++i)
zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
}
/**
 * Create the "linear" scan layout texture and return a sampler view of it.
 *
 * The texture is (BLOCK_WIDTH * blocks_per_line) x BLOCK_HEIGHT texels of
 * R32_FLOAT. The texel for coefficient (x, y) of block i holds
 *    (x + y * BLOCK_WIDTH + i * BLOCK_WIDTH * BLOCK_HEIGHT) / total_size
 * i.e. the normalised position of that coefficient when the blocks of one
 * line are stored one after another.
 *
 * The returned view holds the only reference to the texture; the caller
 * owns the view. Returns NULL on failure.
 */
struct pipe_sampler_view *
vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
{
   const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;

   struct pipe_resource res_tmpl, *res;
   struct pipe_sampler_view sv_tmpl, *sv;
   struct pipe_transfer *buf_transfer;
   unsigned x, y, i, pitch;
   float *f;

   struct pipe_box rect =
   {
      0, 0, 0,
      BLOCK_WIDTH * blocks_per_line,
      BLOCK_HEIGHT,
      1
   };

   assert(pipe && blocks_per_line);

   memset(&res_tmpl, 0, sizeof(res_tmpl));
   res_tmpl.target = PIPE_TEXTURE_2D;
   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
   res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
   res_tmpl.height0 = BLOCK_HEIGHT;
   res_tmpl.depth0 = 1;
   res_tmpl.array_size = 1;
   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;

   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
   if (!res)
      goto error_resource;

   buf_transfer = pipe->get_transfer
   (
      pipe, res,
      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );
   if (!buf_transfer)
      goto error_transfer;

   /* row pitch in floats, the mapping may be padded */
   pitch = buf_transfer->stride / sizeof(float);

   f = pipe->transfer_map(pipe, buf_transfer);
   if (!f)
      goto error_map;

   for (i = 0; i < blocks_per_line; ++i)
      for (y = 0; y < BLOCK_HEIGHT; ++y)
         for (x = 0; x < BLOCK_WIDTH; ++x) {
            float addr = x + y * BLOCK_WIDTH +
               i * BLOCK_WIDTH * BLOCK_HEIGHT;

            addr /= total_size;

            f[i * BLOCK_WIDTH + y * pitch + x] = addr;
         }

   pipe->transfer_unmap(pipe, buf_transfer);
   pipe->transfer_destroy(pipe, buf_transfer);

   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
   u_sampler_view_default_template(&sv_tmpl, res, res->format);
   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
   pipe_resource_reference(&res, NULL);
   if (!sv)
      /* The transfer is already destroyed and the resource reference
       * already dropped, so there is nothing left to unwind. */
      goto error_resource;

   return sv;

error_map:
   pipe->transfer_destroy(pipe, buf_transfer);

error_transfer:
   pipe_resource_reference(&res, NULL);

error_resource:
   return NULL;
}
#if 0
// TODO
struct pipe_sampler_view *
vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
struct pipe_sampler_view *
vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
#endif
/**
 * Initialise a zscan object: store the geometry parameters, then create
 * its shaders and its fixed function state.
 *
 * Returns true on success. If state creation fails the shaders are deleted
 * again; in either failure case false is returned.
 */
bool
vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
              unsigned buffer_width, unsigned buffer_height,
              unsigned blocks_per_line, unsigned blocks_total,
              unsigned num_channels)
{
   assert(zscan && pipe);

   /* the shader generators read these, so fill them in first */
   zscan->pipe = pipe;
   zscan->buffer_width = buffer_width;
   zscan->buffer_height = buffer_height;
   zscan->num_channels = num_channels;
   zscan->blocks_per_line = blocks_per_line;
   zscan->blocks_total = blocks_total;

   if (!init_shaders(zscan))
      return false;

   if (init_state(zscan))
      return true;

   cleanup_shaders(zscan);
   return false;
}
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
assert(zscan);
cleanup_shaders(zscan);
cleanup_state(zscan);
}
/*
 * Set the scan layout texture of a zscan object.
 *
 * Takes a new reference on layout and releases whatever zscan->scan pointed
 * to before, so zscan->scan must be NULL or a valid view when this is called.
 * The caller keeps its own reference to layout.
 */
void
vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout)
{
assert(zscan);
assert(layout);
pipe_sampler_view_reference(&zscan->scan, layout);
}
#if 0
// TODO
void
vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
#endif
bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst)
{
assert(zscan && buffer);
memset(buffer, 0, sizeof(struct vl_zscan_buffer));
buffer->zscan = zscan;
pipe_sampler_view_reference(&buffer->src, src);
pipe_sampler_view_reference(&buffer->scan, zscan->scan);
pipe_sampler_view_reference(&buffer->quant, zscan->quant);
buffer->viewport.scale[0] = dst->width;
buffer->viewport.scale[1] = dst->height;
buffer->viewport.scale[2] = 1;
buffer->viewport.scale[3] = 1;
buffer->viewport.translate[0] = 0;
buffer->viewport.translate[1] = 0;
buffer->viewport.translate[2] = 0;
buffer->viewport.translate[3] = 0;
buffer->fb_state.width = dst->width;
buffer->fb_state.height = dst->height;
buffer->fb_state.nr_cbufs = 1;
pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
return true;
}
/*
 * Release the references taken by vl_zscan_init_buffer(): the three sampler
 * views and the framebuffer's colour buffer surface.
 */
void
vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
{
assert(buffer);
pipe_sampler_view_reference(&buffer->src, NULL);
pipe_sampler_view_reference(&buffer->scan, NULL);
pipe_sampler_view_reference(&buffer->quant, NULL);
pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
}
void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
{
struct vl_zscan *zscan;
assert(buffer);
zscan = buffer->zscan;
zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
}

View file

@ -0,0 +1,110 @@
/**************************************************************************
*
* Copyright 2011 Christian König
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef vl_zscan_h
#define vl_zscan_h
#include <pipe/p_compiler.h>
#include <pipe/p_state.h>
/*
* shader based zscan and quantization
* expects vl_vertex_buffers to be used as the todo list
*/
/* Decoder wide state of one zscan pass; filled in by vl_zscan_init(). */
struct vl_zscan
{
/* context all state objects and shaders are created on */
struct pipe_context *pipe;
/* size of the destination; used to scale block positions in the vertex shader */
unsigned buffer_width;
unsigned buffer_height;
/* number of texture coordinate outputs / fragment components generated */
unsigned num_channels;
/* block layout of the source texture */
unsigned blocks_per_line;
unsigned blocks_total;
/* driver state objects created by init_state() */
void *rs_state;
void *blend;
void *samplers[3];
/* shader handles created by init_shaders() */
void *vs, *fs;
/* scan layout view, set through vl_zscan_set_layout() */
struct pipe_sampler_view *scan;
/* quantization view; nothing in vl_zscan.c sets it yet (upload is a TODO) */
struct pipe_sampler_view *quant;
};
/* Per-buffer state of a zscan pass; filled in by vl_zscan_init_buffer(). */
struct vl_zscan_buffer
{
/* zscan object this buffer belongs to */
struct vl_zscan *zscan;
/* viewport and framebuffer covering the destination surface */
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state fb_state;
/* referenced sampler views; vl_zscan_render() passes &src as an array, so
 * the order of these members matters */
struct pipe_sampler_view *src, *scan, *quant;
/* NOTE(review): not written by vl_zscan_init_buffer(); the destination is
 * kept in fb_state.cbufs[0] instead -- confirm whether this is still needed */
struct pipe_surface *dst;
};
/* create the linear scan layout texture; returns NULL on failure */
struct pipe_sampler_view *
vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line);
#if 0
struct pipe_sampler_view *
vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
struct pipe_sampler_view *
vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
#endif
/* init a zscan object (shaders and state); returns false on failure */
bool
vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
unsigned buffer_width, unsigned buffer_height,
unsigned blocks_per_line, unsigned blocks_total,
unsigned num_channels);
/* set the scan layout; takes a reference on layout */
void
vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout);
/* destroy the shaders and state of a zscan object */
void
vl_zscan_cleanup(struct vl_zscan *zscan);
#if 0
void
vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
#endif
/* init a per-buffer instance reading from src and rendering to dst */
bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst);
/* release the references held by a per-buffer instance */
void
vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
/* draw num_instances blocks with the state of the given buffer */
void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
#endif