g3dvl: IDCT part 1.

Very basic IDCT support is in, performed CPU-side for now.
This commit is contained in:
Younes Manton 2008-07-06 22:04:29 -04:00
parent 49937b9985
commit 6235141fd2
4 changed files with 381 additions and 8 deletions

View file

@ -11,22 +11,262 @@
#include <tgsi/util/tgsi_build.h>
#include "vl_shader_build.h"
#include "vl_data.h"
#include "vl_defs.h"
#include "vl_util.h"
static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context)
{
const unsigned int max_tokens = 50;
struct pipe_context *pipe;
struct pipe_shader_state fs;
struct tgsi_token *tokens;
struct tgsi_header *header;
struct tgsi_full_declaration decl;
struct tgsi_full_instruction inst;
unsigned int ti;
unsigned int i;
assert(context);
pipe = context->pipe;
tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
/* Version */
*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
/* Header */
header = (struct tgsi_header*)&tokens[1];
*header = tgsi_build_header();
/* Processor */
*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
ti = 3;
/*
* decl i0 ; Vertex pos
* decl i1 ; Vertex texcoords
*/
for (i = 0; i < 2; i++)
{
decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
}
/*
* decl o0 ; Vertex pos
* decl o1 ; Vertex texcoords
*/
for (i = 0; i < 2; i++)
{
decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
}
/*
* mov o0, i0 ; Move pos in to pos out
* mov o1, i1 ; Move texcoord in to texcoord out */
for (i = 0; i < 2; ++i)
{
inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
}
/* end */
inst = vl_end();
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
fs.tokens = tokens;
//context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs);
free(tokens);
return 0;
}
/*
 * Builds the TGSI token stream for the frame IDCT fragment shader.
 *
 * The program samples s0 at the interpolated texcoord, subtracts constant c0
 * and writes four DP4 results (against c1..c4) to the x/y/z/w channels of o0.
 * NOTE(review): this instruction sequence reads like a colour-conversion
 * shader used as a placeholder for the real IDCT program -- confirm.
 *
 * The pipe shader object is not created yet (the create call below is still
 * disabled), so the tokens are built and then released again.
 *
 * Returns 0 on success, non-zero if the token buffer could not be allocated.
 */
static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context)
{
	const unsigned int		max_tokens = 50;

	struct pipe_context		*pipe;
	struct pipe_shader_state	fs;
	struct tgsi_token		*tokens;
	struct tgsi_header		*header;

	struct tgsi_full_declaration	decl;
	struct tgsi_full_instruction	inst;

	unsigned int			ti;
	unsigned int			i;

	assert(context);

	pipe = context->pipe;

	tokens = malloc(max_tokens * sizeof(struct tgsi_token));
	if (!tokens)
		return 1;

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();

	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();

	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/* decl i0		; Texcoords for s0 */
	decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0		; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl s0		; Sampler for tex containing picture to display */
	decl = vl_decl_samplers(0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* tex2d t0, i0, s0	; Read src pixel */
	inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t0, t0, c0	; Subtract bias vector from pixel */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * dp4 o0.x, t0, c1	; Multiply pixel by the color conversion matrix
	 * dp4 o0.y, t0, c2
	 * dp4 o0.z, t0, c3
	 * dp4 o0.w, t0, c4	; XXX: Don't need 4th coefficient
	 */
	for (i = 0; i < 4; ++i)
	{
		inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
		/* Each DP4 writes exactly one channel: x, y, z, w in turn */
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	/*
	 * TODO: Shader creation is disabled for now:
	 * context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs);
	 */

	free(tokens);

	return 0;
}
static int vlInitIDCT(struct VL_CONTEXT *context)
{
struct pipe_context *pipe;
struct pipe_sampler_state sampler;
struct pipe_texture template;
unsigned int i;
assert(context);
pipe = context->pipe;
context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH;
context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT;
context->states.idct.viewport.scale[2] = 1;
context->states.idct.viewport.scale[3] = 1;
context->states.idct.viewport.translate[0] = 0;
context->states.idct.viewport.translate[1] = 0;
context->states.idct.viewport.translate[2] = 0;
context->states.idct.viewport.translate[3] = 0;
context->states.idct.render_target.width = VL_BLOCK_WIDTH;
context->states.idct.render_target.height = VL_BLOCK_HEIGHT;
context->states.idct.render_target.num_cbufs = 1;
context->states.idct.render_target.zsbuf = NULL;
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
sampler.compare_func = PIPE_FUNC_ALWAYS;
sampler.normalized_coords = 1;
/*sampler.prefilter = ;*/
/*sampler.shadow_ambient = ;*/
/*sampler.lod_bias = ;*/
sampler.min_lod = 0;
/*sampler.max_lod = ;*/
/*sampler.border_color[i] = ;*/
/*sampler.max_anisotropy = ;*/
context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler);
memset(&template, 0, sizeof(struct pipe_texture));
template.target = PIPE_TEXTURE_2D;
template.format = PIPE_FORMAT_A8L8_UNORM;
template.last_level = 0;
template.width[0] = 8;
template.height[0] = 8;
template.depth[0] = 1;
template.compressed = 0;
pf_get_block(template.format, &template.block);
context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template);
template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
template.width[0] = 16;
template.height[0] = 1;
context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template);
for (i = 0; i < 2; ++i)
{
context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i];
context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i];
/*
context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F);
context->states.idct.vertex_bufs[i].max_index = 3;
context->states.idct.vertex_bufs[i].buffer_offset = 0;
context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create
(
pipe->winsys,
1,
PIPE_BUFFER_USAGE_VERTEX,
sizeof(struct VL_VERTEX2F) * 4
);
context->states.idct.vertex_buf_elems[i].src_offset = 0;
context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i;
context->states.idct.vertex_buf_elems[i].nr_components = 2;
context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
*/
}
vlCreateVertexShaderFrameIDCT(context);
vlCreateFragmentShaderFrameIDCT(context);
return 0;
}
/*
 * Releases the IDCT objects that are currently created: the sampler state
 * and the two textures held in context->states.idct.
 *
 * Always returns 0.
 */
static int vlDestroyIDCT(struct VL_CONTEXT *context)
{
	assert(context);

	/* Sampler state object */
	context->pipe->delete_sampler_state
	(
		context->pipe,
		context->states.idct.sampler
	);

	/* Textures: source block texture first, then the basis texture */
	pipe_texture_release(&context->states.idct.texture);
	pipe_texture_release(&context->states.idct.basis);

	/*
	 * Still disabled (the matching objects are not created yet):
	 *
	 * for (i = 0; i < 2; ++i)
	 *	context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer);
	 *
	 * context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs);
	 * context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs);
	 *
	 * context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer);
	 * context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer);
	 */

	return 0;
}
@ -1271,7 +1511,6 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context)
context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Create our texcoord buffers and texcoord buffer elements */
/* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */
for (i = 1; i < 3; ++i)
{
context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F);

View file

@ -26,6 +26,16 @@ struct VL_CONTEXT
struct
{
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state render_target;
struct pipe_sampler_state *sampler;
struct pipe_texture *texture;
struct pipe_texture *basis;
struct pipe_shader_state *frame_vs;
struct pipe_shader_state *frame_fs;
struct pipe_vertex_buffer *vertex_bufs[2];
struct pipe_vertex_element *vertex_buf_elems[2];
//struct pipe_constant_buffer vs_const_buf, fs_const_buf;
} idct;
struct

View file

@ -9,6 +9,59 @@
#include "vl_defs.h"
#include "vl_util.h"
/*
 * CPU-side separable 2D inverse DCT of one block.
 *
 * src  - 64 input coefficients, row-major (index = y * VL_BLOCK_WIDTH + x).
 * dst  - 64 output samples, same layout. May be the same buffer as src,
 *        since all input is consumed before any output is written.
 * bias - value added to every output sample.
 *
 * The intermediate (row pass) result is kept in floating point and the final
 * value is rounded to nearest; truncating either stage to an integer loses
 * up to a few units per sample (e.g. a lone DC coefficient of 8 came out as
 * 0 instead of 1).
 *
 * Results above 255 are clamped to 255. Results below 0 are clamped to 0
 * only when bias > 0, so zero-biased blocks keep their negative values.
 *
 * The basis table and scratch buffer are sized for 8x8 blocks.
 *
 * Always returns 0.
 */
static int vlTransformBlock(short *src, short *dst, short bias)
{
	/* basis[n][k]: contribution of frequency k to spatial position n */
	static const float basis[8][8] =
	{
		{0.3536,   0.4904,   0.4619,   0.4157,   0.3536,   0.2778,   0.1913,   0.0975},
		{0.3536,   0.4157,   0.1913,  -0.0975,  -0.3536,  -0.4904,  -0.4619,  -0.2778},
		{0.3536,   0.2778,  -0.1913,  -0.4904,  -0.3536,   0.0975,   0.4619,   0.4157},
		{0.3536,   0.0975,  -0.4619,  -0.2778,   0.3536,   0.4157,  -0.1913,  -0.4904},
		{0.3536,  -0.0975,  -0.4619,   0.2778,   0.3536,  -0.4157,  -0.1913,   0.4904},
		{0.3536,  -0.2778,  -0.1913,   0.4904,  -0.3536,  -0.0975,   0.4619,  -0.4157},
		{0.3536,  -0.4157,   0.1913,   0.0975,  -0.3536,   0.4904,  -0.4619,   0.2778},
		{0.3536,  -0.4904,   0.4619,  -0.4157,   0.3536,  -0.2778,   0.1913,  -0.0975}
	};

	unsigned int	x, y, k;
	float		tmp[64];
	float		sum;
	int		value;

	/* Pass 1: transform every row of src into tmp */
	for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
		for (x = 0; x < VL_BLOCK_WIDTH; ++x)
		{
			sum = 0.0f;
			for (k = 0; k < VL_BLOCK_WIDTH; ++k)
				sum += src[y * VL_BLOCK_WIDTH + k] * basis[x][k];
			tmp[y * VL_BLOCK_WIDTH + x] = sum;
		}

	/* Pass 2: transform every column of tmp, then bias, round and clamp */
	for (x = 0; x < VL_BLOCK_WIDTH; ++x)
		for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
		{
			sum = 0.0f;
			for (k = 0; k < VL_BLOCK_HEIGHT; ++k)
				sum += tmp[k * VL_BLOCK_WIDTH + x] * basis[y][k];

			/* Round half away from zero before converting to an integer */
			value = bias + (short)(sum < 0.0f ? sum - 0.5f : sum + 0.5f);

			if (value > 255)
				value = 255;
			else if (bias > 0 && value < 0)
				value = 0;

			dst[y * VL_BLOCK_WIDTH + x] = (short)value;
		}

	return 0;
}
static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
{
unsigned int y;
@ -102,6 +155,9 @@ static int vlGrabBlocks
unsigned int tex_pitch;
unsigned int tb, sb = 0;
const int do_idct = 1;
short temp_block[64];
assert(context);
assert(blocks);
@ -121,6 +177,17 @@ static int vlGrabBlocks
{
if (dct_type == VL_DCT_FRAME_CODED)
if (sample_type == VL_FULL_SAMPLE)
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
vlGrabFrameCodedFullBlock
(
temp_block,
texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
tex_pitch
);
}
else
vlGrabFrameCodedFullBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -128,6 +195,17 @@ static int vlGrabBlocks
tex_pitch
);
else
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
vlGrabFrameCodedDiffBlock
(
temp_block,
texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
tex_pitch
);
}
else
vlGrabFrameCodedDiffBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -136,6 +214,17 @@ static int vlGrabBlocks
);
else
if (sample_type == VL_FULL_SAMPLE)
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
vlGrabFieldCodedFullBlock
(
temp_block,
texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
tex_pitch
);
}
else
vlGrabFieldCodedFullBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -143,6 +232,17 @@ static int vlGrabBlocks
tex_pitch
);
else
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
vlGrabFieldCodedDiffBlock
(
temp_block,
texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
tex_pitch
);
}
else
vlGrabFieldCodedDiffBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -173,6 +273,17 @@ static int vlGrabBlocks
if ((coded_block_pattern >> (1 - tb)) & 1)
{
if (sample_type == VL_FULL_SAMPLE)
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
vlGrabFrameCodedFullBlock
(
temp_block,
texels,
tex_pitch
);
}
else
vlGrabFrameCodedFullBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -180,6 +291,17 @@ static int vlGrabBlocks
tex_pitch
);
else
if (do_idct)
{
vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
vlGrabFrameCodedDiffBlock
(
temp_block,
texels,
tex_pitch
);
}
else
vlGrabFrameCodedDiffBlock
(
blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@ -266,6 +388,8 @@ int vlRenderIMacroBlock
if (picture_type != VL_FRAME_PICTURE)
return 0;
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
pipe = surface->context->pipe;
vs_consts = pipe->winsys->buffer_map
@ -298,8 +422,6 @@ int vlRenderIMacroBlock
pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs);
pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs);
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
return 0;
@ -335,6 +457,8 @@ int vlRenderPMacroBlock
if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
return 0;
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
pipe = surface->context->pipe;
vs_consts = pipe->winsys->buffer_map
@ -390,8 +514,6 @@ int vlRenderPMacroBlock
pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures);
pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers);
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
return 0;
@ -428,6 +550,8 @@ int vlRenderBMacroBlock
if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
return 0;
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
pipe = surface->context->pipe;
vs_consts = pipe->winsys->buffer_map
@ -492,8 +616,6 @@ int vlRenderBMacroBlock
pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures);
pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers);
vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
return 0;
@ -589,7 +711,7 @@ int vlPutSurface
pipe->set_sampler_textures(pipe, 1, &surface->texture);
pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
/* XXX: Need to take destx, desty into consideration */
/* TODO: Need to take destx, desty into consideration */
pipe->winsys->flush_frontbuffer
(
pipe->winsys,

View file

@ -146,6 +146,8 @@ Status XvMCRenderSurface
assert(flags == 0 || flags == XVMC_SECOND_FIELD);
/* TODO: Batch macroblocks by type (I,P,B) */
for (i = first_macroblock; i < first_macroblock + num_macroblocks; ++i)
if (macroblocks->macro_blocks[i].macroblock_type & XVMC_MB_TYPE_INTRA)
vlRenderIMacroBlock