mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
[g3dvl] move scaling completely into matrix and use less temp registers
This commit is contained in:
parent
08c8cd26b8
commit
f780626c35
2 changed files with 56 additions and 58 deletions
|
|
@ -42,9 +42,6 @@
|
|||
|
||||
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
|
||||
|
||||
#define STAGE1_SCALE 4.0f
|
||||
#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)
|
||||
|
||||
#define NR_RENDER_TARGETS 4
|
||||
|
||||
enum VS_INPUT
|
||||
|
|
@ -171,24 +168,27 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
|
|||
}
|
||||
|
||||
static void
|
||||
increment_addr(struct ureg_program *shader, struct ureg_dst addr[2],
|
||||
bool right_side, bool transposed, float size)
|
||||
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
|
||||
struct ureg_src saddr[2], bool right_side, bool transposed,
|
||||
int pos, float size)
|
||||
{
|
||||
unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
|
||||
unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
|
||||
|
||||
/* addr[0..1]++ */
|
||||
ureg_ADD(shader, ureg_writemask(addr[0], wm_tc),
|
||||
ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
|
||||
ureg_ADD(shader, ureg_writemask(addr[1], wm_tc),
|
||||
ureg_src(addr[1]), ureg_imm1f(shader, 1.0f / size));
|
||||
/*
|
||||
* daddr[0..1].(start) = saddr[0..1].(start)
|
||||
* daddr[0..1].(tc) = saddr[0..1].(tc)
|
||||
*/
|
||||
|
||||
ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
|
||||
ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
|
||||
ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
|
||||
ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
|
||||
}
|
||||
|
||||
static void
|
||||
fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
|
||||
{
|
||||
m[0] = ureg_DECL_temporary(shader);
|
||||
m[1] = ureg_DECL_temporary(shader);
|
||||
|
||||
ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
|
||||
ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
|
||||
}
|
||||
|
|
@ -218,9 +218,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
|
|||
{
|
||||
struct ureg_program *shader;
|
||||
|
||||
struct ureg_src l_addr[2], r_addr[2], saddr[2];
|
||||
struct ureg_src l_addr[2], r_addr[2];
|
||||
|
||||
struct ureg_dst addr[2], l[4][2], r[2];
|
||||
struct ureg_dst l[4][2], r[2];
|
||||
struct ureg_dst fragment[NR_RENDER_TARGETS];
|
||||
|
||||
unsigned i, j;
|
||||
|
|
@ -229,12 +229,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
|
|||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
addr[0] = ureg_DECL_temporary(shader);
|
||||
addr[1] = ureg_DECL_temporary(shader);
|
||||
|
||||
saddr[0] = ureg_src(addr[0]);
|
||||
saddr[1] = ureg_src(addr[1]);
|
||||
|
||||
l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
|
||||
l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
|
||||
|
||||
|
|
@ -245,37 +239,44 @@ create_matrix_frag_shader(struct vl_idct *idct)
|
|||
fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if(i == 0) {
|
||||
ureg_MOV(shader, addr[0], l_addr[0]);
|
||||
ureg_MOV(shader, addr[1], l_addr[1]);
|
||||
} else
|
||||
increment_addr(shader, addr, false, false, idct->buffer_height);
|
||||
l[i][0] = ureg_DECL_temporary(shader);
|
||||
l[i][1] = ureg_DECL_temporary(shader);
|
||||
}
|
||||
|
||||
fetch_four(shader, l[i], saddr, ureg_DECL_sampler(shader, 1));
|
||||
r[0] = ureg_DECL_temporary(shader);
|
||||
r[1] = ureg_DECL_temporary(shader);
|
||||
|
||||
for (i = 1; i < 4; ++i) {
|
||||
increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
struct ureg_src s_addr[2];
|
||||
s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
|
||||
s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
|
||||
fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
|
||||
}
|
||||
|
||||
for (i = 0; i < NR_RENDER_TARGETS; ++i) {
|
||||
if(i == 0) {
|
||||
ureg_MOV(shader, addr[0], r_addr[0]);
|
||||
ureg_MOV(shader, addr[1], r_addr[1]);
|
||||
} else
|
||||
increment_addr(shader, addr, true, true, BLOCK_HEIGHT);
|
||||
if(i > 0)
|
||||
increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
|
||||
|
||||
fetch_four(shader, r, saddr, ureg_DECL_sampler(shader, 0));
|
||||
struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
|
||||
s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
|
||||
s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
|
||||
fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));
|
||||
|
||||
for (j = 0; j < 4; ++j) {
|
||||
matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
|
||||
}
|
||||
ureg_release_temporary(shader, r[0]);
|
||||
ureg_release_temporary(shader, r[1]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
ureg_release_temporary(shader, l[i][0]);
|
||||
ureg_release_temporary(shader, l[i][1]);
|
||||
}
|
||||
ureg_release_temporary(shader, addr[0]);
|
||||
ureg_release_temporary(shader, addr[1]);
|
||||
ureg_release_temporary(shader, r[0]);
|
||||
ureg_release_temporary(shader, r[1]);
|
||||
|
||||
ureg_END(shader);
|
||||
|
||||
|
|
@ -290,7 +291,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
|
|||
struct ureg_src l_addr[2], r_addr[2];
|
||||
|
||||
struct ureg_dst l[2], r[2];
|
||||
struct ureg_dst tmp, fragment;
|
||||
struct ureg_dst fragment;
|
||||
|
||||
shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
|
||||
if (!shader)
|
||||
|
|
@ -302,16 +303,18 @@ create_transpose_frag_shader(struct vl_idct *idct)
|
|||
r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
|
||||
r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
|
||||
|
||||
l[0] = ureg_DECL_temporary(shader);
|
||||
l[1] = ureg_DECL_temporary(shader);
|
||||
r[0] = ureg_DECL_temporary(shader);
|
||||
r[1] = ureg_DECL_temporary(shader);
|
||||
|
||||
fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
|
||||
fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
|
||||
|
||||
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
|
||||
|
||||
tmp = ureg_DECL_temporary(shader);
|
||||
matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
|
||||
ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
|
||||
matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);
|
||||
|
||||
ureg_release_temporary(shader, tmp);
|
||||
ureg_release_temporary(shader, l[0]);
|
||||
ureg_release_temporary(shader, l[1]);
|
||||
ureg_release_temporary(shader, r[0]);
|
||||
|
|
@ -542,7 +545,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
|
|||
for(i = 0; i < BLOCK_HEIGHT; ++i)
|
||||
for(j = 0; j < BLOCK_WIDTH; ++j)
|
||||
// transpose and scale
|
||||
f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
|
||||
f[i * pitch + j] = const_matrix[j][i] * sqrtf(SCALE_FACTOR_16_TO_9);
|
||||
|
||||
pipe->transfer_unmap(pipe, buf_transfer);
|
||||
pipe->transfer_destroy(pipe, buf_transfer);
|
||||
|
|
|
|||
|
|
@ -273,13 +273,12 @@ static struct ureg_dst
|
|||
fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
|
||||
{
|
||||
struct ureg_src tc[3], sampler[3], eb[2];
|
||||
struct ureg_dst texel, t_tc, t_eb_info, tmp;
|
||||
struct ureg_dst texel, t_tc, t_eb_info;
|
||||
unsigned i, label;
|
||||
|
||||
texel = ureg_DECL_temporary(shader);
|
||||
t_tc = ureg_DECL_temporary(shader);
|
||||
t_eb_info = ureg_DECL_temporary(shader);
|
||||
tmp = ureg_DECL_temporary(shader);
|
||||
|
||||
tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
|
||||
tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
|
||||
|
|
@ -326,7 +325,6 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
|
|||
|
||||
ureg_release_temporary(shader, t_tc);
|
||||
ureg_release_temporary(shader, t_eb_info);
|
||||
ureg_release_temporary(shader, tmp);
|
||||
|
||||
return texel;
|
||||
}
|
||||
|
|
@ -336,7 +334,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
|
|||
{
|
||||
struct ureg_src ref_frames, bkwd_pred;
|
||||
struct ureg_src tc[4], sampler[2];
|
||||
struct ureg_dst ref[2], tmp, result;
|
||||
struct ureg_dst ref[2], result;
|
||||
unsigned i, intra_label, bi_label, label;
|
||||
|
||||
ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
|
||||
|
|
@ -350,14 +348,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
|
|||
ref[i] = ureg_DECL_temporary(shader);
|
||||
}
|
||||
|
||||
tmp = ureg_DECL_temporary(shader);
|
||||
result = ureg_DECL_temporary(shader);
|
||||
|
||||
ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
|
||||
|
||||
ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
|
||||
ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label);
|
||||
ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
|
||||
ureg_SGE(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
|
||||
ureg_IF(shader, ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X), &intra_label);
|
||||
ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
|
||||
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
|
||||
tc[1], tc[0]);
|
||||
|
||||
|
|
@ -367,10 +364,10 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
|
|||
* result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
|
||||
*/
|
||||
ureg_IF(shader, bkwd_pred, &label);
|
||||
ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
|
||||
ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
|
||||
ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
|
||||
ureg_ELSE(shader, &label);
|
||||
ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
|
||||
ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
|
||||
ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
|
||||
ureg_ENDIF(shader);
|
||||
|
||||
|
|
@ -383,12 +380,11 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
|
|||
* else
|
||||
* ref[0..1] = tex(tc[2..3], sampler[0..1])
|
||||
*/
|
||||
ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
|
||||
|
||||
ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
|
||||
ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
|
||||
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
|
||||
tc[3], tc[2]);
|
||||
ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
|
||||
ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
|
||||
ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
|
||||
|
||||
ureg_LRP(shader, result, ureg_imm1f(shader, 0.5f),
|
||||
ureg_src(ref[0]), ureg_src(ref[1]));
|
||||
|
|
@ -400,7 +396,6 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
|
|||
|
||||
for (i = 0; i < 2; ++i)
|
||||
ureg_release_temporary(shader, ref[i]);
|
||||
ureg_release_temporary(shader, tmp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue