llvmpipe: add reduction mode support

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9423>
This commit is contained in:
Dave Airlie 2021-03-05 16:08:22 +10:00
parent 1fb43ae9bf
commit 6adbf6c86c
7 changed files with 319 additions and 59 deletions

View file

@ -1580,7 +1580,6 @@ lp_build_min(struct lp_build_context *bld,
return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
/**
* Generate min(a, b)
* NaN's are handled according to the behavior specified by the

View file

@ -193,7 +193,7 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
state->mag_img_filter = sampler->mag_img_filter;
state->min_mip_filter = sampler->min_mip_filter;
state->seamless_cube_map = sampler->seamless_cube_map;
state->reduction_mode = sampler->reduction_mode;
if (sampler->max_lod > 0.0f) {
state->max_lod_pos = 1;
}
@ -2114,3 +2114,197 @@ lp_build_sample_offset(struct lp_build_context *bld,
*out_offset = offset;
}
/**
 * Min-reduction counterpart of a single linear-interpolation step.
 *
 * Where the lerp weight \p x is non-zero the result is min(v0, v1).
 * Where the weight is zero the second value is ignored and \p v0 is
 * passed through unchanged.
 */
static LLVMValueRef
lp_build_sample_min(struct lp_build_context *bld,
                    LLVMValueRef x,
                    LLVMValueRef v0,
                    LLVMValueRef v1)
{
   /* Mask of where the incoming weight differs from zero. */
   LLVMValueRef weight_nonzero =
      lp_build_compare(bld->gallivm, bld->type, PIPE_FUNC_NOTEQUAL,
                       x, bld->zero);
   LLVMValueRef smaller = lp_build_min(bld, v0, v1);

   /* A zero weight means the min must not take v1 into account. */
   return lp_build_select(bld, weight_nonzero, smaller, v0);
}
/**
 * Max-reduction counterpart of a single linear-interpolation step.
 *
 * Where the lerp weight \p x is non-zero the result is max(v0, v1).
 * Where the weight is zero the second value is ignored and \p v0 is
 * passed through unchanged.
 */
static LLVMValueRef
lp_build_sample_max(struct lp_build_context *bld,
                    LLVMValueRef x,
                    LLVMValueRef v0,
                    LLVMValueRef v1)
{
   /* Mask of where the incoming weight differs from zero. */
   LLVMValueRef weight_nonzero =
      lp_build_compare(bld->gallivm, bld->type, PIPE_FUNC_NOTEQUAL,
                       x, bld->zero);
   LLVMValueRef larger = lp_build_max(bld, v0, v1);

   /* A zero weight means the max must not take v1 into account. */
   return lp_build_select(bld, weight_nonzero, larger, v0);
}
/**
 * Two-axis min reduction over four values.
 *
 * The pairs (a, b) and (c, d) are each reduced with weight \p x, then the
 * two partial results are reduced with weight \p y.
 */
static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef a,
                       LLVMValueRef b,
                       LLVMValueRef c,
                       LLVMValueRef d)
{
   LLVMValueRef min_ab = lp_build_sample_min(bld, x, a, b);
   LLVMValueRef min_cd = lp_build_sample_min(bld, x, c, d);

   return lp_build_sample_min(bld, y, min_ab, min_cd);
}
/**
 * Two-axis max reduction over four values.
 *
 * The pairs (a, b) and (c, d) are each reduced with weight \p x, then the
 * two partial results are reduced with weight \p y.
 */
static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef a,
                       LLVMValueRef b,
                       LLVMValueRef c,
                       LLVMValueRef d)
{
   LLVMValueRef max_ab = lp_build_sample_max(bld, x, a, b);
   LLVMValueRef max_cd = lp_build_sample_max(bld, x, c, d);

   return lp_build_sample_max(bld, y, max_ab, max_cd);
}
/**
 * Three-axis min reduction over eight values.
 *
 * (a, b, c, d) and (e, f, g, h) are each reduced with the 2D helper using
 * weights \p x and \p y; the two partial results are then reduced with
 * weight \p z.
 */
static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef z,
                       LLVMValueRef a, LLVMValueRef b,
                       LLVMValueRef c, LLVMValueRef d,
                       LLVMValueRef e, LLVMValueRef f,
                       LLVMValueRef g, LLVMValueRef h)
{
   LLVMValueRef min_abcd = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
   LLVMValueRef min_efgh = lp_build_sample_min_2d(bld, x, y, e, f, g, h);

   return lp_build_sample_min(bld, z, min_abcd, min_efgh);
}
/**
 * Three-axis max reduction over eight values.
 *
 * (a, b, c, d) and (e, f, g, h) are each reduced with the 2D helper using
 * weights \p x and \p y; the two partial results are then reduced with
 * weight \p z.
 */
static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef z,
                       LLVMValueRef a, LLVMValueRef b,
                       LLVMValueRef c, LLVMValueRef d,
                       LLVMValueRef e, LLVMValueRef f,
                       LLVMValueRef g, LLVMValueRef h)
{
   LLVMValueRef max_abcd = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
   LLVMValueRef max_efgh = lp_build_sample_max_2d(bld, x, y, e, f, g, h);

   return lp_build_sample_max(bld, z, max_abcd, max_efgh);
}
/**
 * Combine two per-channel value arrays along one axis according to the
 * sampler reduction mode.
 *
 * \param bld       build context used for the generated operations
 * \param mode      PIPE_TEX_REDUCTION_MIN / _MAX select the min/max helpers;
 *                  WEIGHTED_AVERAGE and any other value select a lerp
 * \param flags     passed to lp_build_lerp() only (unused for min/max)
 * \param num_chan  number of entries processed in v00, v01 and out
 * \param x         weight along the axis
 * \param v00       first input, one value per channel
 * \param v01       second input, one value per channel
 * \param out       receives one result per channel
 */
void
lp_build_reduce_filter(struct lp_build_context *bld,
                       enum pipe_tex_reduction_mode mode,
                       unsigned flags,
                       unsigned num_chan,
                       LLVMValueRef x,
                       LLVMValueRef *v00,
                       LLVMValueRef *v01,
                       LLVMValueRef *out)
{
   unsigned i;

   for (i = 0; i < num_chan; i++) {
      LLVMValueRef result;

      switch (mode) {
      case PIPE_TEX_REDUCTION_MIN:
         result = lp_build_sample_min(bld, x, v00[i], v01[i]);
         break;
      case PIPE_TEX_REDUCTION_MAX:
         result = lp_build_sample_max(bld, x, v00[i], v01[i]);
         break;
      case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
      default:
         result = lp_build_lerp(bld, x, v00[i], v01[i], flags);
         break;
      }

      /* Inputs for this channel are read before out[i] is written. */
      out[i] = result;
   }
}
/**
 * Combine four per-channel value arrays along two axes according to the
 * sampler reduction mode.
 *
 * \param bld       build context used for the generated operations
 * \param mode      PIPE_TEX_REDUCTION_MIN / _MAX select the 2D min/max
 *                  helpers; WEIGHTED_AVERAGE and any other value select a
 *                  2D lerp
 * \param flags     passed to lp_build_lerp_2d() only (unused for min/max)
 * \param num_chan  number of entries processed in each array
 * \param x, y      weights along the two axes
 * \param v00..v11  the four inputs, one value per channel each
 * \param out       receives one result per channel
 */
void
lp_build_reduce_filter_2d(struct lp_build_context *bld,
                          enum pipe_tex_reduction_mode mode,
                          unsigned flags,
                          unsigned num_chan,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef *v00,
                          LLVMValueRef *v01,
                          LLVMValueRef *v10,
                          LLVMValueRef *v11,
                          LLVMValueRef *out)
{
   unsigned i;

   for (i = 0; i < num_chan; i++) {
      LLVMValueRef result;

      switch (mode) {
      case PIPE_TEX_REDUCTION_MIN:
         result = lp_build_sample_min_2d(bld, x, y,
                                         v00[i], v01[i], v10[i], v11[i]);
         break;
      case PIPE_TEX_REDUCTION_MAX:
         result = lp_build_sample_max_2d(bld, x, y,
                                         v00[i], v01[i], v10[i], v11[i]);
         break;
      case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
      default:
         result = lp_build_lerp_2d(bld, x, y,
                                   v00[i], v01[i], v10[i], v11[i],
                                   flags);
         break;
      }

      out[i] = result;
   }
}
/**
 * Combine eight per-channel value arrays along three axes according to the
 * sampler reduction mode.
 *
 * \param bld         build context used for the generated operations
 * \param mode        PIPE_TEX_REDUCTION_MIN / _MAX select the 3D min/max
 *                    helpers; WEIGHTED_AVERAGE and any other value select a
 *                    3D lerp
 * \param flags       passed to lp_build_lerp_3d() only (unused for min/max)
 * \param num_chan    number of entries processed in each array
 * \param x, y, z     weights along the three axes
 * \param v000..v111  the eight inputs, one value per channel each
 * \param out         receives one result per channel
 */
void
lp_build_reduce_filter_3d(struct lp_build_context *bld,
                          enum pipe_tex_reduction_mode mode,
                          unsigned flags,
                          unsigned num_chan,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef *v000,
                          LLVMValueRef *v001,
                          LLVMValueRef *v010,
                          LLVMValueRef *v011,
                          LLVMValueRef *v100,
                          LLVMValueRef *v101,
                          LLVMValueRef *v110,
                          LLVMValueRef *v111,
                          LLVMValueRef *out)
{
   unsigned i;

   for (i = 0; i < num_chan; i++) {
      LLVMValueRef result;

      switch (mode) {
      case PIPE_TEX_REDUCTION_MIN:
         result = lp_build_sample_min_3d(bld, x, y, z,
                                         v000[i], v001[i], v010[i], v011[i],
                                         v100[i], v101[i], v110[i], v111[i]);
         break;
      case PIPE_TEX_REDUCTION_MAX:
         result = lp_build_sample_max_3d(bld, x, y, z,
                                         v000[i], v001[i], v010[i], v011[i],
                                         v100[i], v101[i], v110[i], v111[i]);
         break;
      case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
      default:
         result = lp_build_lerp_3d(bld, x, y, z,
                                   v000[i], v001[i], v010[i], v011[i],
                                   v100[i], v101[i], v110[i], v111[i],
                                   flags);
         break;
      }

      out[i] = result;
   }
}

View file

@ -205,6 +205,7 @@ struct lp_static_sampler_state
/* Hacks */
unsigned force_nearest_s:1;
unsigned force_nearest_t:1;
unsigned reduction_mode:2;
};
@ -749,6 +750,46 @@ lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
struct lp_sampler_dynamic_state *dynamic_state);
void lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info);
/**
 * Combine two per-channel value arrays along one axis according to the
 * sampler reduction mode: min, max, or (for WEIGHTED_AVERAGE and any other
 * value) a linear interpolation.
 *
 * \p flags is forwarded to the lerp only. \p num_chan entries of \p v00,
 * \p v01 and \p out are processed; \p x is the weight.
 */
void
lp_build_reduce_filter(struct lp_build_context *bld,
enum pipe_tex_reduction_mode mode,
unsigned flags,
unsigned num_chan,
LLVMValueRef x,
LLVMValueRef *v00,
LLVMValueRef *v01,
LLVMValueRef *out);
/**
 * Two-axis variant: combine four per-channel value arrays according to the
 * sampler reduction mode: min, max, or (for WEIGHTED_AVERAGE and any other
 * value) a 2D linear interpolation.
 *
 * \p flags is forwarded to the lerp only. \p num_chan entries of each
 * array are processed; \p x and \p y are the weights.
 */
void
lp_build_reduce_filter_2d(struct lp_build_context *bld,
enum pipe_tex_reduction_mode mode,
unsigned flags,
unsigned num_chan,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef *v00,
LLVMValueRef *v01,
LLVMValueRef *v10,
LLVMValueRef *v11,
LLVMValueRef *out);
/**
 * Three-axis variant: combine eight per-channel value arrays according to
 * the sampler reduction mode: min, max, or (for WEIGHTED_AVERAGE and any
 * other value) a 3D linear interpolation.
 *
 * \p flags is forwarded to the lerp only. \p num_chan entries of each
 * array are processed; \p x, \p y and \p z are the weights.
 */
void
lp_build_reduce_filter_3d(struct lp_build_context *bld,
enum pipe_tex_reduction_mode mode,
unsigned flags,
unsigned num_chan,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef *v000,
LLVMValueRef *v001,
LLVMValueRef *v010,
LLVMValueRef *v011,
LLVMValueRef *v100,
LLVMValueRef *v101,
LLVMValueRef *v110,
LLVMValueRef *v111,
LLVMValueRef *out);
#ifdef __cplusplus
}
#endif

View file

@ -705,34 +705,43 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
else {
/* general 1/2/3-D lerping */
if (dims == 1) {
packed = lp_build_lerp(&u8n,
lp_build_reduce_filter(&u8n,
bld->static_sampler_state->reduction_mode,
LP_BLD_LERP_PRESCALED_WEIGHTS,
1,
s_fpart,
neighbors[0][0][0],
neighbors[0][0][1],
LP_BLD_LERP_PRESCALED_WEIGHTS);
&neighbors[0][0][0],
&neighbors[0][0][1],
&packed);
} else if (dims == 2) {
/* 2-D lerp */
packed = lp_build_lerp_2d(&u8n,
lp_build_reduce_filter_2d(&u8n,
bld->static_sampler_state->reduction_mode,
LP_BLD_LERP_PRESCALED_WEIGHTS,
1,
s_fpart, t_fpart,
neighbors[0][0][0],
neighbors[0][0][1],
neighbors[0][1][0],
neighbors[0][1][1],
LP_BLD_LERP_PRESCALED_WEIGHTS);
&neighbors[0][0][0],
&neighbors[0][0][1],
&neighbors[0][1][0],
&neighbors[0][1][1],
&packed);
} else {
/* 3-D lerp */
assert(dims == 3);
packed = lp_build_lerp_3d(&u8n,
lp_build_reduce_filter_3d(&u8n,
bld->static_sampler_state->reduction_mode,
LP_BLD_LERP_PRESCALED_WEIGHTS,
1,
s_fpart, t_fpart, r_fpart,
neighbors[0][0][0],
neighbors[0][0][1],
neighbors[0][1][0],
neighbors[0][1][1],
neighbors[1][0][0],
neighbors[1][0][1],
neighbors[1][1][0],
neighbors[1][1][1],
LP_BLD_LERP_PRESCALED_WEIGHTS);
&neighbors[0][0][0],
&neighbors[0][0][1],
&neighbors[0][1][0],
&neighbors[0][1][1],
&neighbors[1][0][0],
&neighbors[1][0][1],
&neighbors[1][1][0],
&neighbors[1][1][1],
&packed);
}
}
@ -1092,9 +1101,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMConstVector(shuffle, u8n_bld.type.length), "");
}
colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
colors0, colors1,
LP_BLD_LERP_PRESCALED_WEIGHTS);
lp_build_reduce_filter(&u8n_bld,
bld->static_sampler_state->reduction_mode,
LP_BLD_LERP_PRESCALED_WEIGHTS,
1,
lod_fpart,
&colors0,
&colors1,
&colors0);
LLVMBuildStore(builder, colors0, colors_var);
}

View file

@ -1360,13 +1360,14 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
if (dims == 1) {
assert(!is_gather);
if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
/* Interpolate two samples from 1D image to produce one color */
for (chan = 0; chan < 4; chan++) {
colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
neighbors[0][0][chan],
neighbors[0][1][chan],
0);
}
lp_build_reduce_filter(texel_bld,
bld->static_sampler_state->reduction_mode,
0,
4,
s_fpart,
neighbors[0][0],
neighbors[0][1],
colors_out);
}
else {
LLVMValueRef cmpval0, cmpval1;
@ -1381,7 +1382,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
else {
/* 2D/3D texture */
struct lp_build_if_state corner_if;
LLVMValueRef colors0[4], colorss[4];
LLVMValueRef colors0[4], colorss[4] = { 0 };
/* get x0/x1 texels at y1 */
lp_build_sample_texel_soa(bld,
@ -1400,7 +1401,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
* another branch (with corner condition though edge would work
* as well) here.
*/
if (have_corners && accurate_cube_corners) {
if (have_corners && accurate_cube_corners &&
bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
LLVMValueRef have_corner, one_third;
@ -1619,15 +1621,17 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
else {
/* Bilinear interpolate the four samples from the 2D image / 3D slice */
for (chan = 0; chan < 4; chan++) {
colors0[chan] = lp_build_lerp_2d(texel_bld,
s_fpart, t_fpart,
neighbors[0][0][chan],
neighbors[0][1][chan],
neighbors[1][0][chan],
neighbors[1][1][chan],
0);
}
lp_build_reduce_filter_2d(texel_bld,
bld->static_sampler_state->reduction_mode,
0,
4,
s_fpart,
t_fpart,
neighbors[0][0],
neighbors[0][1],
neighbors[1][0],
neighbors[1][1],
colors0);
}
}
else {
@ -1655,7 +1659,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
}
if (have_corners && accurate_cube_corners) {
if (have_corners && accurate_cube_corners &&
bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
LLVMBuildStore(builder, colors0[0], colorss[0]);
LLVMBuildStore(builder, colors0[1], colorss[1]);
LLVMBuildStore(builder, colors0[2], colorss[2]);
@ -1699,22 +1704,27 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
/* Bilinear interpolate the four samples from the second Z slice */
for (chan = 0; chan < 4; chan++) {
colors1[chan] = lp_build_lerp_2d(texel_bld,
s_fpart, t_fpart,
neighbors1[0][0][chan],
neighbors1[0][1][chan],
neighbors1[1][0][chan],
neighbors1[1][1][chan],
0);
}
lp_build_reduce_filter_2d(texel_bld,
bld->static_sampler_state->reduction_mode,
0,
4,
s_fpart,
t_fpart,
neighbors1[0][0],
neighbors1[0][1],
neighbors1[1][0],
neighbors1[1][1],
colors1);
/* Linearly interpolate the two samples from the two 3D slices */
for (chan = 0; chan < 4; chan++) {
colors_out[chan] = lp_build_lerp(texel_bld,
r_fpart,
colors0[chan], colors1[chan],
0);
}
lp_build_reduce_filter(texel_bld,
bld->static_sampler_state->reduction_mode,
0,
4,
r_fpart,
colors0,
colors1,
colors_out);
}
else {
LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;

View file

@ -340,6 +340,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
return 1;
case PIPE_CAP_SAMPLER_REDUCTION_MINMAX:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_LOAD_CONSTBUF:

View file

@ -3399,6 +3399,7 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod);
debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod);
debug_printf(" .reduction_mode = %u\n", sampler->reduction_mode);
}
for (i = 0; i < key->nr_sampler_views; ++i) {
const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;