llvmpipe: Add support for 8x MSAA.
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Advertise format support for 8 samples.

Extend the triangle rasteriser to operate on fb_max_samples samples, rather than a hardcoded 4.

Since the single 64-bit input mask passed to fragment shaders only has room for four samples (16 bits per sample), add a second 64-bit input mask to handle samples 4 to 7.

Add sample positions for 8xMSAA.

Reviewed-by: Brian Paul <brian.paul@broadcom.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37001>
This commit is contained in:
Michal Krol 2025-08-26 09:46:40 +02:00 committed by Marge Bot
parent 92f2ef5a72
commit 0aa5bed029
14 changed files with 114 additions and 54 deletions

View file

@ -56,8 +56,14 @@ spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 6 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 6 upsample,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 8 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 8 upsample,Fail
spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 6 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 8 centroid-edges,Fail
spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail

View file

@ -199,7 +199,8 @@ typedef void
const void *dady,
uint8_t **color,
uint8_t *depth,
uint64_t mask,
uint64_t mask0,
uint64_t mask1,
struct lp_jit_thread_data *thread_data,
unsigned *stride,
unsigned depth_stride,

View file

@ -64,7 +64,7 @@
#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1))
#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1))
#define LP_MAX_SAMPLES 4
#define LP_MAX_SAMPLES 8
#define LP_MAX_THREADS 32

View file

@ -65,6 +65,15 @@ const float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 },
{ 0.125, 0.625 },
{ 0.625, 0.875 } };
/*
 * Per-sample positions used for 8x MSAA.
 *
 * One { x, y } pair per sample index; every coordinate is a multiple of
 * 1/16 and lies inside [0, 1).
 *
 * NOTE(review): the values look like the standard 8-sample pattern
 * published for Vulkan/D3D -- confirm against the specification.
 */
const float lp_sample_pos_8x[8][2] = {
   /* sample 0 */ { 0.5625f, 0.3125f },
   /* sample 1 */ { 0.4375f, 0.6875f },
   /* sample 2 */ { 0.8125f, 0.5625f },
   /* sample 3 */ { 0.3125f, 0.1875f },
   /* sample 4 */ { 0.1875f, 0.8125f },
   /* sample 5 */ { 0.0625f, 0.4375f },
   /* sample 6 */ { 0.6875f, 0.9375f },
   /* sample 7 */ { 0.9375f, 0.0625f },
};
/**
* Begin rasterizing a scene.
* Called once per scene by one thread.
@ -351,9 +360,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
depth_sample_stride = scene->zsbuf.sample_stride;
}
uint64_t mask = 0;
for (unsigned i = 0; i < scene->fb_max_samples; i++)
mask |= (uint64_t)(0xffff) << (16 * i);
static_assert(LP_MAX_SAMPLES <= 8, "Code below assumes max of 8 samples");
uint64_t mask[2] = { 0, 0 };
for (unsigned i = 0; i < MIN2(scene->fb_max_samples, LP_MAX_SAMPLES); i++)
mask[i / 4] |= (uint64_t)(0xffff) << (16 * (i % 4));
/* Propagate non-interpolated raster state. */
task->thread_data.raster_state.viewport_index = inputs->viewport_index;
@ -370,7 +380,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
GET_DADY(inputs),
color,
depth,
mask,
mask[0], mask[1],
&task->thread_data,
stride,
depth_stride,
@ -412,7 +422,7 @@ void
lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
uint64_t mask)
const uint64_t mask[2])
{
const struct lp_rast_state *state = task->state;
const struct lp_fragment_shader_variant *variant = state->variant;
@ -479,7 +489,7 @@ lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
GET_DADY(inputs),
color,
depth,
mask,
mask[0], mask[1],
&task->thread_data,
stride,
depth_stride,
@ -496,9 +506,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
unsigned x, unsigned y,
unsigned mask)
{
uint64_t new_mask = 0;
for (unsigned i = 0; i < task->scene->fb_max_samples; i++)
new_mask |= ((uint64_t)mask) << (16 * i);
static_assert(LP_MAX_SAMPLES <= 8, "Code below assumes max of 8 samples");
uint64_t new_mask[2] = { 0, 0 };
for (unsigned i = 0; i < MIN2(task->scene->fb_max_samples, LP_MAX_SAMPLES); i++)
new_mask[i / 4] |= ((uint64_t)mask) << (16 * (i % 4));
lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask);
}

View file

@ -74,6 +74,7 @@ struct cmd_bin;
struct lp_rasterizer_task;
extern const float lp_sample_pos_4x[4][2];
extern const float lp_sample_pos_8x[8][2];
/**

View file

@ -117,7 +117,7 @@ shade_quads(struct lp_rasterizer_task *task,
GET_DADY(inputs),
cbufs,
NULL,
mask,
mask, 0,
&task->thread_data,
strides, 0, 0, 0);
END_JIT_CALL();

View file

@ -141,7 +141,7 @@ void
lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
uint64_t mask);
const uint64_t mask[2]);
void
lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
@ -259,9 +259,10 @@ lp_rast_shade_quads_all(struct lp_rasterizer_task *task,
depth_stride = scene->zsbuf.stride;
}
uint64_t mask = 0;
for (unsigned i = 0; i < scene->fb_max_samples; i++)
mask |= (uint64_t)0xffff << (16 * i);
static_assert(LP_MAX_SAMPLES <= 8, "Code below assumes max of 8 samples");
uint64_t mask[2] = { 0, 0 };
for (unsigned i = 0; i < MIN2(scene->fb_max_samples, LP_MAX_SAMPLES); i++)
mask[i / 4] |= (uint64_t)0xffff << (16 * (i % 4));
/*
* The rasterizer may produce fragments outside our
@ -283,7 +284,7 @@ lp_rast_shade_quads_all(struct lp_rasterizer_task *task,
GET_DADY(inputs),
color,
depth,
mask,
mask[0], mask[1],
&task->thread_data,
stride,
depth_stride,

View file

@ -46,25 +46,26 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int x, int y,
const int64_t *c)
{
static_assert(LP_MAX_SAMPLES <= 8, "Code below assumes max of 8 samples");
#ifndef MULTISAMPLE
unsigned mask = 0xffff;
uint64_t mask[2] = { 0xffff, 0 };
#else
uint64_t mask = UINT64_MAX;
uint64_t mask[2] = { UINT64_MAX, UINT64_MAX };
#endif
for (unsigned j = 0; j < NR_PLANES; j++) {
#ifndef MULTISAMPLE
#ifdef RASTER_64
mask &= ~BUILD_MASK_LINEAR(((c[j] - 1) >> (int64_t)FIXED_ORDER),
-plane[j].dcdx >> FIXED_ORDER,
plane[j].dcdy >> FIXED_ORDER);
mask[0] &= ~BUILD_MASK_LINEAR(((c[j] - 1) >> (int64_t)FIXED_ORDER),
-plane[j].dcdx >> FIXED_ORDER,
plane[j].dcdy >> FIXED_ORDER);
#else
mask &= ~BUILD_MASK_LINEAR((c[j] - 1),
-plane[j].dcdx,
plane[j].dcdy);
mask[0] &= ~BUILD_MASK_LINEAR((c[j] - 1),
-plane[j].dcdx,
plane[j].dcdy);
#endif
#else
for (unsigned s = 0; s < 4; s++) {
for (unsigned s = 0; s < task->scene->fb_max_samples; s++) {
int64_t new_c = (c[j]) + ((IMUL64(task->scene->fixed_sample_pos[s][1], plane[j].dcdy) + IMUL64(task->scene->fixed_sample_pos[s][0], -plane[j].dcdx)) >> FIXED_ORDER);
uint32_t build_mask;
#ifdef RASTER_64
@ -76,14 +77,14 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
-plane[j].dcdx,
plane[j].dcdy);
#endif
mask &= ~((uint64_t)build_mask << (s * 16));
mask[s / 4] &= ~((uint64_t)build_mask << ((s % 4) * 16));
}
#endif
}
/* Now pass to the shader:
*/
if (mask)
if (mask[0] | mask[1])
lp_rast_shade_quads_mask_sample(task, &tri->inputs, x, y, mask);
}

View file

@ -645,6 +645,11 @@ lp_scene_begin_binning(struct lp_scene *scene,
scene->fixed_sample_pos[i][0] = util_iround(lp_sample_pos_4x[i][0] * FIXED_ONE);
scene->fixed_sample_pos[i][1] = util_iround(lp_sample_pos_4x[i][1] * FIXED_ONE);
}
} else if (scene->fb_max_samples == 8) {
for (unsigned i = 0; i < 8; i++) {
scene->fixed_sample_pos[i][0] = util_iround(lp_sample_pos_8x[i][0] * FIXED_ONE);
scene->fixed_sample_pos[i][1] = util_iround(lp_sample_pos_8x[i][1] * FIXED_ONE);
}
}
}

View file

@ -598,8 +598,16 @@ llvmpipe_is_format_supported(struct pipe_screen *_screen,
target == PIPE_TEXTURE_CUBE ||
target == PIPE_TEXTURE_CUBE_ARRAY);
if (sample_count != 0 && sample_count != 1 && sample_count != 4)
static_assert(LP_MAX_SAMPLES == 8, "Code below assumes support up to 8x");
switch (sample_count) {
case 0:
case 1:
case 4:
case 8:
break;
default:
return false;
}
if (bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SHADER_IMAGE))
if (!lp_storage_render_image_format_supported(format))

View file

@ -218,7 +218,8 @@ generate_quad_mask(struct gallivm_state *gallivm,
struct lp_type fs_type,
unsigned first_quad,
unsigned sample,
LLVMValueRef mask_input) /* int64 */
LLVMValueRef mask_input, /* int64, samples 0..3 */
LLVMValueRef mask_input1) /* int64, samples 4..7 */
{
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
@ -256,8 +257,8 @@ generate_quad_mask(struct gallivm_state *gallivm,
shift = 0;
}
mask_input = LLVMBuildLShr(builder, mask_input,
lp_build_const_int64(gallivm, 16 * sample), "");
mask_input = LLVMBuildLShr(builder, sample < 4 ? mask_input : mask_input1,
lp_build_const_int64(gallivm, 16 * (sample % 4)), "");
mask_input = LLVMBuildTrunc(builder, mask_input, i32t, "");
mask_input = LLVMBuildAnd(builder, mask_input,
lp_build_const_int32(gallivm, 0xffff), "");
@ -3162,7 +3163,7 @@ generate_fragment(struct llvmpipe_context *lp,
struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
LLVMTypeRef fs_elem_type;
LLVMTypeRef blend_vec_type;
LLVMTypeRef arg_types[16];
LLVMTypeRef arg_types[17];
LLVMTypeRef func_type;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
LLVMTypeRef int32p_type = LLVMPointerType(int32_type, 0);
@ -3181,7 +3182,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef depth_ptr;
LLVMValueRef depth_stride;
LLVMValueRef depth_sample_stride;
LLVMValueRef mask_input;
LLVMValueRef mask_input0;
LLVMValueRef mask_input1;
LLVMValueRef thread_data_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
@ -3249,12 +3251,13 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[7] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[8] = LLVMPointerType(int8p_type, 0); /* color */
arg_types[9] = int8p_type; /* depth */
arg_types[10] = LLVMInt64TypeInContext(gallivm->context); /* mask_input */
arg_types[11] = variant->jit_thread_data_ptr_type; /* per thread data */
arg_types[12] = int32p_type; /* stride */
arg_types[13] = int32_type; /* depth_stride */
arg_types[14] = int32p_type; /* color sample strides */
arg_types[15] = int32_type; /* depth sample stride */
arg_types[10] = LLVMInt64TypeInContext(gallivm->context); /* mask_input0 */
arg_types[11] = LLVMInt64TypeInContext(gallivm->context); /* mask_input1 */
arg_types[12] = variant->jit_thread_data_ptr_type; /* per thread data */
arg_types[13] = int32p_type; /* stride */
arg_types[14] = int32_type; /* depth_stride */
arg_types[15] = int32p_type; /* color sample strides */
arg_types[16] = int32_type; /* depth sample stride */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
arg_types, ARRAY_SIZE(arg_types), 0);
@ -3290,12 +3293,13 @@ generate_fragment(struct llvmpipe_context *lp,
dady_ptr = LLVMGetParam(function, 7);
color_ptr_ptr = LLVMGetParam(function, 8);
depth_ptr = LLVMGetParam(function, 9);
mask_input = LLVMGetParam(function, 10);
thread_data_ptr = LLVMGetParam(function, 11);
stride_ptr = LLVMGetParam(function, 12);
depth_stride = LLVMGetParam(function, 13);
color_sample_stride_ptr = LLVMGetParam(function, 14);
depth_sample_stride = LLVMGetParam(function, 15);
mask_input0 = LLVMGetParam(function, 10);
mask_input1 = LLVMGetParam(function, 11);
thread_data_ptr = LLVMGetParam(function, 12);
stride_ptr = LLVMGetParam(function, 13);
depth_stride = LLVMGetParam(function, 14);
color_sample_stride_ptr = LLVMGetParam(function, 15);
depth_sample_stride = LLVMGetParam(function, 16);
lp_build_name(context_ptr, "context");
lp_build_name(resources_ptr, "resources");
@ -3306,7 +3310,8 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
lp_build_name(mask_input, "mask_input");
lp_build_name(mask_input0, "mask_input0");
lp_build_name(mask_input1, "mask_input1");
lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(stride_ptr, "stride_ptr");
lp_build_name(depth_stride, "depth_stride");
@ -3366,6 +3371,17 @@ generate_fragment(struct llvmpipe_context *lp,
sample_pos_array =
LLVMConstArray(LLVMFloatTypeInContext(gallivm->context),
sample_pos_arr, 8);
} else if (key->multisample && key->coverage_samples == 8) {
LLVMValueRef sample_pos_arr[16];
for (unsigned i = 0; i < 8; i++) {
sample_pos_arr[i * 2] = LLVMConstReal(flt_type,
lp_sample_pos_8x[i][0]);
sample_pos_arr[i * 2 + 1] = LLVMConstReal(flt_type,
lp_sample_pos_8x[i][1]);
}
sample_pos_array =
LLVMConstArray(LLVMFloatTypeInContext(gallivm->context),
sample_pos_arr, 16);
} else {
LLVMValueRef sample_pos_arr[2];
sample_pos_arr[0] = LLVMConstReal(flt_type, 0.5);
@ -3419,7 +3435,7 @@ generate_fragment(struct llvmpipe_context *lp,
"sample_mask_ptr");
LLVMValueRef s_mask =
generate_quad_mask(gallivm, fs_type,
i * fs_type.length / 4, s, mask_input);
i * fs_type.length / 4, s, mask_input0, mask_input1);
LLVMValueRef smask_bit =
LLVMBuildAnd(builder, smask_val,
lp_build_const_int32(gallivm, (1 << s)), "");
@ -3440,7 +3456,7 @@ generate_fragment(struct llvmpipe_context *lp,
if (partial_mask) {
mask = generate_quad_mask(gallivm, fs_type,
i * fs_type.length / 4, 0, mask_input);
i * fs_type.length / 4, 0, mask_input0, mask_input1);
} else {
mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
}

View file

@ -53,7 +53,8 @@ no_op(const struct lp_jit_context *context,
const void *dady,
uint8_t **cbufs,
uint8_t *depth,
uint64_t mask,
uint64_t mask0,
uint64_t mask1,
struct lp_jit_thread_data *thread_data,
unsigned *strides,
unsigned depth_stride,
@ -152,14 +153,15 @@ red(const struct lp_jit_context *context,
const void *dady,
uint8_t **cbufs,
uint8_t *depth,
uint64_t int_mask,
uint64_t int_mask0,
uint64_t int_mask1,
struct lp_jit_thread_data *thread_data,
unsigned *strides,
unsigned depth_stride,
unsigned *sample_stride,
unsigned depth_sample_stride)
{
opaque_color(cbufs, strides, int_mask, 0xffff0000);
opaque_color(cbufs, strides, int_mask0, 0xffff0000);
(void)facing;
(void)depth;
(void)thread_data;
@ -180,14 +182,15 @@ green(const struct lp_jit_context *context,
const void *dady,
uint8_t **cbufs,
uint8_t *depth,
uint64_t int_mask,
uint64_t int_mask0,
uint64_t int_mask1,
struct lp_jit_thread_data *thread_data,
unsigned *strides,
unsigned depth_stride,
unsigned *sample_stride,
unsigned depth_sample_stride)
{
opaque_color(cbufs, strides, int_mask, 0xff00ff00);
opaque_color(cbufs, strides, int_mask0, 0xff00ff00);
(void)facing;
(void)depth;
(void)thread_data;

View file

@ -267,6 +267,10 @@ llvmpipe_get_sample_position(struct pipe_context *pipe,
out_value[0] = lp_sample_pos_4x[sample_index][0];
out_value[1] = lp_sample_pos_4x[sample_index][1];
break;
case 8:
out_value[0] = lp_sample_pos_8x[sample_index][0];
out_value[1] = lp_sample_pos_8x[sample_index][1];
break;
default:
break;
}

View file

@ -84,12 +84,14 @@ spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
spec@!opengl 1.1@depthstencil-default_fb-copypixels,Fail
spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=2,Fail
spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=4,Fail
spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=6,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=4,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=4,Fail
spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=8,Fail
spec@!opengl 1.1@linestipple,Fail
spec@!opengl 1.1@linestipple@Factor 2x,Fail
spec@!opengl 1.1@linestipple@Factor 3x,Fail
@ -362,6 +364,7 @@ spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail
spec@ext_framebuffer_multisample@clip-and-scissor-blit 8 upsample,Fail
spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
spec@ext_framebuffer_multisample@no-color 2 depth single,Fail