i965/gen6: Initial implementation of MSAA.

This patch enables MSAA for Gen6, by modifying intel_mipmap_tree to
understand multisampled buffers, adapting the rendering pipeline setup
to enable multisampled rendering, and adding multisample resolve
operations to brw_blorp_blit.cpp. Some preparation work is also
included for Gen7, but it is not yet enabled.

MSAA support is still fairly preliminary.  In particular, the
following are not yet supported:
- Fully general blits between MSAA and non-MSAA buffers.
- Formats other than RGBA8, DEPTH24, and STENCIL8.
- Centroid interpolation.
- Coverage parameters (glSampleCoverage, GL_SAMPLE_ALPHA_TO_COVERAGE,
  GL_SAMPLE_ALPHA_TO_ONE, GL_SAMPLE_COVERAGE, GL_SAMPLE_COVERAGE_VALUE,
  GL_SAMPLE_COVERAGE_INVERT).

Fixes piglit tests "EXT_framebuffer_multisample/accuracy" on
i965/Gen6.

v2:
- In intel_alloc_renderbuffer_storage(), quantize the requested number
  of samples to the next higher sample count supported by the
  hardware.  This ensures that a query of GL_SAMPLES will return the
  correct value.  It also ensures that MSAA is fully disabled on Gen7
  for now (since Gen7 MSAA support doesn't work yet).
- When reading from a non-MSAA surface, ensure that s_is_zero is true
  so that we won't try to read from a nonexistent sample.
This commit is contained in:
Paul Berry 2012-04-29 21:41:42 -07:00
parent 506d70be21
commit 19e9b24626
23 changed files with 662 additions and 121 deletions

View file

@ -88,6 +88,7 @@ i965_C_FILES = \
gen6_clip_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
gen6_multisample_state.c \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \

View file

@ -36,7 +36,8 @@ brw_blorp_mip_info::brw_blorp_mip_info()
}
brw_blorp_surface_info::brw_blorp_surface_info()
: map_stencil_as_y_tiled(false)
: map_stencil_as_y_tiled(false),
num_samples(0)
{
}
@ -60,11 +61,15 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt,
if (mt->format == MESA_FORMAT_S8) {
/* The miptree is a W-tiled stencil buffer. Surface states can't be set
* up for W tiling, so we'll need to use Y tiling and have the WM
* program swizzle the coordinates.
* program swizzle the coordinates. Furthermore, we need to set up the
* surface state as single-sampled, because the memory layout of related
* samples doesn't match between W and Y tiling.
*/
this->map_stencil_as_y_tiled = true;
this->num_samples = 0;
} else {
this->map_stencil_as_y_tiled = false;
this->num_samples = mt->num_samples;
}
}
@ -88,6 +93,7 @@ brw_blorp_params::brw_blorp_params()
y1(0),
depth_format(0),
hiz_op(GEN6_HIZ_OP_NONE),
num_samples(0),
use_wm_prog(false)
{
}

View file

@ -97,6 +97,8 @@ public:
* width and height of the buffer.
*/
bool map_stencil_as_y_tiled;
unsigned num_samples;
};
@ -151,6 +153,7 @@ public:
brw_blorp_surface_info src;
brw_blorp_surface_info dst;
enum gen6_hiz_op hiz_op;
unsigned num_samples;
bool use_wm_prog;
brw_blorp_wm_push_constants wm_push_consts;
};
@ -177,16 +180,39 @@ public:
struct brw_blorp_blit_prog_key
{
/* Number of samples per pixel that have been configured in the surface
* state for texturing from.
*/
unsigned tex_samples;
/* Actual number of samples per pixel in the source image. */
unsigned src_samples;
/* Number of samples per pixel that have been configured in the render
* target.
*/
unsigned rt_samples;
/* Actual number of samples per pixel in the destination image. */
unsigned dst_samples;
/* True if the source image is W tiled. If true, the surface state for the
* source image must be configured as Y tiled.
* source image must be configured as Y tiled, and tex_samples must be 0.
*/
bool src_tiled_w;
/* True if the destination image is W tiled. If true, the surface state
* for the render target must be configured as Y tiled.
* for the render target must be configured as Y tiled, and rt_samples must
* be 0.
*/
bool dst_tiled_w;
/* True if all source samples should be blended together to produce each
* destination pixel. If true, src_tiled_w must be false, tex_samples must
* equal src_samples, and tex_samples must be nonzero.
*/
bool blend;
/* True if the rectangle being sent through the rendering pipeline might be
* larger than the destination rectangle, so the WM program should kill any
* pixels that are outside the destination rectangle.

View file

@ -215,11 +215,29 @@ brw_blorp_framebuffer(struct intel_context *intel,
*
* The bulk of the work done by the WM program is to wrap and unwrap the
* coordinate transformations used by the hardware to store surfaces in
* memory. The hardware transforms a pixel location (X, Y) to a memory offset
* by the following formulas:
* memory. The hardware transforms a pixel location (X, Y, S) (where S is the
* sample index for a multisampled surface) to a memory offset by the
* following formulas:
*
* offset = tile(tiling_format, X, Y)
* (X, Y) = detile(tiling_format, offset)
* offset = tile(tiling_format, encode_msaa(num_samples, X, Y, S))
* (X, Y, S) = decode_msaa(num_samples, detile(tiling_format, offset))
*
* For a single-sampled surface, encode_msaa() and decode_msaa are the
* identity function:
*
* encode_msaa(1, X, Y, 0) = (X, Y)
* decode_msaa(1, X, Y) = (X, Y, 0)
*
* For a 4x multisampled surface, encode_msaa() embeds the sample number into
* bit 1 of the X and Y coordinates:
*
* encode_msaa(4, X, Y, S) = (X', Y')
* where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
* Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1)
* decode_msaa(4, X, Y) = (X', Y', S)
* where X' = (X & ~0b11) >> 1 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (Y & 0b10) | (X & 0b10) >> 1
*
* For X tiling, tile() combines together the low-order bits of the X and Y
* coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512
@ -239,7 +257,7 @@ brw_blorp_framebuffer(struct intel_context *intel,
* | (A & 0b111111111)
*
* (In all tiling formulas, cpp is the number of bytes occupied by a single
* pixel ("chars per pixel"), and tile_pitch is the number of 4k tiles
* sample ("chars per pixel"), and tile_pitch is the number of 4k tiles
* required to fill the width of the surface).
*
* For Y tiling, tile() combines together the low-order bits of the X and Y
@ -301,7 +319,7 @@ brw_blorp_framebuffer(struct intel_context *intel,
* X' = A % pitch
*
* (In these formulas, pitch is the number of bytes occupied by a single row
* of pixels).
* of samples).
*/
class brw_blorp_blit_program
{
@ -319,8 +337,12 @@ private:
void alloc_push_const_regs(int base_reg);
void compute_frag_coords();
void translate_tiling(bool old_tiled_w, bool new_tiled_w);
void encode_msaa(unsigned num_samples);
void decode_msaa(unsigned num_samples);
void kill_if_outside_dst_rect();
void translate_dst_to_src();
void single_to_blend();
void sample();
void texel_fetch();
void texture_lookup(GLuint msg_type,
struct brw_reg mrf_u, struct brw_reg mrf_v);
@ -364,6 +386,14 @@ private:
*/
int xy_coord_index;
/* True if, at the point in the program currently being compiled, the
* sample index is known to be zero.
*/
bool s_is_zero;
/* Register storing the sample index when s_is_zero is false. */
struct brw_reg sample_index;
/* Temporaries */
struct brw_reg t1;
struct brw_reg t2;
@ -395,6 +425,37 @@ const GLuint *
brw_blorp_blit_program::compile(struct brw_context *brw,
GLuint *program_size)
{
/* Sanity checks */
if (key->src_tiled_w) {
/* If the source image is W tiled, then tex_samples must be 0.
* Otherwise, after conversion between W and Y tiling, there's no
* guarantee that the sample index will be 0.
*/
assert(key->tex_samples == 0);
}
if (key->dst_tiled_w) {
/* If the destination image is W tiled, then rt_samples must be 0.
* Otherwise, after conversion between W and Y tiling, there's no
* guarantee that all samples corresponding to a single pixel will still
* be together.
*/
assert(key->rt_samples == 0);
}
if (key->blend) {
/* We are blending, which means we'll be using a SAMPLE message, which
* causes the hardware to pick up all of the samples corresponding
* to this pixel and average them together. Since we'll be relying on
* the hardware to find all of the samples and combine them together,
* the surface state for the texture must be configured with the correct
* tiling and sample count.
*/
assert(!key->src_tiled_w);
assert(key->tex_samples == key->src_samples);
assert(key->tex_samples > 0);
}
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
alloc_regs();
@ -405,22 +466,29 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
const bool tex_tiled_w = false;
/* The address that data will be written to is determined by the
* coordinates supplied to the WM thread and the tiling of the render
* target, according to the formula:
* coordinates supplied to the WM thread and the tiling and sample count of
* the render target, according to the formula:
*
* (X, Y) = detile(rt_tiling, offset)
* (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset))
*
* If the actual tiling of the destination surface is not the same as the
* configuration of the render target, then these coordinates are wrong and
* we have to adjust them to compensate for the difference.
* If the actual tiling and sample count of the destination surface are not
* the same as the configuration of the render target, then these
* coordinates are wrong and we have to adjust them to compensate for the
* difference.
*/
if (rt_tiled_w != key->dst_tiled_w)
if (rt_tiled_w != key->dst_tiled_w ||
key->rt_samples != key->dst_samples) {
encode_msaa(key->rt_samples);
/* Now (X, Y) = detile(rt_tiling, offset) */
translate_tiling(rt_tiled_w, key->dst_tiled_w);
/* Now (X, Y) = detile(dst_tiling, offset) */
decode_msaa(key->dst_samples);
}
/* Now (X, Y) = detile(dst_tiling, offset).
/* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)).
*
* That is: X and Y now contain the true coordinates of the data that the
* WM thread should output.
* That is: X, Y and S now contain the true coordinates and sample index of
* the data that the WM thread should output.
*
* If we need to kill pixels that are outside the destination rectangle,
* now is the time to do it.
@ -432,31 +500,50 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
/* Next, apply a translation to obtain coordinates in the source image. */
translate_dst_to_src();
/* X and Y are now the coordinates of the pixel in the source image that we
* want to texture from.
*
* The address that we want to fetch from is
* related to the X and Y values according to the formula:
*
* (X, Y) = detile(src_tiling, offset).
*
* If the actual tiling of the source surface is not the same as the
* configuration of the texture, then we need to adjust the coordinates to
* compensate for the difference.
/* If the source image is not multisampled, then we want to fetch sample
* number 0, because that's the only sample there is.
*/
if (tex_tiled_w != key->src_tiled_w)
translate_tiling(key->src_tiled_w, tex_tiled_w);
if (key->src_samples == 0)
s_is_zero = true;
/* Now (X, Y) = detile(tex_tiling, offset).
*
* In other words: X and Y now contain values which, when passed to
* the texturing unit, will cause data to be read from the correct
* memory location. So we can fetch the texel now.
/* X, Y, and S are now the coordinates of the pixel in the source image
* that we want to texture from. Exception: if we are blending, then S is
* irrelevant, because we are going to fetch all samples.
*/
texel_fetch();
if (key->blend) {
single_to_blend();
sample();
} else {
/* We aren't blending, which means we just want to fetch a single sample
* from the source surface. The address that we want to fetch from is
* related to the X, Y and S values according to the formula:
*
* (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)).
*
* If the actual tiling and sample count of the source surface are not
* the same as the configuration of the texture, then we need to adjust
* the coordinates to compensate for the difference.
*/
if (tex_tiled_w != key->src_tiled_w ||
key->tex_samples != key->src_samples) {
encode_msaa(key->src_samples);
/* Now (X, Y) = detile(src_tiling, offset) */
translate_tiling(key->src_tiled_w, tex_tiled_w);
/* Now (X, Y) = detile(tex_tiling, offset) */
decode_msaa(key->tex_samples);
}
/* Finally, write the fetched value to the render target and terminate the
* thread.
/* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
*
* In other words: X, Y, and S now contain values which, when passed to
* the texturing unit, will cause data to be read from the correct
* memory location. So we can fetch the texel now.
*/
texel_fetch();
}
/* Finally, write the fetched (or blended) value to the render target and
* terminate the thread.
*/
render_target_write();
return brw_get_program(&func, program_size);
@ -499,6 +586,8 @@ brw_blorp_blit_program::alloc_regs()
= vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
}
this->xy_coord_index = 0;
this->sample_index
= vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
@ -511,11 +600,14 @@ brw_blorp_blit_program::alloc_regs()
/* In the code that follows, X and Y can be used to quickly refer to the
* active elements of x_coords and y_coords, and Xp and Yp ("X prime" and "Y
* prime") to the inactive elements.
*
* S can be used to quickly refer to sample_index.
*/
#define X x_coords[xy_coord_index]
#define Y y_coords[xy_coord_index]
#define Xp x_coords[!xy_coord_index]
#define Yp y_coords[!xy_coord_index]
#define S sample_index
/* Quickly swap the roles of (X, Y) and (Xp, Yp). Saves us from having to do
* MOVs to transfer (Xp, Yp) to (X, Y) after a coordinate transformation.
@ -564,6 +656,12 @@ brw_blorp_blit_program::compute_frag_coords()
* pixels n+2 and n+3 are in the bottom half of the subspan.
*/
brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
/* Since we always run the WM in a mode that causes a single fragment
* dispatch per pixel, it's not meaningful to compute a sample value. Just
* set it to 0.
*/
s_is_zero = true;
}
/**
@ -655,6 +753,86 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
}
}
/**
* Emit code to compensate for the difference between MSAA and non-MSAA
* surfaces.
*
* This code modifies the X and Y coordinates according to the formula:
*
* (X', Y') = encode_msaa(num_samples, X, Y, S)
*
* (See brw_blorp_blit_program).
*
* A num_samples of 0 emits no code. Every nonzero num_samples is encoded
* using the 4x layout; no other layout is implemented here.
*
* When s_is_zero is set, the instructions that fold S into the result are
* not emitted. Neither S nor s_is_zero is modified by this function.
*/
void
brw_blorp_blit_program::encode_msaa(unsigned num_samples)
{
if (num_samples == 0) {
/* No translation necessary. */
} else {
/* encode_msaa(4, X, Y, S) = (X', Y')
* where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
* Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1)
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
if (!s_is_zero) {
/* Bit 0 of S is merged in before the shift so that it lands in bit 1. */
brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */
}
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
| (S & 0b1) << 1 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
if (!s_is_zero) {
/* Bit 1 of S is already in position, so it is merged in after the shift. */
brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
}
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
/* The results were written to (Xp, Yp); make them the active (X, Y). */
SWAP_XY_AND_XPYP();
}
}
/**
* Emit code to compensate for the difference between MSAA and non-MSAA
* surfaces.
*
* This code modifies the X and Y coordinates according to the formula:
*
* (X', Y', S) = decode_msaa(num_samples, X, Y)
*
* (See brw_blorp_blit_program).
*
* A num_samples of 0 emits no code and records that the sample index is
* zero (s_is_zero = true). Every nonzero num_samples is decoded using the
* 4x layout; in that case S is written and s_is_zero is cleared.
*/
void
brw_blorp_blit_program::decode_msaa(unsigned num_samples)
{
if (num_samples == 0) {
/* No translation necessary. */
s_is_zero = true;
} else {
/* decode_msaa(4, X, Y) = (X', Y', S)
* where X' = (X & ~0b11) >> 1 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (Y & 0b10) | (X & 0b10) >> 1
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffc)); /* X & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b11) >> 1 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
/* S is computed from the still-encoded X and Y, i.e. before the swap
* below makes the decoded (Xp, Yp) the active coordinates.
*/
brw_AND(&func, t1, Y, brw_imm_uw(2)); /* Y & 0b10 */
brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
brw_OR(&func, S, t1, t2);
s_is_zero = false;
SWAP_XY_AND_XPYP();
}
}
/**
* Emit code that kills pixels whose X and Y coordinates are outside the
* boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
@ -693,6 +871,36 @@ brw_blorp_blit_program::translate_dst_to_src()
SWAP_XY_AND_XPYP();
}
/**
* Emit code to transform the X and Y coordinates as needed for blending
* together the different samples in an MSAA texture.
*
* The transformed coordinates (2 * X + 1, 2 * Y + 1) are written to
* (Xp, Yp), which then become the active (X, Y).
*/
void
brw_blorp_blit_program::single_to_blend()
{
/* When looking up samples in an MSAA texture using the SAMPLE message,
* Gen6 requires the texture coordinates to be odd integers (so that they
* correspond to the center of a 2x2 block representing the four samples
* that make up a pixel). So we need to multiply our X and Y coordinates
* each by 2 and then add 1.
*/
brw_SHL(&func, t1, X, brw_imm_w(1)); /* X * 2 */
brw_SHL(&func, t2, Y, brw_imm_w(1)); /* Y * 2 */
brw_ADD(&func, Xp, t1, brw_imm_w(1)); /* X * 2 + 1 */
brw_ADD(&func, Yp, t2, brw_imm_w(1)); /* Y * 2 + 1 */
SWAP_XY_AND_XPYP();
}
/**
* Emit code to look up a value in the texture using the SAMPLE message (which
* does blending of MSAA surfaces).
*
* The lookup is issued through texture_lookup() with the float-typed U and V
* message registers (mrf_u_float, mrf_v_float).
*/
void
brw_blorp_blit_program::sample()
{
texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE, mrf_u_float, mrf_v_float);
}
/**
* Emit code to look up a value in the texture using the SAMPLE_LD message
* (which does a simple texel fetch).
@ -700,6 +908,7 @@ brw_blorp_blit_program::translate_dst_to_src()
void
brw_blorp_blit_program::texel_fetch()
{
assert(s_is_zero);
texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
retype(mrf_u_float, BRW_REGISTER_TYPE_UD),
retype(mrf_v_float, BRW_REGISTER_TYPE_UD));
@ -816,6 +1025,39 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
use_wm_prog = true;
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
if (src_mt->num_samples > 0 && dst_mt->num_samples > 0) {
/* We are blitting from a multisample buffer to a multisample buffer, so
* we must preserve samples within a pixel. This means we have to
* configure the render target and texture surface states as
* single-sampled, so that the WM program can access each sample
* individually.
*/
src.num_samples = dst.num_samples = 0;
}
/* The render path must be configured to use the same number of samples as
* the destination buffer.
*/
num_samples = dst.num_samples;
GLenum base_format = _mesa_get_format_base_format(src_mt->format);
if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about depth/stencil? */
base_format != GL_STENCIL_INDEX &&
src_mt->num_samples > 0 && dst_mt->num_samples == 0) {
/* We are downsampling a color buffer, so blend. */
wm_prog_key.blend = true;
}
/* src_samples and dst_samples are the true sample counts */
wm_prog_key.src_samples = src_mt->num_samples;
wm_prog_key.dst_samples = dst_mt->num_samples;
/* tex_samples and rt_samples are the sample counts that are set up in
* SURFACE_STATE.
*/
wm_prog_key.tex_samples = src.num_samples;
wm_prog_key.rt_samples = dst.num_samples;
wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled;
wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled;
x0 = wm_push_consts.dst_x0 = dst_x0;
@ -825,6 +1067,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x);
wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y);
if (dst.num_samples == 0 && dst_mt->num_samples > 0) {
/* We must expand the rectangle we send through the rendering pipeline,
* to account for the fact that we are mapping the destination region as
* single-sampled when it is in fact multisampled. We must also align
* it to a multiple of the multisampling pattern, because the
* differences between multisampled and single-sampled surface formats
* will mean that pixels are scrambled within the multisampling pattern.
* TODO: what if this makes the coordinates too large?
*/
x0 = (x0 * 2) & ~3;
y0 = (y0 * 2) & ~3;
x1 = ALIGN(x1 * 2, 4);
y1 = ALIGN(y1 * 2, 4);
wm_prog_key.use_kill = true;
}
if (dst.map_stencil_as_y_tiled) {
/* We must modify the rectangle we send through the rendering pipeline,
* to account for the fact that we are mapping it as Y-tiled when it is

View file

@ -1099,6 +1099,14 @@ brw_blorp_framebuffer(struct intel_context *intel,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter);
/* gen6_multisample_state.c */

/* Emit a 3DSTATE_MULTISAMPLE packet for the given sample count
 * (0 means single-sampled).
 */
void
gen6_emit_3dstate_multisample(struct brw_context *brw,
unsigned num_samples);

/* Emit a 3DSTATE_SAMPLE_MASK packet for the given sample count
 * (0 means single-sampled).
 */
void
gen6_emit_3dstate_sample_mask(struct brw_context *brw,
unsigned num_samples);
/*======================================================================

View file

@ -456,6 +456,11 @@
/* Surface state DW4 */
#define BRW_SURFACE_MIN_LOD_SHIFT 28
#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
#define GEN7_SURFACE_MULTISAMPLECOUNT_1 0
#define GEN7_SURFACE_MULTISAMPLECOUNT_4 2
#define GEN7_SURFACE_MULTISAMPLECOUNT_8 3
/* Surface state DW5 */
#define BRW_SURFACE_X_OFFSET_SHIFT 25
@ -1305,6 +1310,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
@ -1388,6 +1394,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
#define _3DSTATE_PS 0x7820 /* GEN7+ */

View file

@ -782,33 +782,16 @@ static void upload_invariant_state( struct brw_context *brw )
ADVANCE_BATCH();
}
if (intel->gen >= 6) {
if (intel->gen == 6) {
int i;
int len = intel->gen >= 7 ? 4 : 3;
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0); /* positions for 4/8-sample */
if (intel->gen >= 7)
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
if (intel->gen < 7) {
for (i = 0; i < 4; i++) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(i << SVB_INDEX_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0xffffffff);
ADVANCE_BATCH();
}
for (i = 0; i < 4; i++) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(i << SVB_INDEX_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0xffffffff);
ADVANCE_BATCH();
}
}

View file

@ -95,6 +95,7 @@ extern const struct brw_tracked_state gen6_color_calc_state;
extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_gs_binding_table;
extern const struct brw_tracked_state gen6_multisample_state;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
@ -181,6 +182,7 @@ void *brw_state_batch(struct brw_context *brw,
/* brw_wm_surface_state.c */
void gen4_init_vtable_surface_functions(struct brw_context *brw);
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
void brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
int width,
@ -197,6 +199,8 @@ GLuint translate_tex_format(gl_format mesa_format,
/* gen7_wm_surface_state.c */
void gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling);
void gen7_set_surface_num_multisamples(struct gen7_surface_state *surf,
unsigned num_samples);
void gen7_init_vtable_surface_functions(struct brw_context *brw);
void gen7_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,

View file

@ -153,6 +153,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_samplers,
&gen6_sampler_state,
&gen6_multisample_state, /* TODO: is this the right spot? */
&gen6_vs_state,
&gen6_gs_state,
@ -221,6 +222,7 @@ const struct brw_tracked_state *gen7_atoms[] =
&brw_wm_binding_table,
&gen7_samplers,
&gen6_multisample_state, /* TODO: is this the right spot? */
&gen7_disable_stages,
&gen7_vs_state,

View file

@ -633,6 +633,17 @@ brw_get_surface_tiling_bits(uint32_t tiling)
}
}
/**
 * Translate a sample count into the multisample-count bits that are placed
 * in surface state DW4.
 *
 * \param num_samples  sample count of the surface; 0 means single-sampled.
 * \return BRW_SURFACE_MULTISAMPLECOUNT_1 when num_samples is 0, otherwise
 *         BRW_SURFACE_MULTISAMPLECOUNT_4 (every nonzero count is treated
 *         as 4x).
 */
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   return num_samples == 0 ? BRW_SURFACE_MULTISAMPLECOUNT_1
                           : BRW_SURFACE_MULTISAMPLECOUNT_4;
}
static void
brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
{
@ -943,7 +954,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
intel_image->base.Base.Level,
intel_image->base.Base.Level,
width, height, depth,
true);
true,
0 /* num_samples */);
intel_miptree_copy_teximage(intel, intel_image, new_mt);
intel_miptree_reference(&irb->mt, intel_image->mt);
@ -993,7 +1005,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT);
surf[4] = 0;
surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are missing, so

View file

@ -100,28 +100,8 @@ gen6_blorp_emit_batch_head(struct brw_context *brw,
ADVANCE_BATCH();
}
/* 3DSTATE_MULTISAMPLE */
{
int length = intel->gen == 7 ? 4 : 3;
BEGIN_BATCH(length);
OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0);
if (length >= 4)
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_SAMPLE_MASK */
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
}
gen6_emit_3dstate_multisample(brw, params->num_samples);
gen6_emit_3dstate_sample_mask(brw, params->num_samples);
/* CMD_STATE_BASE_ADDRESS
*
@ -426,6 +406,10 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
uint32_t wm_surf_offset;
uint32_t width, height;
surface->get_miplevel_dims(&width, &height);
if (surface->num_samples > 0) { /* TODO: seems clumsy */
width /= 2;
height /= 2;
}
if (surface->map_stencil_as_y_tiled) {
width *= 2;
height /= 2;
@ -462,7 +446,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
0 << BRW_SURFACE_DEPTH_SHIFT |
(pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT);
surf[4] = 0;
surf[4] = brw_get_surface_num_multisamples(surface->num_samples);
surf[5] = (0 << BRW_SURFACE_X_OFFSET_SHIFT |
0 << BRW_SURFACE_Y_OFFSET_SHIFT |
@ -695,7 +679,9 @@ gen6_blorp_emit_sf_config(struct brw_context *brw,
OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
for (int i = 0; i < 18; ++i)
OUT_BATCH(0); /* dw2 */
OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
for (int i = 0; i < 16; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
}
@ -754,6 +740,14 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
}
if (params->num_samples > 0) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
}
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
@ -761,7 +755,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
OUT_BATCH(0); /* No scratch needed */
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6); /* only position */
OUT_BATCH(dw6);
OUT_BATCH(0); /* No other programs */
OUT_BATCH(0); /* No other programs */
ADVANCE_BATCH();

View file

@ -0,0 +1,102 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_defines.h"
/**
 * 3DSTATE_MULTISAMPLE
 *
 * Emits the packet with pixel location "center".  A nonzero num_samples
 * selects MS_NUMSAMPLES_4 together with a fixed sample-position dword;
 * zero selects MS_NUMSAMPLES_1 with a zero position dword.  On Gen7+ the
 * packet is one dword longer and that extra dword is written as zero.
 */
void
gen6_emit_3dstate_multisample(struct brw_context *brw,
                              unsigned num_samples)
{
   /* NOTE(review): keep this local named "intel"; the batch macros below
    * presumably refer to it by name -- confirm against intel_batchbuffer.h.
    */
   struct intel_context *intel = &brw->intel;
   int len;

   /* TODO: MSAA only implemented on Gen6 */
   if (intel->gen != 6) {
      assert(num_samples == 0);
   }

   if (intel->gen >= 7)
      len = 4;
   else
      len = 3;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
   if (num_samples > 0) {
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_4);
      OUT_BATCH(0xae2ae662); /* positions for 4/8-sample */
   } else {
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
   }
   if (len == 4)
      OUT_BATCH(0);
   ADVANCE_BATCH();
}
/**
 * 3DSTATE_SAMPLE_MASK
 *
 * Emits a mask of 15 (four low bits set) when num_samples is nonzero and a
 * mask of 1 otherwise.
 */
void
gen6_emit_3dstate_sample_mask(struct brw_context *brw,
                              unsigned num_samples)
{
   /* NOTE(review): keep this local named "intel"; the batch macros below
    * presumably refer to it by name -- confirm against intel_batchbuffer.h.
    */
   struct intel_context *intel = &brw->intel;
   unsigned mask = 1;

   /* TODO: MSAA only implemented on Gen6 */
   if (intel->gen != 6) {
      assert(num_samples == 0);
   }

   if (num_samples > 0)
      mask = 15;

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
   OUT_BATCH(mask);
   ADVANCE_BATCH();
}
/**
 * State-atom callback: emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK
 * using the sample count of the first color draw buffer, or 0 when there
 * is no such buffer.
 */
static void upload_multisample_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   /* _NEW_BUFFERS */
   const unsigned num_samples =
      ctx->DrawBuffer->_ColorDrawBuffers[0]
         ? ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples
         : 0;

   /* 3DSTATE_MULTISAMPLE is nonpipelined. */
   intel_emit_post_sync_nonzero_flush(intel);

   gen6_emit_3dstate_multisample(brw, num_samples);
   gen6_emit_3dstate_sample_mask(brw, num_samples);
}
/* Tracked-state atom for the multisample packets.  It is flagged by
 * _NEW_BUFFERS (the emit function reads the draw buffer's sample count)
 * and by BRW_NEW_CONTEXT, and its emit callback is
 * upload_multisample_state().
 */
const struct brw_tracked_state gen6_multisample_state = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_multisample_state
};

View file

@ -122,6 +122,10 @@ upload_sf_state(struct brw_context *brw)
int i;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
bool multisampled = false;
if (ctx->DrawBuffer->_ColorDrawBuffers[0])
multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
int attr = 0, input_index = 0;
int urb_entry_read_offset = 1;
float point_size;
@ -226,13 +230,20 @@ upload_sf_state(struct brw_context *brw)
}
/* _NEW_LINE */
dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
GEN6_SF_LINE_WIDTH_SHIFT;
{
uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
/* TODO: line width of 0 is not allowed when MSAA enabled */
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
dw3 |= GEN6_SF_LINE_AA_ENABLE;
dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
if (multisampled)
dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled ||

View file

@ -98,6 +98,11 @@ upload_wm_state(struct brw_context *brw)
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
bool multisampled = false;
/* _NEW_BUFFERS */
if (ctx->DrawBuffer->_ColorDrawBuffers[0])
multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
@ -185,6 +190,13 @@ upload_wm_state(struct brw_context *brw)
dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) <<
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
if (multisampled) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
}
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));

View file

@ -143,6 +143,10 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
uint32_t wm_surf_offset;
uint32_t width, height;
surface->get_miplevel_dims(&width, &height);
if (surface->num_samples > 0) { /* TODO: wrong for 8x */
width /= 2;
height /= 2;
}
if (surface->map_stencil_as_y_tiled) {
width *= 2;
height /= 2;
@ -181,6 +185,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
pitch_bytes *= 2;
surf->ss3.pitch = pitch_bytes - 1;
gen7_set_surface_num_multisamples(surf, surface->num_samples);
if (intel->is_haswell) {
surf->ss7.shader_chanel_select_r = HSW_SCS_RED;
surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
@ -366,7 +372,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
OUT_BATCH(params->depth_format <<
GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
OUT_BATCH(0);
OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@ -397,7 +403,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
uint32_t dw1 = 0;
uint32_t dw1 = 0, dw2 = 0;
switch (params->hiz_op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
@ -423,10 +429,18 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
}
if (params->num_samples > 0) {
dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
} else {
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
}
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
OUT_BATCH(0);
OUT_BATCH(dw2);
ADVANCE_BATCH();
}

View file

@ -161,6 +161,9 @@ upload_sf_state(struct brw_context *brw)
float point_size;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
bool multisampled = false;
if (ctx->DrawBuffer->_ColorDrawBuffers[0])
multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
dw1 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
@ -243,8 +246,13 @@ upload_sf_state(struct brw_context *brw)
dw2 |= GEN6_SF_SCISSOR_ENABLE;
/* _NEW_LINE */
dw2 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
GEN6_SF_LINE_WIDTH_SHIFT;
{
uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
/* TODO: line width of 0 is not allowed when MSAA enabled */
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
dw2 |= GEN6_SF_LINE_AA_ENABLE;
dw2 |= GEN6_SF_LINE_AA_MODE_TRUE;
@ -253,6 +261,8 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Line.StippleFlag && intel->is_haswell) {
dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
}
if (multisampled)
dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
/* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select?
* FINISHME: AA Line Distance Mode?

View file

@ -39,9 +39,14 @@ upload_wm_state(struct brw_context *brw)
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
bool writes_depth = false;
uint32_t dw1;
bool multisampled = false;
uint32_t dw1, dw2;
dw1 = 0;
/* _NEW_BUFFERS */
if (ctx->DrawBuffer->_ColorDrawBuffers[0])
multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
dw1 = dw2 = 0;
dw1 |= GEN7_WM_STATISTICS_ENABLE;
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
@ -74,11 +79,18 @@ upload_wm_state(struct brw_context *brw)
dw1 & GEN7_WM_KILL_ENABLE) {
dw1 |= GEN7_WM_DISPATCH_ENABLE;
}
if (multisampled) {
dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
} else {
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
}
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
OUT_BATCH(0);
OUT_BATCH(dw2);
ADVANCE_BATCH();
}

View file

@ -54,6 +54,20 @@ gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling)
}
}
/**
 * Program the multisample-count field (ss4.num_multisamples) of a Gen7
 * surface state from a sample count.
 *
 * A count of 0 selects the single-sample encoding, 1..4 selects the 4x
 * encoding, and anything above 4 selects the 8x encoding.
 */
void
gen7_set_surface_num_multisamples(struct gen7_surface_state *surf,
                                  unsigned num_samples)
{
   if (num_samples == 0) {
      surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_1;
      return;
   }

   surf->ss4.num_multisamples = num_samples <= 4
      ? GEN7_SURFACE_MULTISAMPLECOUNT_4
      : GEN7_SURFACE_MULTISAMPLECOUNT_8;
}
static void
gen7_update_buffer_texture_surface(struct gl_context *ctx, GLuint unit)
{
@ -328,6 +342,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
gen7_set_surface_tiling(surf, region->tiling);
surf->ss3.pitch = (region->pitch * region->cpp) - 1;
gen7_set_surface_num_multisamples(surf, irb->mt->num_samples);
if (intel->is_haswell) {
surf->ss7.shader_chanel_select_r = HSW_SCS_RED;
surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN;

View file

@ -188,6 +188,29 @@ intel_unmap_renderbuffer(struct gl_context *ctx,
}
/**
 * Map a requested multisample count onto a sample count this function
 * treats as supported for the given hardware generation.
 *
 * \return 4 for any nonzero request on Gen6, otherwise 0 (no multisampling).
 */
static unsigned
quantize_num_samples(struct intel_context *intel, unsigned num_samples)
{
   /* Gen6 supports only 4x multisampling, so every nonzero request on Gen6
    * becomes 4.
    */
   if (intel->gen == 6 && num_samples > 0)
      return 4;

   /* Either multisampling was not requested, or this generation gets none:
    * TODO: MSAA only implemented on Gen6 (so Gen7 reports 0 for now), and
    * MSAA is unsupported on all other generations.
    */
   return 0;
}
/**
* Called via glRenderbufferStorageEXT() to set the format and allocate
* storage for a user-created renderbuffer.
@ -199,6 +222,7 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
rb->NumSamples = quantize_num_samples(intel, rb->NumSamples);
ASSERT(rb->Name != 0);
@ -241,12 +265,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
return true;
irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format,
width, height);
width, height,
rb->NumSamples);
if (!irb->mt)
return false;
if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) {
bool ok = intel_miptree_alloc_hiz(intel, irb->mt);
bool ok = intel_miptree_alloc_hiz(intel, irb->mt, rb->NumSamples);
if (!ok) {
intel_miptree_release(&irb->mt);
return false;
@ -495,7 +520,7 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel,
if (mt->hiz_mt == NULL &&
intel->vtbl.is_hiz_depth_format(intel, rb->Format)) {
intel_miptree_alloc_hiz(intel, mt);
intel_miptree_alloc_hiz(intel, mt, 0 /* num_samples */);
if (!mt->hiz_mt)
return false;
}

View file

@ -72,7 +72,8 @@ intel_miptree_create_internal(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
bool for_region)
bool for_region,
GLuint num_samples)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
int compress_byte = 0;
@ -92,6 +93,7 @@ intel_miptree_create_internal(struct intel_context *intel,
mt->width0 = width0;
mt->height0 = height0;
mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format);
mt->num_samples = num_samples;
mt->compressed = compress_byte ? 1 : 0;
mt->refcount = 1;
@ -115,7 +117,8 @@ intel_miptree_create_internal(struct intel_context *intel,
mt->width0,
mt->height0,
mt->depth0,
true);
true,
num_samples);
if (!mt->stencil_mt) {
intel_miptree_release(&mt);
return NULL;
@ -161,7 +164,8 @@ intel_miptree_create(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload)
bool expect_accelerated_upload,
GLuint num_samples)
{
struct intel_mipmap_tree *mt;
uint32_t tiling = I915_TILING_NONE;
@ -172,7 +176,21 @@ intel_miptree_create(struct intel_context *intel,
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL_EXT))
tiling = I915_TILING_Y;
else if (width0 >= 64)
else if (num_samples > 0) {
/* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
* Surface"):
*
* [DevSNB+]: For multi-sample render targets, this field must be
* 1. MSRTs can only be tiled.
*
* Our usual reason for preferring X tiling (fast blits using the
* blitting engine) doesn't apply to MSAA, since we'll generally be
* downsampling or upsampling when blitting between the MSAA buffer
* and another buffer, and the blitting engine doesn't support that.
* So use Y tiling, since it makes better use of the cache.
*/
tiling = I915_TILING_Y;
} else if (width0 >= 64)
tiling = I915_TILING_X;
}
@ -189,7 +207,7 @@ intel_miptree_create(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, format,
first_level, last_level, width0,
height0, depth0,
false);
false, num_samples);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
@ -225,7 +243,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, format,
0, 0,
region->width, region->height, 1,
true);
true, 0 /* num_samples */);
if (!mt)
return mt;
@ -238,12 +256,24 @@ struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
uint32_t height)
uint32_t height,
uint32_t num_samples)
{
struct intel_mipmap_tree *mt;
/* Adjust width/height for MSAA */
if (num_samples > 4) {
num_samples = 8;
width *= 4;
height *= 2;
} else if (num_samples > 0) {
num_samples = 4;
width *= 2;
height *= 2;
}
mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
width, height, 1, true);
width, height, 1, true, num_samples);
return mt;
}
@ -513,7 +543,8 @@ intel_miptree_copy_teximage(struct intel_context *intel,
bool
intel_miptree_alloc_hiz(struct intel_context *intel,
struct intel_mipmap_tree *mt)
struct intel_mipmap_tree *mt,
GLuint num_samples)
{
assert(mt->hiz_mt == NULL);
mt->hiz_mt = intel_miptree_create(intel,
@ -524,7 +555,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
mt->width0,
mt->height0,
mt->depth0,
true);
true,
num_samples);
if (!mt->hiz_mt)
return false;

View file

@ -169,6 +169,7 @@ struct intel_mipmap_tree
GLuint width0, height0, depth0; /**< Level zero image dimensions */
GLuint cpp;
GLuint num_samples;
bool compressed;
/* Derived from the above:
@ -231,7 +232,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload);
bool expect_accelerated_upload,
GLuint num_samples);
struct intel_mipmap_tree *
intel_miptree_create_for_region(struct intel_context *intel,
@ -250,7 +252,8 @@ struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
uint32_t height);
uint32_t height,
uint32_t num_samples);
/** \brief Assert that the level and layer are valid for the miptree. */
static inline void
@ -341,7 +344,8 @@ intel_miptree_s8z24_gather(struct intel_context *intel,
bool
intel_miptree_alloc_hiz(struct intel_context *intel,
struct intel_mipmap_tree *mt);
struct intel_mipmap_tree *mt,
GLuint num_samples);
void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,

View file

@ -99,7 +99,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
width,
height,
depth,
expect_accelerated_upload);
expect_accelerated_upload,
0 /* num_samples */);
}
/* There are actually quite a few combinations this will work for,

View file

@ -86,7 +86,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
width,
height,
depth,
true);
true,
0 /* num_samples */);
if (!intelObj->mt)
return false;
}