Merge remote branch 'origin/nv50-compiler'

Conflicts:
	src/gallium/drivers/nouveau/nouveau_class.h
	src/gallium/drivers/nv50/nv50_screen.c
This commit is contained in:
Christoph Bumiller 2010-09-15 17:34:40 +02:00
commit 26fe16a99b
23 changed files with 9083 additions and 5021 deletions

View file

@ -8,9 +8,9 @@ C_SOURCES = \
nv50_clear.c \
nv50_context.c \
nv50_draw.c \
nv50_formats.c \
nv50_miptree.c \
nv50_query.c \
nv50_program.c \
nv50_resource.c \
nv50_screen.c \
nv50_state.c \
@ -19,6 +19,14 @@ C_SOURCES = \
nv50_tex.c \
nv50_transfer.c \
nv50_vbo.c \
nv50_push.c
nv50_push.c \
nv50_program.c \
nv50_shader_state.c \
nv50_pc.c \
nv50_pc_print.c \
nv50_pc_emit.c \
nv50_tgsi_to_nc.c \
nv50_pc_optimize.c \
nv50_pc_regalloc.c
include ../../Makefile.template

View file

@ -9,6 +9,7 @@ nv50 = env.ConvenienceLibrary(
'nv50_clear.c',
'nv50_context.c',
'nv50_draw.c',
'nv50_formats.c',
'nv50_miptree.c',
'nv50_query.c',
'nv50_program.c',

View file

@ -0,0 +1,452 @@
/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nv50_screen.h"
#include "nv50_texture.h"
#include "nv50_reg.h"
#include "pipe/p_defines.h"
/*
 * Helpers for building nv50_format_table entries.
 *
 * A_() and B_() both expand to TWO comma-separated initializers:
 *   1. the texture image control (TIC) word: per-channel source mapping
 *      (cr/cg/cb/ca), per-channel data type (t0..t3) and storage layout (sz);
 *   2. the vertex array attribute word.
 *
 * A_() is for formats that can also be fetched as vertex attributes; the
 * attribute word is built from the layout, the type of the first channel and
 * the flag r, which is placed in bit 31.  The shift is performed on an
 * unsigned value because shifting a signed 1 into the sign bit is undefined
 * behaviour in C.
 *
 * B_() is for formats without a vertex attribute encoding: the second word
 * is 0 and r is ignored.
 */
#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
NV50TIC_0_0_FMT_##sz, \
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \
NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | \
(NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) | ((unsigned)(r) << 31)
#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
NV50TIC_0_0_FMT_##sz, 0
/* Short names for the resource bind flags used in the table. */
#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
#define SCANOUT PIPE_BIND_SCANOUT
/* for vertex buffers: three-channel layouts have no TIC format of their
 * own, so their TIC word reuses the four-channel layout of the same width */
#define NV50TIC_0_0_FMT_8_8_8 NV50TIC_0_0_FMT_8_8_8_8
#define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16
#define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32
const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
{
/* COMMON FORMATS */
[PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
[PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM,
A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
[PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB,
A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM,
B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
[PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM,
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
[PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
SAMPLER_VIEW },
[PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
[PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM,
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
/* DEPTH/STENCIL FORMATS */
[PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM,
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM,
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT,
B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
[PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
SAMPLER_VIEW | DEPTH_STENCIL },
/* LUMINANCE, ALPHA, INTENSITY */
[PIPE_FORMAT_L8_UNORM] = { 0,
A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_L8_SRGB] = { 0,
A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_I8_UNORM] = { 0,
A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM,
A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_L8A8_UNORM] = { 0,
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_L8A8_SRGB] = { 0,
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
SAMPLER_VIEW },
/* DXT, RGTC */
[PIPE_FORMAT_DXT1_RGB] = { 0,
B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_DXT1_RGBA] = { 0,
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_DXT3_RGBA] = { 0,
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_DXT5_RGBA] = { 0,
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC1_UNORM] = { 0,
B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC1_SNORM] = { 0,
B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC2_UNORM] = { 0,
B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
SAMPLER_VIEW },
[PIPE_FORMAT_RGTC2_SNORM] = { 0,
B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
SAMPLER_VIEW },
/* FLOAT 16 */
[PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT,
A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT,
A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT,
A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* FLOAT 32 */
[PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT,
A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT,
A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT,
A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* ODD FORMATS */
[PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT,
B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0),
SAMPLER_VIEW },
/* SNORM 32 */
[PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_SNORM] = { 0,
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_SNORM] = { 0,
A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_SNORM] = { 0,
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* UNORM 32 */
[PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_UNORM] = { 0,
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_UNORM] = { 0,
A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_UNORM] = { 0,
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* SNORM 16 */
[PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16B16_SNORM] = { 0,
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16_SNORM] = { 0,
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* UNORM 16 */
[PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16G16B16_UNORM] = { 0,
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16_UNORM] = { 0,
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* SNORM 8 */
[PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM,
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8B8_SNORM] = { 0,
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM,
A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM,
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* UNORM 8 */
[PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM,
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB,
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM,
A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM,
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
/* SSCALED 32 */
[PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_SSCALED] = { 0,
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_SSCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* USCALED 32 */
[PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_USCALED] = { 0,
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32_USCALED] = { 0,
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32_USCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* SSCALED 16 */
[PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_SSCALED] = { 0,
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16_SSCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* USCALED 16 */
[PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16B16_USCALED] = { 0,
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_USCALED] = { 0,
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16_USCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* SSCALED 8 */
[PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8G8_SSCALED] = { 0,
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8_SSCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
/* USCALED 8 */
[PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8G8B8_USCALED] = { 0,
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8G8_USCALED] = { 0,
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R8_USCALED] = { 0,
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
};

View file

@ -159,6 +159,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
tile_flags = 0x2800;
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
tile_flags = 0xe000;
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32_FLOAT:
tile_flags = 0x7400;

View file

@ -0,0 +1,804 @@
/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* #define NV50PC_DEBUG */
#include "nv50_pc.h"
#include "nv50_program.h"
#include <stdio.h>
/* Tell whether source operands 0 and 1 of @opcode may be exchanged. */
boolean
nv_op_commutative(uint opcode)
{
   boolean swappable = FALSE;

   switch (opcode) {
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_MAD:
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
   case NV_OP_MIN:
   case NV_OP_MAX:
   case NV_OP_SAD:
      swappable = TRUE;
      break;
   default:
      break;
   }

   return swappable;
}
/* Index of the source operand the address register (src[4]) applies to,
 * or -1 if the instruction has no address register source. */
int
nv50_indirect_opnd(struct nv_instruction *i)
{
   if (!i->src[4])
      return -1;

   if (i->opcode == NV_OP_MOV ||
       i->opcode == NV_OP_LDA ||
       i->opcode == NV_OP_STA)
      return 0;

   return 1;
}
/* Check whether source slot @s of @nvi may be an immediate.
 * Instructions that read or write condition flags never qualify. */
boolean
nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
{
   if (nvi->flags_src || nvi->flags_def)
      return FALSE;

   if (nvi->opcode == NV_OP_MOV) {
      /* MOV only has a single source */
      assert(s == 0);
      return (nvi->def[0]->reg.file == NV_FILE_GPR);
   }

   switch (nvi->opcode) {
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
   case NV_OP_SHL:
   case NV_OP_SHR:
      break;
   default:
      return FALSE;
   }

   /* only the second source, and only for GPR -> GPR operations */
   return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
      (nvi->def[0]->reg.file == NV_FILE_GPR);
}
/* Check whether @value (a memory operand) can be used directly as source
 * slot @s of @nvi instead of going through a separate load. */
boolean
nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
{
   int k;

   /* an instruction that already uses an immediate cannot load as well */
   for (k = 0; k < 3; ++k) {
      if (!nvi->src[k])
         break;
      if (nvi->src[k]->value->reg.file == NV_FILE_IMM)
         return FALSE;
   }

   if (nvi->opcode == NV_OP_MOV) {
      assert(s == 0);
      return /* TRUE */ FALSE; /* don't turn MOVs into loads */
   }

   switch (nvi->opcode) {
   case NV_OP_ABS:
   case NV_OP_ADD:
   case NV_OP_CEIL:
   case NV_OP_FLOOR:
   case NV_OP_TRUNC:
   case NV_OP_CVT:
   case NV_OP_NEG:
   case NV_OP_MAD:
   case NV_OP_MUL:
   case NV_OP_SAT:
   case NV_OP_SUB:
   case NV_OP_MAX:
   case NV_OP_MIN:
      break;
   default:
      return FALSE;
   }

   /* MEM_S / MEM_P operands are accepted in slot 0 only */
   if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
                  value->reg.file == NV_FILE_MEM_P))
      return TRUE;

   /* everything else has to come from one of the 16 MEM_C files */
   if (value->reg.file < NV_FILE_MEM_C(0) ||
       value->reg.file > NV_FILE_MEM_C(15))
      return FALSE;

   /* slot 1 always works, slot 2 only if slot 1 is a GPR */
   return (s == 1) ||
      ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
}
/* Tell whether @nvi may be turned into a conditionally executed
 * instruction: it must not already read flags and must not use immediates. */
boolean
nv50_nvi_can_predicate(struct nv_instruction *nvi)
{
   int s = 0;

   if (nvi->flags_src)
      return FALSE;

   while (s < 4 && nvi->src[s]) {
      if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
         return FALSE;
      ++s;
   }

   return TRUE;
}
/* Return the set of NV_MOD_* source modifiers that @opcode accepts on
 * source slot @s. */
ubyte
nv50_supported_src_mods(uint opcode, int s)
{
   ubyte mods = 0;

   switch (opcode) {
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_MAD:
      mods = NV_MOD_NEG;
      break;
   case NV_OP_DFDX:
   case NV_OP_DFDY:
      /* derivatives only have one source */
      assert(s == 0);
      mods = NV_MOD_NEG;
      break;
   case NV_OP_MAX:
   case NV_OP_MIN:
      mods = NV_MOD_ABS;
      break;
   case NV_OP_ABS: /* obviously */
   case NV_OP_CVT:
   case NV_OP_LG2:
   case NV_OP_NEG:
   case NV_OP_PREEX2:
   case NV_OP_PRESIN:
   case NV_OP_RCP:
   case NV_OP_RSQ:
      mods = NV_MOD_ABS | NV_MOD_NEG;
      break;
   default:
      break;
   }

   return mods;
}
/* Tell whether an instruction with @opcode is able to write condition
 * flags.  We may want an opcode table for this. */
boolean
nv50_op_can_write_flags(uint opcode)
{
   if (nv_is_vector_op(opcode))
      return FALSE;

   /* the contiguous opcode range RCP .. PREEX2 cannot set flags */
   if (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2)
      return FALSE;

   /* obvious ones like KIL, CALL, etc. not included */
   if (opcode == NV_OP_PHI ||
       opcode == NV_OP_MOV ||
       opcode == NV_OP_LINTERP ||
       opcode == NV_OP_PINTERP ||
       opcode == NV_OP_LDA)
      return FALSE;

   return TRUE;
}
/* Sum the reference counts of everything @nvi defines: its flags result
 * (if any) plus each value in def[] up to the first empty slot. */
int
nv_nvi_refcount(struct nv_instruction *nvi)
{
   int total = nvi->flags_def ? nvi->flags_def->refc : 0;
   int d;

   for (d = 0; d < 4 && nvi->def[d]; ++d)
      total += nvi->def[d]->refc;

   return total;
}
/* Redirect every reference in @pc that points at @old_val to @new_val.
 * Returns the number of references rewritten; when both values are the
 * same nothing is touched and the value's reference count is returned. */
int
nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
                   struct nv_value *new_val)
{
   int replaced = 0;
   int r;

   if (old_val == new_val)
      return old_val->refc;

   for (r = 0; r < pc->num_refs; ++r) {
      if (pc->refs[r]->value != old_val)
         continue;
      ++replaced;
      nv_reference(pc, &pc->refs[r], new_val);
   }

   return replaced;
}
/* Follow @ref back through any chain of MOVs and return the originating
 * value if it is an immediate or the result of an LDA from one of the
 * MEM_C files; otherwise (or for a NULL @ref) return NULL. */
struct nv_value *
nvcg_find_constant(struct nv_ref *ref)
{
   struct nv_value *val;

   if (!ref)
      return NULL;

   /* skip over plain copies */
   for (val = ref->value;
        val->insn && val->insn->opcode == NV_OP_MOV;
        val = val->insn->src[0]->value)
      assert(!val->insn->src[0]->mod);

   if (val->reg.file == NV_FILE_IMM)
      return val;

   if (val->insn && val->insn->opcode == NV_OP_LDA &&
       val->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
       val->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))
      return val;

   return NULL;
}
/* Like nvcg_find_constant(), but only accept immediates. */
struct nv_value *
nvcg_find_immediate(struct nv_ref *ref)
{
   struct nv_value *val = nvcg_find_constant(ref);

   if (!val || val->reg.file != NV_FILE_IMM)
      return NULL;
   return val;
}
/* Release the reference storage of @pc.  Only every 64th entry of
 * pc->refs is freed (presumably refs are allocated in chunks of 64 --
 * confirm against the allocator), followed by the pointer array itself. */
static void
nv_pc_free_refs(struct nv_pc *pc)
{
   int first = 0;

   while (first < pc->num_refs) {
      FREE(pc->refs[first]);
      first += 64;
   }
   FREE(pc->refs);
}
/* Human readable label for a CFG edge kind, "?" if unknown. */
static const char *
edge_name(ubyte type)
{
   const char *label;

   switch (type) {
   case CFG_EDGE_FORWARD:
      label = "forward";
      break;
   case CFG_EDGE_BACK:
      label = "back";
      break;
   case CFG_EDGE_LOOP_ENTER:
      label = "loop";
      break;
   case CFG_EDGE_LOOP_LEAVE:
      label = "break";
      break;
   case CFG_EDGE_FAKE:
      label = "fake";
      break;
   default:
      label = "?";
      break;
   }

   return label;
}
/*
 * Call f(priv, block) once for every basic block reachable from @root,
 * walking the control flow graph with an explicit stack.
 *
 * bb[]  is the work stack (p = number of entries),
 * bbb[] collects the targets of LOOP_LEAVE edges (pp = number of entries);
 *       they are only moved to the work stack once it has run empty.
 *
 * b->priv is used as a per-block counter of how many FORWARD/FAKE edges
 * into the block have been seen so far; it is reset to 0 when the block is
 * popped.
 *
 * NOTE(review): neither bb[64] nor bbb[16] is bounds checked.
 */
void
nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
{
struct nv_basic_block *bb[64], *bbb[16], *b;
int j, p, pp;
bb[0] = root;
p = 1;
pp = 0;
while (p > 0) {
b = bb[--p];
b->priv = 0;
/* out[1] is examined first, so out[0] ends up on top of the stack */
for (j = 1; j >= 0; --j) {
if (!b->out[j])
continue;
switch (b->out_kind[j]) {
case CFG_EDGE_BACK:
/* never follow back edges */
continue;
case CFG_EDGE_FORWARD:
case CFG_EDGE_FAKE:
/* push the successor only when this arrival brings its counter up to
 * its total number of incoming edges */
if (++b->out[j]->priv == b->out[j]->num_in)
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_ENTER:
/* loop entries are pushed unconditionally */
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_LEAVE:
/* defer loop exits until the work stack is empty */
bbb[pp++] = b->out[j];
break;
default:
assert(0);
break;
}
}
f(priv, b);
if (!p) {
/* work stack ran dry: continue with the deferred loop exits */
p = pp;
for (; pp > 0; --pp)
bb[pp - 1] = bbb[pp - 1];
}
}
}
/* Pass callback: dump one basic block -- a header line with its id and
 * outgoing edges, then every instruction starting at the first PHI (or at
 * the entry instruction if the block has no PHIs).  @priv is unused. */
static void
nv_do_print_function(void *priv, struct nv_basic_block *b)
{
   struct nv_instruction *insn;
   int e;

   debug_printf("=== BB %i ", b->id);
   for (e = 0; e < 2; ++e) {
      if (b->out[e])
         debug_printf("[%s -> %i] ", edge_name(b->out_kind[e]), b->out[e]->id);
   }
   debug_printf("===\n");

   insn = b->phi ? b->phi : b->entry;
   while (insn) {
      nv_print_instruction(insn);
      insn = insn->next;
   }
}
/* Dump the function whose first basic block is @root, preceded by a line
 * naming it (MAIN, or the subroutine number). */
void
nv_print_function(struct nv_basic_block *root)
{
   if (!root->subroutine)
      debug_printf("MAIN\n");
   else
      debug_printf("SUBROUTINE %i\n", root->subroutine);

   nv_pc_pass_in_order(root, nv_do_print_function, root);
}
/* Dump every function of @pc: pc->root[] holds num_subroutines + 1
 * entries, empty slots are skipped. */
void
nv_print_program(struct nv_pc *pc)
{
   int fn = 0;

   while (fn < pc->num_subroutines + 1) {
      if (pc->root[fn])
         nv_print_function(pc->root[fn]);
      ++fn;
   }
}
#ifdef NV50PC_DEBUG
static void
nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
{
int i;
b->pass_seq = pc->pass_seq;
fprintf(f, "\t%i [shape=box]\n", b->id);
for (i = 0; i < 2; ++i) {
if (!b->out[i])
continue;
switch (b->out_kind[i]) {
case CFG_EDGE_FORWARD:
fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
break;
case CFG_EDGE_LOOP_ENTER:
fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
break;
case CFG_EDGE_LOOP_LEAVE:
fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
break;
case CFG_EDGE_BACK:
fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
continue;
case CFG_EDGE_FAKE:
fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
break;
default:
assert(0);
break;
}
if (b->out[i]->pass_seq < pc->pass_seq)
nv_do_print_cfgraph(pc, f, b->out[i]);
}
}
/* Append the control flow graph of subroutine @subr (0 == MAIN) to the file
 * at @filepath as a graphviz "digraph".  Silently does nothing if the file
 * cannot be opened. */
static void
nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
{
   FILE *out = fopen(filepath, "a");

   if (!out)
      return;

   fprintf(out, "digraph G {\n");

   /* new sequence number so that no block counts as visited yet */
   ++pc->pass_seq;
   nv_do_print_cfgraph(pc, out, pc->root[subr]);

   fprintf(out, "}\n");
   fclose(out);
}
#endif
/* Hex dump the emitted code of @pc, 16 words per line. */
static INLINE void
nvcg_show_bincode(struct nv_pc *pc)
{
   unsigned w = 0;

   while (w < pc->bin_size / 4) {
      debug_printf("0x%08x ", pc->emit[w]);
      if ((w & 15) == 15)
         debug_printf("\n");
      ++w;
   }
   debug_printf("\n");
}
/*
 * Encode every instruction of every basic block (in pc->bb_list order) into
 * the buffer at pc->emit.  Always returns 0.
 *
 * pc->emit is advanced while emitting and restored to the buffer start
 * before returning; pc->bin_pos is advanced by the number of words written.
 * pc->bin_size may grow by 8 bytes if a trailing instruction is appended;
 * nv50_generate_code() allocates two spare words for that case.
 */
static int
nv50_emit_program(struct nv_pc *pc)
{
uint32_t *code = pc->emit;
int n;
NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);
for (n = 0; n < pc->num_blocks; ++n) {
struct nv_instruction *i;
struct nv_basic_block *b = pc->bb_list[n];
for (i = b->entry; i; i = i->next) {
nv50_emit_instruction(pc, i);
/* bit 0 of the first word tells whether the encoding takes one or two
 * 32-bit words */
pc->bin_pos += 1 + (pc->emit[0] & 1);
pc->emit += 1 + (pc->emit[0] & 1);
}
}
/* the emitted size must match what was computed before emission */
assert(pc->emit == &code[pc->bin_size / 4]);
/* XXX: we can do better than this ... */
/* Inspect the last two emitted words; unless they have bit 0 set and bit 1
 * clear in the first and the low two bits clear in the second, append the
 * two-word instruction below (presumably a NOP -- confirm against the ISA).
 * NOTE(review): this reads pc->emit[-2], which lies before the buffer if
 * fewer than two words were emitted. */
if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
pc->emit[0] = 0xf0000001;
pc->emit[1] = 0xe0000000;
pc->bin_size += 8;
}
pc->emit = code;
/* set bit 0 in the very last word (presumably the program end marker --
 * confirm against the ISA) */
code[pc->bin_size / 4 - 1] |= 1;
#ifdef NV50PC_DEBUG
nvcg_show_bincode(pc);
#endif
return 0;
}
int
nv50_generate_code(struct nv50_translation_info *ti)
{
struct nv_pc *pc;
int ret;
int i;
pc = CALLOC_STRUCT(nv_pc);
if (!pc)
return 1;
pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
if (!pc->root) {
FREE(pc);
return 1;
}
pc->num_subroutines = ti->subr_nr;
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc);
#endif
pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
/* optimization */
ret = nv_pc_exec_pass0(pc);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc);
#endif
/* register allocation */
ret = nv_pc_exec_pass1(pc);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc);
nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
#endif
/* prepare for emission */
ret = nv_pc_exec_pass2(pc);
if (ret)
goto out;
pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
if (!pc->emit) {
ret = 3;
goto out;
}
ret = nv50_emit_program(pc);
if (ret)
goto out;
ti->p->code_size = pc->bin_size;
ti->p->code = pc->emit;
ti->p->immd_size = pc->immd_count * 4;
ti->p->immd = pc->immd_buf;
/* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1);
ti->p->fixups = pc->fixups;
ti->p->num_fixups = pc->num_fixups;
NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
out:
nv_pc_free_refs(pc);
for (i = 0; i < pc->num_blocks; ++i)
FREE(pc->bb_list[i]);
if (pc->root)
FREE(pc->root);
if (ret) { /* on success, these will be referenced by nv50_program */
if (pc->emit)
FREE(pc->emit);
if (pc->immd_buf)
FREE(pc->immd_buf);
if (pc->fixups)
FREE(pc->fixups);
}
FREE(pc);
return ret;
}
/*
 * Insert PHI instruction @i into block @b, keeping all PHIs ahead of the
 * first non-PHI instruction.  b->phi points at the first PHI of the block.
 *
 * Does not set i->bb or update b->num_instructions; the caller visible in
 * this file (nvbb_insert_tail) does that.
 */
static void
nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
{
if (!b->phi) {
/* first PHI of the block: it becomes the head of the list */
i->prev = NULL;
b->phi = i;
i->next = b->entry;
if (b->entry) {
assert(!b->entry->prev && b->exit);
b->entry->prev = i;
} else {
/* block was empty: entry and exit temporarily point at the PHI */
b->entry = i;
b->exit = i;
}
} else {
assert(b->entry);
if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
/* the block holds PHIs only so far; append and move entry/exit along.
 * i->next is not written here, so it is expected to be NULL already. */
assert(b->entry == b->exit);
b->entry->next = i;
i->prev = b->entry;
b->entry = i;
b->exit = i;
} else { /* insert before entry */
/* link between the last PHI and the first regular instruction */
assert(b->entry->prev && b->exit);
i->next = b->entry;
i->prev = b->entry->prev;
b->entry->prev = i;
i->prev->next = i;
}
}
}
/* Add instruction @i to block @b: PHIs go to the PHI section at the top,
 * everything else is appended at the end.  Also records the owning block
 * in @i and bumps the block's instruction count. */
void
nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
{
   if (i->opcode != NV_OP_PHI) {
      struct nv_instruction *last = b->exit;

      i->prev = last;
      if (last)
         last->next = i;
      b->exit = i;

      /* first instruction of the block, or first one after the PHIs */
      if (!b->entry || (last && last->opcode == NV_OP_PHI))
         b->entry = i;
   } else {
      nvbb_insert_phi(b, i);
   }

   i->bb = b;
   b->num_instructions++;
}
/*
 * Link instruction @ni into the instruction list directly behind @at.
 *
 * If @at is the last instruction of its block this is the same as appending
 * @ni to that block.  In both cases @ni ends up owned by @at's block and the
 * block's instruction count is incremented, so the two paths leave the block
 * in a consistent state (nv_nvi_delete() relies on nvi->bb being set).
 */
void
nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
{
   if (!at->next) {
      /* sets ni->bb and updates the instruction count itself */
      nvbb_insert_tail(at->bb, ni);
      return;
   }

   ni->next = at->next;
   ni->prev = at;
   ni->next->prev = ni;
   ni->prev->next = ni;

   /* same bookkeeping as nvbb_insert_tail() */
   ni->bb = at->bb;
   if (ni->bb)
      ni->bb->num_instructions++;
}
/*
 * Unlink instruction @nvi from its basic block and drop the references it
 * holds on its sources.
 *
 * The block's entry, exit and phi pointers are fixed up if they pointed at
 * @nvi.  The instruction itself is not freed here, and b->num_instructions
 * is not decremented.
 */
void
nv_nvi_delete(struct nv_instruction *nvi)
{
struct nv_basic_block *b = nvi->bb;
int j;
/* debug_printf("REM: "); nv_print_instruction(nvi); */
/* release all five source slots and the flags source */
for (j = 0; j < 5; ++j)
nv_reference(NULL, &nvi->src[j], NULL);
nv_reference(NULL, &nvi->flags_src, NULL);
if (nvi->next)
nvi->next->prev = nvi->prev;
else {
/* no successor: @nvi must have been the block's last instruction */
assert(nvi == b->exit);
b->exit = nvi->prev;
}
if (nvi->prev)
nvi->prev->next = nvi->next;
if (nvi == b->entry) {
/* PHIs don't get hooked to b->entry */
b->entry = nvi->next;
assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
}
if (nvi == b->phi) {
if (nvi->opcode != NV_OP_PHI)
NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");
assert(!nvi->prev);
/* advance b->phi to the next PHI, or clear it if this was the last one */
if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
b->phi = NULL;
else
b->phi = nvi->next;
}
}
/*
 * Swap two adjacent instructions: @i2 must directly follow @i1 and neither
 * may be a PHI.  Afterwards @i2 precedes @i1; the block's entry and exit
 * pointers are updated if they referred to either instruction.
 */
void
nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
{
struct nv_basic_block *b = i1->bb;
assert(i1->opcode != NV_OP_PHI &&
i2->opcode != NV_OP_PHI);
assert(i1->next == i2);
if (b->exit == i2)
b->exit = i1;
if (b->entry == i1)
b->entry = i2;
/* relink the pair itself first ... */
i2->prev = i1->prev;
i1->next = i2->next;
i2->next = i1;
i1->prev = i2;
/* ... then point the outer neighbours at their new neighbours */
if (i2->prev)
i2->prev->next = i2;
if (i1->next)
i1->next->prev = i1;
}
/* Add a CFG edge of kind @edge_kind from @parent to @b.  The edge goes into
 * the first free outgoing slot of @parent (at most two) and is appended to
 * the incoming edge list of @b (fewer than 8 entries before the call). */
void
nvbb_attach_block(struct nv_basic_block *parent,
                  struct nv_basic_block *b, ubyte edge_kind)
{
   int slot;

   assert(b->num_in < 8);

   if (parent->out[0]) {
      assert(!parent->out[1]);
      slot = 1;
   } else {
      slot = 0;
   }
   parent->out[slot] = b;
   parent->out_kind[slot] = edge_kind;

   b->in[b->num_in] = parent;
   b->in_kind[b->num_in] = edge_kind;
   b->num_in++;
}
/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
/* Tell whether block @d dominates block @b: @b is @d itself, or @b has at
 * least one incoming edge and every predecessor reached through a non-back
 * edge is in turn dominated by @d. */
boolean
nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
{
   int k;

   if (b == d)
      return TRUE;

   for (k = 0; k < b->num_in; ++k) {
      if (b->in_kind[k] == CFG_EDGE_BACK)
         continue;
      if (!nvbb_dominated_by(b->in[k], d))
         return FALSE;
   }

   /* a block without predecessors other than @d itself is not dominated */
   if (k == 0)
      return FALSE;
   return TRUE;
}
/*
 * Check if @bf (future) can be reached from @bp (past) without passing
 * through @bt and without crossing "wall" edges (IS_WALL_EDGE).
 *
 * Breadth-first search over the outgoing edges.  b->priv marks blocks that
 * are already queued and is cleared again for every queued block before
 * returning, so the function leaves priv at 0 for all blocks it touched.
 */
boolean
nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
                  struct nv_basic_block *bt)
{
   struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
   int i, p, n;

   p = 0;
   n = 1;
   q[0] = bp;

   while (p < n) {
      b = q[p++];

      if (b == bf)
         break;
      if (b == bt)
         continue;
      /* up to two successors are appended below; keep the check tied to the
       * actual queue size rather than a hard-coded number */
      assert(n <= (NV_PC_MAX_BASIC_BLOCKS - 2));

      for (i = 0; i < 2; ++i) {
         if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
            q[n] = b->out[i];
            q[n++]->priv = 1;
         }
      }
   }
   /* undo the visited marks */
   for (--n; n >= 0; --n)
      q[n]->priv = 0;

   return (b == bf);
}
/* Depth-first search from @df along non-back edges for a block that is not
 * dominated by @b itself but has a (non-back edge) predecessor that is.
 * Returns the first such block, or NULL if there is none. */
static struct nv_basic_block *
nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
{
   int e;

   if (!nvbb_dominated_by(df, b)) {
      for (e = 0; e < df->num_in; ++e) {
         if (df->in_kind[e] != CFG_EDGE_BACK &&
             nvbb_dominated_by(df->in[e], b))
            return df;
      }
   }

   for (e = 0; e < 2; ++e) {
      struct nv_basic_block *found;

      if (!df->out[e])
         break;
      if (df->out_kind[e] == CFG_EDGE_BACK)
         continue;

      found = nvbb_find_dom_frontier(b, df->out[e]);
      if (found)
         return found;
   }

   return NULL;
}
/* Return a block on the dominance frontier of @b, searching through each of
 * its successors in turn, or NULL if none is found. */
struct nv_basic_block *
nvbb_dom_frontier(struct nv_basic_block *b)
{
   int e;

   for (e = 0; e < 2; ++e) {
      struct nv_basic_block *found;

      if (!b->out[e])
         break;

      found = nvbb_find_dom_frontier(b, b->out[e]);
      if (found)
         return found;
   }

   return NULL;
}

View file

@ -0,0 +1,514 @@
/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NV50_COMPILER_H__
#define __NV50_COMPILER_H__
/* Debug message hook: forwards its arguments to debug_printf when
 * NV50PC_DEBUG is defined, and expands to nothing otherwise.
 */
#ifdef NV50PC_DEBUG
# define NV50_DBGMSG(args...) debug_printf(args)
#else
# define NV50_DBGMSG(args...)
#endif
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
/* Opcode numbers of the intermediate representation.
 *
 * The values are consecutive; NV_OP_COUNT is the number of opcodes.
 * nv_is_vector_op() relies on NV_OP_TEX .. NV_OP_TXQ being contiguous.
 */
#define NV_OP_PHI 0
#define NV_OP_EXTRACT 1
#define NV_OP_COMBINE 2
#define NV_OP_LDA 3
#define NV_OP_STA 4
#define NV_OP_MOV 5
#define NV_OP_ADD 6
#define NV_OP_SUB 7
#define NV_OP_NEG 8
#define NV_OP_MUL 9
#define NV_OP_MAD 10
#define NV_OP_CVT 11
#define NV_OP_SAT 12
#define NV_OP_NOT 13
#define NV_OP_AND 14
#define NV_OP_OR 15
#define NV_OP_XOR 16
#define NV_OP_SHL 17
#define NV_OP_SHR 18
#define NV_OP_RCP 19
#define NV_OP_UNDEF 20
#define NV_OP_RSQ 21
#define NV_OP_LG2 22
#define NV_OP_SIN 23
#define NV_OP_COS 24
#define NV_OP_EX2 25
#define NV_OP_PRESIN 26
#define NV_OP_PREEX2 27
#define NV_OP_MIN 28
#define NV_OP_MAX 29
#define NV_OP_SET 30
#define NV_OP_SAD 31
#define NV_OP_KIL 32
#define NV_OP_BRA 33
#define NV_OP_CALL 34
#define NV_OP_RET 35
#define NV_OP_BREAK 36
#define NV_OP_BREAKADDR 37
#define NV_OP_JOINAT 38
#define NV_OP_TEX 39
#define NV_OP_TXB 40
#define NV_OP_TXL 41
#define NV_OP_TXF 42
#define NV_OP_TXQ 43
#define NV_OP_DFDX 44
#define NV_OP_DFDY 45
#define NV_OP_QUADOP 46
#define NV_OP_LINTERP 47
#define NV_OP_PINTERP 48
#define NV_OP_ABS 49
#define NV_OP_CEIL 50
#define NV_OP_FLOOR 51
#define NV_OP_TRUNC 52
#define NV_OP_NOP 53
#define NV_OP_SELECT 54
#define NV_OP_EXPORT 55
#define NV_OP_JOIN 56
#define NV_OP_COUNT 57
/* Register / memory file identifiers stored in nv_reg::file.
 *
 * NV_FILE_MEM_G(i) and NV_FILE_MEM_C(i) select the i-th buffer of the
 * respective file; the argument is parenthesized so that expressions
 * such as NV_FILE_MEM_C(a & b) expand correctly.
 */
#define NV_FILE_GPR 0
#define NV_FILE_OUT 1
#define NV_FILE_ADDR 2
#define NV_FILE_FLAGS 3
#define NV_FILE_IMM 16
#define NV_FILE_MEM_S 32
#define NV_FILE_MEM_P 33
#define NV_FILE_MEM_V 34
#define NV_FILE_MEM_L 48
#define NV_FILE_MEM_G(i) (64 + (i))
#define NV_FILE_MEM_C(i) (80 + (i))
/* Source operand modifiers. Each value is a distinct bit; presumably they
 * are OR'ed together in nv_ref::mod -- TODO confirm against the users.
 */
#define NV_MOD_NEG 1
#define NV_MOD_ABS 2
#define NV_MOD_NOT 4
#define NV_MOD_SAT 8
/* Value types. The low nibble is the base type; NV_TYPE_VEC() places a
 * component count in the bits above it (see nv_type_sizeof()).
 */
#define NV_TYPE_U8 0x00
#define NV_TYPE_S8 0x01
#define NV_TYPE_U16 0x02
#define NV_TYPE_S16 0x03
#define NV_TYPE_U32 0x04
#define NV_TYPE_S32 0x05
#define NV_TYPE_P32 0x07
#define NV_TYPE_F32 0x09
#define NV_TYPE_F64 0x0b
/* x is a type suffix (e.g. F32), n the component count; n is parenthesized
 * so that compound expressions are shifted as a whole.
 */
#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4))
#define NV_TYPE_LO 0x00
#define NV_TYPE_HI 0x80
#define NV_TYPE_ANY 0xff
#define NV_TYPE_ISINT(t) ((t) <= 5)
#define NV_TYPE_ISFLT(t) ((t) & 0x08)
/* $cX registers contain 4 bits: OCSZ (Z is bit 0) */
/* Condition codes; nv_alloc_instruction() initializes nv_instruction::cc
 * to NV_CC_TR.
 */
#define NV_CC_FL 0x0
#define NV_CC_LT 0x1
#define NV_CC_EQ 0x2
#define NV_CC_LE 0x3
#define NV_CC_GT 0x4
#define NV_CC_NE 0x5
#define NV_CC_GE 0x6
#define NV_CC_U 0x8
#define NV_CC_TR 0xf
#define NV_CC_O 0x10
#define NV_CC_C 0x11
#define NV_CC_A 0x12
#define NV_CC_S 0x13
/* Capacities of the fixed-size arrays in struct nv_pc. */
#define NV_PC_MAX_INSTRUCTIONS 2048
#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
#define NV_PC_MAX_BASIC_BLOCKS 1024
/* TRUE for the texture opcodes NV_OP_TEX .. NV_OP_TXQ (inclusive). */
static INLINE boolean
nv_is_vector_op(uint opcode)
{
   if (opcode < NV_OP_TEX)
      return FALSE;
   if (opcode > NV_OP_TXQ)
      return FALSE;
   return TRUE;
}
/* Return log2 of the byte size of the base type of @type
 * (0 for 8 bit, 1 for 16 bit, 2 for 32 bit, 3 for 64 bit).
 *
 * Only the low nibble of @type is examined. Unknown base types trigger an
 * assertion; when assertions are compiled out 0 is returned so that the
 * function never falls off its end without a value.
 */
static INLINE uint
nv_type_order(ubyte type)
{
   switch (type & 0xf) {
   case NV_TYPE_U8:
   case NV_TYPE_S8:
      return 0;
   case NV_TYPE_U16:
   case NV_TYPE_S16:
      return 1;
   case NV_TYPE_U32:
   case NV_TYPE_F32:
   case NV_TYPE_S32:
   case NV_TYPE_P32:
      return 2;
   case NV_TYPE_F64:
      return 3;
   default:
      break;
   }
   assert(0);
   return 0;
}
/* Size in bytes of @type: the base type size, multiplied by the value of
 * the high nibble if that is non-zero.
 */
static INLINE uint
nv_type_sizeof(ubyte type)
{
   const uint base_size = 1 << nv_type_order(type);
   const uint count = type >> 4;

   return count ? base_size * count : base_size;
}
/* Size in bytes of the base type of @type, ignoring the high nibble. */
static INLINE uint
nv_type_sizeof_base(ubyte type)
{
   const uint order = nv_type_order(type);

   return 1 << order;
}
/* Storage location and type of a value. */
struct nv_reg {
int id; /* new_value() initializes this to -1; >= 0 is treated as allocated */
ubyte file; /* one of NV_FILE_* */
ubyte type; /* type of generating instruction's result */
ubyte as_type; /* default type for new references to this value */
/* immediate payload, printed for NV_FILE_IMM values */
union {
float f32;
double f64;
int32_t s32;
uint32_t u32;
} imm;
};
/* One node of a value's live interval list. The register allocator keeps
 * the list sorted by bgn and treats each node as the half-open range
 * [bgn, end) of instruction serial numbers.
 */
struct nv_range {
struct nv_range *next;
int bgn;
int end;
};
/* A value (virtual register, immediate or memory location). */
struct nv_value {
struct nv_reg reg;
struct nv_instruction *insn; /* defining instruction; NULL for non-def values */
struct nv_value *join; /* representative of the join group; new_value() sets it to the value itself */
int n; /* index of this value in nv_pc::values */
struct nv_range *livei; /* live interval list */
int refc; /* incremented for each nv_ref pointing at this value */
/* list links; NOTE(review): presumably used with util/u_simple_list -- confirm */
struct nv_value *next;
struct nv_value *prev;
};
/* A use of a value as an instruction source operand. */
struct nv_ref {
struct nv_value *value;
ubyte mod; /* modifier passed to nv_modifier_string() when printing */
ubyte typecast; /* new_ref() initializes this from value->reg.as_type */
ubyte flags; /* not used yet */
};
struct nv_basic_block;
/* One instruction; instructions of a basic block form a doubly linked list. */
struct nv_instruction {
struct nv_instruction *next;
struct nv_instruction *prev;
uint opcode; /* one of NV_OP_* */
int serial; /* position assigned by the register allocator's ordering pass */
struct nv_value *def[4]; /* results */
struct nv_value *flags_def; /* flags value written, if any */
struct nv_ref *src[5]; /* sources; src[4] is printed as the indirect address operand */
struct nv_ref *flags_src; /* flags value the instruction is predicated on, if any */
struct nv_basic_block *bb;
struct nv_basic_block *target; /* target block of control flow insn */
ubyte cc; /* condition code, NV_CC_* */
ubyte set_cond : 4;
ubyte fixed : 1; /* don't optimize away */
ubyte is_terminator : 1;
ubyte is_join : 1;
ubyte is_long : 1; /* for emission */
/* */
ubyte saturate : 1;
ubyte centroid : 1;
ubyte flat : 1;
ubyte lanes : 4;
ubyte tex_live : 1;
/* */
ubyte tex_t; /* TIC binding */
ubyte tex_s; /* TSC binding */
ubyte tex_argc : 3;
ubyte tex_cube : 1;
ubyte tex_mask : 4;
/* */
ubyte quadop;
};
/* Edge kinds stored in nv_basic_block::in_kind / out_kind. */
#define CFG_EDGE_FORWARD 0
#define CFG_EDGE_BACK 1
#define CFG_EDGE_LOOP_ENTER 2
#define CFG_EDGE_LOOP_LEAVE 4
#define CFG_EDGE_FAKE 8
/* 'WALL' edge means where reachability check doesn't follow */
/* 'LOOP' edge means just having to do with loops */
/* mask 7 = BACK | LOOP_ENTER | LOOP_LEAVE */
#define IS_LOOP_EDGE(k) ((k) & 7)
/* mask 9 = BACK | FAKE */
#define IS_WALL_EDGE(k) ((k) & 9)
/* A node of the control flow graph. */
struct nv_basic_block {
struct nv_instruction *entry; /* first non-phi instruction */
struct nv_instruction *exit;
struct nv_instruction *phi; /* very first instruction */
int num_instructions;
struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
struct nv_basic_block *in[8]; /* hope that suffices */
uint num_in; /* number of used entries in in[] / in_kind[] */
ubyte out_kind[2]; /* CFG_EDGE_* of the corresponding out[] entry */
ubyte in_kind[8]; /* CFG_EDGE_* of the corresponding in[] entry */
int id; /* index in nv_pc::bb_list, set by new_basic_block() */
int subroutine;
uint priv; /* reset to 0 after you're done */
uint pass_seq; /* compared with nv_pc::pass_seq to detect already visited blocks */
uint32_t bin_pos; /* position, size in emitted code */
uint32_t bin_size;
uint32_t live_set[NV_PC_MAX_VALUES / 32]; /* one bit per value, indexed by nv_value::n */
};
/* Fixup types for nv_fixup::type. */
#define NV_FIXUP_CFLOW_RELOC 0
#define NV_FIXUP_PARAM_RELOC 1
/* Describes how to patch one 32-bit word of emitted code,
 * see nv_fixup_apply().
 */
struct nv_fixup {
ubyte type; /* NV_FIXUP_* */
ubyte shift; /* shift applied to the patch data */
uint32_t mask; /* bits of the target word that are replaced */
uint32_t data; /* value added to the shifted patch data */
uint32_t offset; /* byte offset of the target word */
};
/* Patch the 32-bit word at byte offset fixup->offset in @bin.
 *
 * @data is shifted by fixup->shift, added to fixup->data and the result is
 * written into the bits selected by fixup->mask; all other bits of the
 * word are preserved.
 *
 * fixup->shift is stored in an unsigned byte, so a "negative" shift can
 * only be expressed in two's complement form (values above 127). Such
 * values are decoded here and applied as a right shift by their magnitude;
 * comparing the unsigned field against 0 directly could never select the
 * right shift.
 */
static INLINE void
nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
{
   uint32_t val;
   int shift = fixup->shift;

   if (shift > 127)
      shift -= 256; /* reinterpret the byte as a signed quantity */

   val = bin[fixup->offset / 4] & ~fixup->mask;
   data = (shift < 0) ? (data >> -shift) : (data << shift);
   val |= (fixup->data + data) & fixup->mask;
   bin[fixup->offset / 4] = val;
}
struct nv50_translation_info;
/* State of one program compilation. */
struct nv_pc {
struct nv_basic_block **root;
struct nv_basic_block *current_block; /* block new_instruction() appends to */
struct nv_basic_block *parent_block;
int loop_nesting_bound;
uint pass_seq; /* sequence number of the current pass, see nv_basic_block::pass_seq */
struct nv_value values[NV_PC_MAX_VALUES]; /* pool handed out by new_value() */
struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; /* pool handed out by nv_alloc_instruction() */
struct nv_ref **refs; /* grown by new_ref() in chunks of 64 */
struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
/* number of used entries in the arrays above */
int num_values;
int num_instructions;
int num_refs;
int num_blocks;
int num_subroutines;
int max_reg[4]; /* per register file; updated by the register allocator */
uint32_t *immd_buf; /* populated on emit */
unsigned immd_count;
uint32_t *emit;
unsigned bin_size;
unsigned bin_pos;
struct nv_fixup *fixups;
int num_fixups;
/* optimization enables */
boolean opt_reload_elim;
};
void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *);
void nvi_insert_after(struct nv_instruction *, struct nv_instruction *);
/* Take the next unused instruction from the pool of @pc and initialize its
 * opcode and condition code (NV_CC_TR). The instruction is not linked into
 * any basic block.
 */
static INLINE struct nv_instruction *
nv_alloc_instruction(struct nv_pc *pc, uint opcode)
{
   const int slot = pc->num_instructions;
   struct nv_instruction *insn = &pc->instructions[slot];

   pc->num_instructions = slot + 1;
   assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);

   insn->opcode = opcode;
   insn->cc = NV_CC_TR;
   return insn;
}
/* Allocate an instruction and append it to the current block of @pc. */
static INLINE struct nv_instruction *
new_instruction(struct nv_pc *pc, uint opcode)
{
   struct nv_instruction *nvi;

   nvi = nv_alloc_instruction(pc, opcode);
   nvbb_insert_tail(pc->current_block, nvi);

   return nvi;
}
/* Allocate an instruction and insert it directly after @at. */
static INLINE struct nv_instruction *
new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
{
   struct nv_instruction *nvi;

   nvi = nv_alloc_instruction(pc, opcode);
   nvi_insert_after(at, nvi);

   return nvi;
}
/* Take the next unused value from the pool of @pc.
 *
 * The value starts out as its own join representative, without a register
 * id (-1), and with both type and as_type set to @type.
 */
static INLINE struct nv_value *
new_value(struct nv_pc *pc, ubyte file, ubyte type)
{
   struct nv_value *val = &pc->values[pc->num_values];

   assert(pc->num_values < NV_PC_MAX_VALUES - 1);

   val->n = pc->num_values;
   pc->num_values++;

   val->join = val;
   val->reg.id = -1;
   val->reg.file = file;
   val->reg.type = type;
   val->reg.as_type = type;

   return val;
}
/* Create a new value with the same file, type and as_type as @like. */
static INLINE struct nv_value *
new_value_like(struct nv_pc *pc, struct nv_value *like)
{
   struct nv_value *copy;

   copy = new_value(pc, like->reg.file, like->reg.type);
   copy->reg.as_type = like->reg.as_type;

   return copy;
}
/* Create a reference to @val and increment its reference count.
 *
 * References are allocated in chunks of 64; the pointer table pc->refs is
 * enlarged whenever the number of references reaches a multiple of 64.
 */
static INLINE struct nv_ref *
new_ref(struct nv_pc *pc, struct nv_value *val)
{
   struct nv_ref *ref;

   if ((pc->num_refs % 64) == 0) {
      const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
      const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
      struct nv_ref *chunk;
      int k;

      pc->refs = REALLOC(pc->refs, old_size, new_size);

      chunk = CALLOC(64, sizeof(struct nv_ref));
      for (k = 0; k < 64; ++k)
         pc->refs[pc->num_refs + k] = chunk + k;
   }

   ref = pc->refs[pc->num_refs];
   pc->num_refs++;

   ref->value = val;
   ref->typecast = val->reg.as_type;
   val->refc++;

   return ref;
}
/* Allocate a zero-initialized basic block and register it in pc->bb_list.
 *
 * Returns NULL if the block limit (NV_PC_MAX_BASIC_BLOCKS) has been
 * reached or if the allocation fails; pc is left unchanged in both cases.
 */
static INLINE struct nv_basic_block *
new_basic_block(struct nv_pc *pc)
{
   struct nv_basic_block *bb;

   if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
      return NULL;

   bb = CALLOC_STRUCT(nv_basic_block);
   if (!bb)
      return NULL; /* out of memory: do not dereference or register it */

   bb->id = pc->num_blocks;
   pc->bb_list[pc->num_blocks++] = bb;
   return bb;
}
/* Make the reference slot *d refer to @s, keeping reference counts in sync.
 *
 * The value previously referenced through *d (if any) loses one reference.
 * With @s == NULL the slot is cleared; otherwise an existing nv_ref is
 * reused, or a new one is created if the slot was empty.
 */
static INLINE void
nv_reference(struct nv_pc *pc, struct nv_ref **d, struct nv_value *s)
{
   struct nv_ref *cur = *d;

   if (cur)
      --cur->value->refc;

   if (!s) {
      *d = NULL;
      return;
   }

   if (cur) {
      cur->value = s;
      ++s->refc;
   } else {
      *d = new_ref(pc, s);
   }
}
/* nv50_pc_emit.c */
void nv50_emit_instruction(struct nv_pc *, struct nv_instruction *);
/* nv50_pc_print.c */
const char *nv_opcode_name(uint opcode);
void nv_print_instruction(struct nv_instruction *);
/* nv50_pc.c */
void nv_print_function(struct nv_basic_block *root);
void nv_print_program(struct nv_pc *);
boolean nv_op_commutative(uint opcode);
int nv50_indirect_opnd(struct nv_instruction *);
boolean nv50_nvi_can_use_imm(struct nv_instruction *, int s);
boolean nv50_nvi_can_predicate(struct nv_instruction *);
boolean nv50_nvi_can_load(struct nv_instruction *, int s, struct nv_value *);
boolean nv50_op_can_write_flags(uint opcode);
ubyte nv50_supported_src_mods(uint opcode, int s);
int nv_nvi_refcount(struct nv_instruction *);
void nv_nvi_delete(struct nv_instruction *);
void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
void nvbb_attach_block(struct nv_basic_block *parent,
struct nv_basic_block *, ubyte edge_kind);
boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
struct nv_basic_block *);
struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
struct nv_value *new_val);
struct nv_value *nvcg_find_immediate(struct nv_ref *);
struct nv_value *nvcg_find_constant(struct nv_ref *);
typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
int nv_pc_exec_pass0(struct nv_pc *pc);
int nv_pc_exec_pass1(struct nv_pc *pc);
int nv_pc_exec_pass2(struct nv_pc *pc);
int nv50_tgsi_to_nc(struct nv_pc *, struct nv50_translation_info *);
#endif /* __NV50_COMPILER_H__ */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,320 @@
/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nv50_context.h"
#include "nv50_pc.h"
#define NVXX_DEBUG 0
#define PRINT(args...) debug_printf(args)
/* Number of elements of a true array (not a pointer). The argument is
 * parenthesized so that any array-valued expression can be passed.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
/* ANSI terminal escape sequences used to colorize the printed output. */
static const char *norm = "\x1b[00m";
static const char *gree = "\x1b[32m";
static const char *blue = "\x1b[34m";
static const char *cyan = "\x1b[36m";
static const char *orng = "\x1b[33m";
static const char *mgta = "\x1b[35m";
/* Printable opcode names, indexed by NV_OP_* value. The extra last entry
 * ("BAD_OP") is what nv_opcode_name() returns for out-of-range opcodes.
 */
static const char *nv_opcode_names[NV_OP_COUNT + 1] = {
"phi",
"extract",
"combine",
"lda",
"sta",
"mov",
"add",
"sub",
"neg",
"mul",
"mad",
"cvt",
"sat",
"not",
"and",
"or",
"xor",
"shl",
"shr",
"rcp",
"undef",
"rsqrt",
"lg2",
"sin",
"cos",
"ex2",
"presin",
"preex2",
"min",
"max",
"set",
"sad",
"kil",
"bra",
"call",
"ret",
"break",
"breakaddr",
"joinat",
"tex",
"texbias",
"texlod",
"texfetch",
"texsize",
"dfdx",
"dfdy",
"quadop",
"linterp",
"pinterp",
"abs",
"ceil",
"floor",
"trunc",
"nop",
"select",
"export",
"join",
"BAD_OP"
};
/* Printable condition code names, indexed by NV_CC_* value (20 entries).
 * Entry 0xf (NV_CC_TR) is the empty string; entries 0x10..0x13 correspond
 * to NV_CC_O, NV_CC_C, NV_CC_A and NV_CC_S.
 */
static const char *nv_cond_names[] =
{
"never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
"never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
"o", "c", "a", "s"
};
/* Printable names of source modifier combinations, indexed by the modifier
 * bit mask (NV_MOD_NEG = 1, NV_MOD_ABS = 2, NV_MOD_NOT = 4, NV_MOD_SAT = 8).
 * The last entry is the fallback for any other value.
 *
 * Every entry must be followed by a comma: adjacent string literals are
 * concatenated by the compiler, which silently shortens the table and
 * shifts all following entries.
 */
static const char *nv_modifier_strings[] =
{
   "",
   "neg",
   "abs",
   "neg abs",
   "not",
   "not neg",
   "not abs",
   "not neg abs",
   "sat",
   "BAD_MOD"
};
/* Return the printable name of @opcode; out-of-range opcodes map to the
 * last table entry.
 */
const char *
nv_opcode_name(uint opcode)
{
   const uint fallback = ARRAY_SIZE(nv_opcode_names) - 1;

   if (opcode < fallback)
      return nv_opcode_names[opcode];
   return nv_opcode_names[fallback];
}
/* Return the printable name of a scalar NV_TYPE_* value, or "BAD_TYPE" for
 * anything else (including vector types).
 *
 * The 8 bit types are included so that every scalar type defined by the
 * compiler has a name.
 */
static INLINE const char *
nv_type_name(ubyte type)
{
   switch (type) {
   case NV_TYPE_U8: return "u8";
   case NV_TYPE_S8: return "s8";
   case NV_TYPE_U16: return "u16";
   case NV_TYPE_S16: return "s16";
   case NV_TYPE_F32: return "f32";
   case NV_TYPE_U32: return "u32";
   case NV_TYPE_S32: return "s32";
   case NV_TYPE_P32: return "p32";
   case NV_TYPE_F64: return "f64";
   default:
      return "BAD_TYPE";
   }
}
/* Return the printable name of condition code @cc; values above 19 are
 * clamped to the last table entry.
 */
static INLINE const char *
nv_cond_name(ubyte cc)
{
   const uint idx = (cc < 19) ? cc : 19;

   return nv_cond_names[idx];
}
/* Return the printable name of modifier mask @mod.
 *
 * Values beyond the table map to its last entry. The clamp is derived from
 * the table itself so that the lookup can never read past the end of the
 * array, whatever its current length.
 */
static INLINE const char *
nv_modifier_string(ubyte mod)
{
   const uint last =
      sizeof(nv_modifier_strings) / sizeof(nv_modifier_strings[0]) - 1;

   return nv_modifier_strings[MIN2(mod, last)];
}
/* Id to print for @value: the register id of its join representative if
 * one has been assigned, otherwise the value's own index.
 */
static INLINE int
nv_value_id(struct nv_value *value)
{
   const int reg_id = value->join->reg.id;

   return (reg_id >= 0) ? reg_id : value->n;
}
/* TRUE if @value itself carries a register id (>= 0). */
static INLINE boolean
nv_value_allocated(struct nv_value *value)
{
   if (value->reg.id >= 0)
      return TRUE;
   return FALSE;
}
/* Print a memory operand of the form  c<buf>[<addr reg>+<offset>].
 *
 * @c       letter identifying the memory file
 * @buf     buffer index, omitted from the output if negative
 * @a       optional address register value (may be NULL)
 * @offset  offset, printed in hexadecimal
 */
static INLINE void
nv_print_address(const char c, int buf, struct nv_value *a, int offset)
{
   if (buf < 0)
      PRINT(" %s%c[", cyan, c);
   else
      PRINT(" %s%c%i[", cyan, c, buf);

   if (a) {
      /* '$' marks an allocated register, '%' a virtual one */
      const char pfx = nv_value_allocated(a) ? '$' : '%';

      PRINT("%s%ca%i%s+", mgta, pfx, nv_value_id(a), cyan);
   }

   PRINT("%s0x%x%s]", orng, offset, cyan);
}
static INLINE void
nv_print_cond(struct nv_instruction *nvi)
{
char pfx = nv_value_allocated(nvi->flags_src->value->join) ? '$' : '%';
PRINT("%s%s %s%cc%i ",
gree, nv_cond_name(nvi->cc),
mgta, pfx, nv_value_id(nvi->flags_src->value));
}
/* Print @value as an operand.
 *
 * @ind   address register value for memory operands, or NULL
 * @type  type to print; NV_TYPE_ANY selects the value's own type
 *
 * The type name is printed first (except for flags values), followed by a
 * file specific representation of the value.
 */
static INLINE void
nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
{
   const int file = value->reg.file;
   char pfx;

   if (type == NV_TYPE_ANY)
      type = value->reg.type;

   if (file != NV_FILE_FLAGS)
      PRINT(" %s%s", gree, nv_type_name(type));

   /* '$' marks an allocated register, '%' a virtual one */
   pfx = nv_value_allocated(value->join) ? '$' : '%';

   switch (file) {
   case NV_FILE_GPR:
      PRINT(" %s%cr%i", blue, pfx, nv_value_id(value));
      break;
   case NV_FILE_OUT:
      PRINT(" %s%co%i", mgta, pfx, nv_value_id(value));
      break;
   case NV_FILE_ADDR:
      PRINT(" %s%ca%i", mgta, pfx, nv_value_id(value));
      break;
   case NV_FILE_FLAGS:
      PRINT(" %s%cc%i", mgta, pfx, nv_value_id(value));
      break;
   case NV_FILE_MEM_L:
      nv_print_address('l', -1, ind, nv_value_id(value));
      break;
   case NV_FILE_MEM_S:
      nv_print_address('s', -1, ind, 4 * nv_value_id(value));
      break;
   case NV_FILE_MEM_P:
      nv_print_address('p', -1, ind, 4 * nv_value_id(value));
      break;
   case NV_FILE_MEM_V:
      nv_print_address('v', -1, ind, 4 * nv_value_id(value));
      break;
   case NV_FILE_IMM:
      if (type == NV_TYPE_U16 || type == NV_TYPE_S16) {
         PRINT(" %s0x%04x", orng, value->reg.imm.u32);
      } else if (type == NV_TYPE_F32) {
         PRINT(" %s%f", orng, value->reg.imm.f32);
      } else if (type == NV_TYPE_F64) {
         PRINT(" %s%f", orng, value->reg.imm.f64);
      } else if (type == NV_TYPE_U32 || type == NV_TYPE_S32 ||
                 type == NV_TYPE_P32) {
         PRINT(" %s0x%08x", orng, value->reg.imm.u32);
      }
      break;
   default:
      /* the buffer-indexed files g0..g15 and c0..c15 */
      if (file >= NV_FILE_MEM_G(0) && file <= NV_FILE_MEM_G(15)) {
         nv_print_address('g', file - NV_FILE_MEM_G(0), ind,
                          nv_value_id(value) * 4);
      } else if (file >= NV_FILE_MEM_C(0) && file <= NV_FILE_MEM_C(15)) {
         nv_print_address('c', file - NV_FILE_MEM_C(0), ind,
                          nv_value_id(value) * 4);
      } else {
         NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
      }
      break;
   }
}
/* Print the value behind @ref using the reference's typecast. */
static INLINE void
nv_print_ref(struct nv_ref *ref, struct nv_value *ind)
{
   struct nv_value *val = ref->value;

   nv_print_value(val, ind, ref->typecast);
}
void
nv_print_instruction(struct nv_instruction *i)
{
int j;
PRINT("%i: ", i->serial);
if (i->flags_src)
nv_print_cond(i);
PRINT("%s", gree);
if (i->opcode == NV_OP_SET)
PRINT("set %s", nv_cond_name(i->set_cond));
else
if (i->saturate)
PRINT("sat %s", nv_opcode_name(i->opcode));
else
PRINT("%s", nv_opcode_name(i->opcode));
if (i->flags_def)
nv_print_value(i->flags_def, NULL, NV_TYPE_ANY);
/* Only STORE & STA can write to MEM, and they do not def
* anything, so the address is thus part of the source.
*/
if (i->def[0])
nv_print_value(i->def[0], NULL, NV_TYPE_ANY);
else
if (i->target)
PRINT(" %s(BB:%i)", orng, i->target->id);
else
PRINT(" #");
for (j = 0; j < 4; ++j) {
if (!i->src[j])
continue;
if (i->src[j]->mod)
PRINT(" %s%s", gree, nv_modifier_string(i->src[j]->mod));
nv_print_ref(i->src[j],
(j == nv50_indirect_opnd(i)) ?
i->src[4]->value : NULL);
}
PRINT(" %s%c\n", norm, i->is_long ? 'l' : 's');
}

View file

@ -0,0 +1,962 @@
/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* #define NV50PC_DEBUG */
/* #define NV50_RA_DEBUG_LIVEI */
/* #define NV50_RA_DEBUG_LIVE_SETS */
/* #define NV50_RA_DEBUG_JOIN */
#include "nv50_context.h"
#include "nv50_pc.h"
#include "util/u_simple_list.h"
#define NUM_REGISTER_FILES 4
/* Occupancy state of the register files during allocation. */
struct register_set {
struct nv_pc *pc;
uint32_t last[NUM_REGISTER_FILES]; /* per file: upper bound used by reg_assign() when searching */
uint32_t bits[NUM_REGISTER_FILES][8]; /* per file: bitmap of occupied allocation units (8 * 32 bits) */
};
/* Context shared by the register allocation passes. */
struct nv_pc_pass {
struct nv_pc *pc;
struct nv_instruction **insns; /* instructions in serial order, filled by pass_order_instructions() */
int num_insns; /* number of entries in insns */
uint pass_seq;
};
/* Merge @range with all directly following ranges that it touches or
 * overlaps, freeing the absorbed list nodes.
 */
static void
ranges_coalesce(struct nv_range *range)
{
   struct nv_range *succ;

   for (succ = range->next;
        succ && range->end >= succ->bgn;
        succ = range->next) {
      assert(range->bgn <= succ->bgn);

      if (succ->end > range->end)
         range->end = succ->end;

      range->next = succ->next;
      FREE(succ);
   }
}
/* Add the range [bgn, end) to the sorted live interval list of @val.
 *
 * If the range overlaps or touches an existing node, that node is extended
 * (and coalesced with its successors) and TRUE is returned; @new_range is
 * not used in that case. Otherwise a node is inserted at the right
 * position -- @new_range if given, a freshly allocated one if not -- and
 * FALSE is returned.
 */
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
   struct nv_range **link = &val->livei;
   struct nv_range *cur = val->livei;

   while (cur) {
      int grow_front, grow_back;

      if (end < cur->bgn)
         break; /* insert before */

      if (bgn > cur->end) {
         link = &cur->next;
         cur = cur->next;
         continue; /* insert after */
      }

      /* overlap */
      grow_front = bgn < cur->bgn;
      grow_back = end > cur->end;

      if (grow_front)
         cur->bgn = bgn;
      if (grow_back)
         cur->end = end;

      if (grow_front || grow_back) {
         ranges_coalesce(cur);
      } else {
         assert(bgn >= cur->bgn);
         assert(end <= cur->end);
      }
      return TRUE;
   }

   if (!new_range)
      new_range = CALLOC_STRUCT(nv_range);

   new_range->bgn = bgn;
   new_range->end = end;
   new_range->next = cur;
   *link = new_range;
   return FALSE;
}
/* Record that @val is live inside block @b up to serial @end.
 *
 * The range starts at the defining instruction if that lies between the
 * entry and exit of @b, otherwise at the entry of @b. Values without a
 * defining instruction are ignored.
 */
static void
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
{
   int start;

   if (!val->insn) /* ignore non-def values */
      return;

   assert(b->entry->serial <= b->exit->serial);
   assert(b->phi->serial <= end);
   assert(b->exit->serial + 1 >= end);

   start = val->insn->serial;
   if (start < b->entry->serial || start > b->exit->serial)
      start = b->entry->serial;

   assert(start <= end);

   add_range_ex(val, start, end, NULL);
}
#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
/* Debug helper: print all live ranges of @a on one line. */
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r;

   debug_printf("livei %i: ", a->n);
   for (r = a->livei; r; r = r->next)
      debug_printf("[%i, %i) ", r->bgn, r->end);
   debug_printf("\n");
}
#endif
/* Move all live ranges of @src into @dst. Nodes that get merged into an
 * existing range of @dst are freed, the others are relinked into the list
 * of @dst. @src ends up without live ranges.
 */
static void
livei_unify(struct nv_value *dst, struct nv_value *src)
{
   struct nv_range *node = src->livei;

   while (node) {
      struct nv_range *rest = node->next;

      if (add_range_ex(dst, node->bgn, node->end, node))
         FREE(node);

      node = rest;
   }
   src->livei = NULL;
}
/* Free every node of the live interval list of @val. The list head pointer
 * in @val is left untouched.
 */
static void
livei_release(struct nv_value *val)
{
   struct nv_range *node = val->livei;

   while (node) {
      struct nv_range *rest = node->next;

      FREE(node);
      node = rest;
   }
}
/* TRUE if any live range of @a intersects any live range of @b
 * (ranges are half-open, so merely touching ranges do not overlap).
 */
static boolean
livei_have_overlap(struct nv_value *a, struct nv_value *b)
{
   struct nv_range *ra;

   for (ra = a->livei; ra; ra = ra->next) {
      struct nv_range *rb = b->livei;

      while (rb) {
         if (rb->bgn < ra->end && rb->end > ra->bgn)
            return TRUE;
         rb = rb->next;
      }
   }
   return FALSE;
}
/* Return the end of the last live range of @a, which must have at least
 * one range.
 */
static int
livei_end(struct nv_value *a)
{
   struct nv_range *last = a->livei;

   assert(last);

   for (; last->next; last = last->next)
      ;

   return last->end;
}
/* TRUE if @pos lies inside one of the live ranges of @a. The scan stops at
 * the first range beginning after @pos, relying on the list being sorted.
 */
static boolean
livei_contains(struct nv_value *a, int pos)
{
   struct nv_range *r = a->livei;

   while (r && r->bgn <= pos) {
      if (pos < r->end)
         return TRUE;
      r = r->next;
   }
   return FALSE;
}
/* Find a free, contiguous group of registers for the @n values in @def and
 * assign register ids to them.
 *
 * All values are taken to have the file and type of def[0]. Returns FALSE
 * if no suitable free position is found below set->last[file], TRUE
 * otherwise.
 */
static boolean
reg_assign(struct register_set *set, struct nv_value **def, int n)
{
int i, id, s;
uint m;
int f = def[0]->reg.file;
/* s: number of bitmap units needed for all n values, m: mask of s bits.
 * NOTE(review): for 8 bit types nv_type_order() returns 0, which makes
 * the shift count -1 here -- confirm such types never reach this point.
 */
s = n << (nv_type_order(def[0]->reg.type) - 1);
m = (1 << s) - 1;
id = set->last[f];
/* scan the bitmap word by word, skipping completely occupied words */
for (i = 0; i * 32 < set->last[f]; ++i) {
if (set->bits[f][i] == 0xffffffff)
continue;
/* try positions aligned to the group size inside this word */
for (id = 0; id < 32; id += s)
if (!(set->bits[f][i] & (m << id)))
break;
if (id < 32)
break;
}
/* nothing free within the allowed range */
if (i * 32 + id > set->last[f])
return FALSE;
/* mark the units as occupied and track the highest unit in use */
set->bits[f][i] |= m << id;
id += i * 32;
set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
/* convert the unit index back to a register id of this type */
id >>= nv_type_order(def[0]->reg.type) - 1;
/* only values that have a live interval receive an id */
for (i = 0; i < n; ++i)
if (def[i]->livei)
def[i]->reg.id = id++;
return TRUE;
}
/* Mark the register already assigned to @val as occupied in @set and
 * update the highest used unit of its file. Values without a register id
 * are ignored.
 */
static INLINE void
reg_occupy(struct register_set *set, struct nv_value *val)
{
   const int file = val->reg.file;
   int order, unit;
   uint mask;

   if (val->reg.id < 0)
      return;

   order = nv_type_order(val->reg.type) - 1;
   unit = val->reg.id << order;
   mask = (1 << (1 << order)) - 1;

   set->bits[file][unit / 32] |= mask << (unit % 32);

   if (set->pc->max_reg[file] < unit)
      set->pc->max_reg[file] = unit;
}
/* Mark the register assigned to @val as free again in @set. Values without
 * a register id are ignored.
 */
static INLINE void
reg_release(struct register_set *set, struct nv_value *val)
{
   const int file = val->reg.file;
   int order, unit;
   uint mask;

   if (val->reg.id < 0)
      return;

   order = nv_type_order(val->reg.type) - 1;
   unit = val->reg.id << order;
   mask = (1 << (1 << order)) - 1;

   set->bits[file][unit / 32] &= ~(mask << (unit % 32));
}
/* Check whether the join groups of @a and @b may be merged.
 *
 * Merging is refused if the values differ in register file or size, or if
 * both groups already have (different) registers. If exactly one group has
 * a register, merging is also refused when the other group's live interval
 * overlaps with any other group that was assigned the same register.
 *
 * This does not check the live intervals of @a and @b against each other;
 * see try_join_values().
 */
static INLINE boolean
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int i;
   struct nv_value *val;

   if (a->reg.file != b->reg.file ||
       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
      return FALSE;

   /* same register, or both still unassigned */
   if (a->join->reg.id == b->join->reg.id)
      return TRUE;

   /* two different registers cannot be joined */
   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
      return FALSE;

   /* exactly one side has a register: make it 'a' */
   if (b->join->reg.id >= 0) {
      val = a;
      a = b;
      b = val;
   }

   /* b must not be live together with another user of a's register */
   for (i = 0; i < ctx->pc->num_values; ++i) {
      val = &ctx->pc->values[i];

      if (val->join->reg.id != a->join->reg.id)
         continue;
      if (val->join != a->join && livei_have_overlap(val->join, b->join))
         return FALSE;
   }
   return TRUE;
}
/* Merge the join group of @b into the join group of @a.
 *
 * A register id already assigned to b's group is taken over, the live
 * intervals are unified, and every value that pointed at b's
 * representative is redirected to a's representative.
 */
static INLINE void
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   struct nv_value *old_rep = b->join;
   struct nv_value *values = ctx->pc->values;
   int k;

   if (old_rep->reg.id >= 0)
      a->join->reg.id = old_rep->reg.id;

   livei_unify(a->join, old_rep);

#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("joining %i to %i\n", b->n, a->n);
#endif

   /* make a->join the new representative */
   for (k = 0; k < ctx->pc->num_values; ++k) {
      if (values[k].join == old_rep)
         values[k].join = a->join;
   }

   assert(b->join == a->join);
}
/* Join the groups of @a and @b if that is allowed and their live intervals
 * do not overlap; otherwise do nothing.
 */
static INLINE void
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   if (join_allowed(ctx, a, b)) {
      if (!livei_have_overlap(a->join, b->join)) {
         do_join_values(ctx, a, b);
         return;
      }
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
      livei_print(a);
      livei_print(b);
#endif
      return;
   }
#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
}
/* TRUE if @b has more than one predecessor and @p has two outgoing edges
 * that are not loop edges.
 */
static INLINE boolean
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
{
   int plain_edges = 0;
   int k;

   for (k = 0; k < 2; ++k) {
      if (!p->out[k])
         continue;
      if (IS_LOOP_EDGE(p->out_kind[k]))
         continue;
      ++plain_edges;
   }

   return (b->num_in > 1) && (plain_edges == 2);
}
/* Select the operand of @phi that belongs to predecessor block @b.
 *
 * Among the operands whose defining block reaches @b (without passing
 * @tb), an operand replaces the current choice if the current choice's
 * defining block cannot be reached from the new operand's defining block.
 * Returns the operand index, or -1 if no operand qualifies.
 */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
                struct nv_basic_block *tb)
{
   int best = -1;
   int s;

   for (s = 0; s < 4 && phi->src[s]; ++s) {
      struct nv_basic_block *def_bb = phi->src[s]->value->insn->bb;

      if (!nvbb_reachable_by(b, def_bb, tb))
         continue;

      /* NOTE: back-edges are ignored by the reachable-by check */
      if (best < 0 ||
          !nvbb_reachable_by(phi->src[best]->value->insn->bb, def_bb, tb))
         best = s;
   }
   return best;
}
/* For each operand of each PHI in b, generate a new value by inserting a MOV
* at the end of the block it is coming from and replace the operand with its
* result. This eliminates liveness conflicts and enables us to let values be
* copied to the right register if such a conflict exists nonetheless.
*
* These MOVs are also crucial in making sure the live intervals of phi sources
* are extended until the end of the loop, since they are not included in the
* live-in sets.
*/
/* Insert MOVs for the phi operands of @b into its predecessor blocks, then
 * recurse into the successors of @b that have not been visited in this
 * pass. Always returns 0.
 */
static int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *i, *ni;
struct nv_value *val;
struct nv_basic_block *p, *pn;
int n, j;
/* mark as visited for this pass */
b->pass_seq = ctx->pc->pass_seq;
for (n = 0; n < b->num_in; ++n) {
p = pn = b->in[n];
assert(p);
/* split the edge p -> b with a new block that will hold the MOVs */
if (need_new_else_block(b, p)) {
/* NOTE(review): new_basic_block() can return NULL (block limit reached);
 * the result is used unchecked below.
 */
pn = new_basic_block(ctx->pc);
if (p->out[0] == b)
p->out[0] = pn;
else
p->out[1] = pn;
if (p->exit->target == b) /* target to new else-block */
p->exit->target = pn;
b->in[n] = pn;
pn->out[0] = b;
pn->in[0] = p;
pn->num_in = 1;
}
/* new_instruction() appends to the current block */
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
if ((j = phi_opnd_for_bb(i, p, b)) < 0)
continue;
val = i->src[j]->value;
/* flags != 0 marks an operand that was already replaced by a MOV result:
 * use the MOV's source instead and put the new MOV into the next free
 * operand slot
 */
if (i->src[j]->flags) {
val = val->insn->src[0]->value;
while (j < 4 && i->src[j])
++j;
assert(j < 4);
}
ni = new_instruction(ctx->pc, NV_OP_MOV);
/* TODO: insert instruction at correct position in the first place */
/* keep a preceding instruction that has a branch target last in the block */
if (ni->prev && ni->prev->target)
nv_nvi_permute(ni->prev, ni);
ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
ni->def[0]->insn = ni;
ni->src[0] = new_ref(ctx->pc, val);
nv_reference(ctx->pc, &i->src[j], ni->def[0]);
i->src[j]->flags = 1;
}
/* a new block that received instructions: add a branch to b at the end of
 * the other predecessor of b
 */
if (pn != p && pn->exit) {
ctx->pc->current_block = b->in[n ? 0 : 1];
ni = new_instruction(ctx->pc, NV_OP_BRA);
ni->target = b;
ni->is_terminator = 1;
}
}
/* continue with successors not yet visited in this pass */
for (j = 0; j < 2; ++j)
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
pass_generate_phi_movs(ctx, b->out[j]);
return 0;
}
/* Walk all ordered instructions and join values depending on @iter:
 *   iter 0: results of TEX, TXB, TXL and TXQ are joined with their sources
 *   iter 1: SELECT results are joined with all of their sources
 *   iter 2: PHI and MOV results are joined with their sources if possible
 * Always returns 0.
 */
static int
pass_join_values(struct nv_pc_pass *ctx, int iter)
{
   int idx, s;

   for (idx = 0; idx < ctx->num_insns; ++idx) {
      struct nv_instruction *insn = ctx->insns[idx];

      switch (insn->opcode) {
      case NV_OP_PHI:
         if (iter == 2) {
            for (s = 0; s < 4 && insn->src[s]; ++s)
               try_join_values(ctx, insn->def[0], insn->src[s]->value);
         }
         break;
      case NV_OP_MOV:
         if ((iter == 2) && insn->src[0]->value->insn &&
             !nv_is_vector_op(insn->src[0]->value->join->insn->opcode)) {
            try_join_values(ctx, insn->def[0], insn->src[0]->value);
         }
         break;
      case NV_OP_SELECT:
         if (iter == 1) {
            for (s = 0; s < 4 && insn->src[s]; ++s) {
               assert(join_allowed(ctx, insn->def[0], insn->src[s]->value));
               do_join_values(ctx, insn->def[0], insn->src[s]->value);
            }
         }
         break;
      case NV_OP_TEX:
      case NV_OP_TXB:
      case NV_OP_TXL:
      case NV_OP_TXQ:
         if (iter == 0) {
            /* result s shares its register with source s */
            for (s = 0; s < 4 && insn->src[s]; ++s)
               do_join_values(ctx, insn->def[s], insn->src[s]->value);
         }
         break;
      default:
         break;
      }
   }
   return 0;
}
/* Order the instructions so that live intervals can be expressed in numbers. */
/* Pass callback: number the instructions of @b (phis included) in list
 * order and append them to the context's instruction array.
 * @priv is the struct nv_pc_pass of the running pass.
 */
static void
pass_order_instructions(void *priv, struct nv_basic_block *b)
{
   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
   struct nv_instruction *insn = b->phi;

   b->pass_seq = ctx->pc->pass_seq;

   assert(!b->exit || !b->exit->next);

   while (insn) {
      insn->serial = ctx->num_insns;
      ctx->insns[ctx->num_insns] = insn;
      ctx->num_insns++;
      insn = insn->next;
   }
}
/* Debug helper: print the indices of all values with a defining instruction
 * whose bit is set in the live set of @b. Compiles to an empty function
 * unless NV50_RA_DEBUG_LIVE_SETS is defined.
 */
static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
      if (!(b->live_set[j / 32] & (1u << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}
/* Set the bit of @val in the live set of @b. Values without a defining
 * instruction are never added.
 */
static INLINE void
live_set_add(struct nv_basic_block *b, struct nv_value *val)
{
   if (!val->insn) /* don't add non-def values */
      return;
   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
   b->live_set[val->n / 32] |= 1u << (val->n % 32);
}
/* Clear the bit of @val in the live set of @b. */
static INLINE void
live_set_rem(struct nv_basic_block *b, struct nv_value *val)
{
   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
   b->live_set[val->n / 32] &= ~(1u << (val->n % 32));
}
/* Return TRUE if the value referenced by @ref is in the live set of @b.
 *
 * The masked 32-bit word is converted to TRUE / FALSE explicitly: returning
 * it directly would truncate it if boolean is narrower than 32 bits, and
 * report FALSE for every bit position that does not fit.
 */
static INLINE boolean
live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
{
   int n = ref->value->n;

   return (b->live_set[n / 32] & (1u << (n % 32))) ? TRUE : FALSE;
}
/* The live set of a block contains those values that are live immediately
* before the beginning of the block, so do a backwards scan.
*/
/* Compute the live set of @b, after recursively computing the live sets of
 * its successors. Blocks already visited in this pass are skipped.
 * Returns 0, or the non-zero result of a failed recursive call.
 */
static int
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *i;
int j, n, ret = 0;
if (b->pass_seq >= ctx->pc->pass_seq)
return 0;
b->pass_seq = ctx->pc->pass_seq;
/* slight hack for undecidedness: set phi = entry if it's undefined */
if (!b->phi)
b->phi = b->entry;
/* start from the union of the successors' live sets; a self-loop is skipped */
for (n = 0; n < 2; ++n) {
if (!b->out[n] || b->out[n] == b)
continue;
ret = pass_build_live_sets(ctx, b->out[n]);
if (ret)
return ret;
/* out[0] overwrites the set, out[1] is OR'ed into it */
if (n == 0) {
for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
b->live_set[j] = b->out[n]->live_set[j];
} else {
for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
b->live_set[j] |= b->out[n]->live_set[j];
}
}
if (!b->entry)
return 0;
bb_live_set_print(ctx->pc, b);
/* backwards over the non-phi instructions: results are removed from the
 * set, sources are added
 */
for (i = b->exit; i != b->entry->prev; i = i->prev) {
for (j = 0; j < 4; j++) {
if (!i->def[j])
break;
live_set_rem(b, i->def[j]);
}
for (j = 0; j < 4; j++) {
if (!i->src[j])
break;
live_set_add(b, i->src[j]->value);
}
if (i->src[4])
live_set_add(b, i->src[4]->value);
if (i->flags_def)
live_set_rem(b, i->flags_def);
if (i->flags_src)
live_set_add(b, i->flags_src->value);
}
/* phi results are defined in this block; their sources are not added here */
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
live_set_rem(b, i->def[0]);
bb_live_set_print(ctx->pc, b);
return 0;
}
/* Initialise the live set of @b from the live sets of its successors.
 *
 * @n is the number of 32-bit words in a live set. With two successors the
 * result is the bitwise OR of both sets, with one successor it is a copy of
 * that set, and without successors the set is cleared.
 */
static void collect_live_values(struct nv_basic_block *b, const int n)
{
   struct nv_basic_block *first = b->out[0];
   struct nv_basic_block *second = b->out[1];
   int w;

   if (first && second) { /* what to do about back-edges ? */
      for (w = 0; w < n; ++w)
         b->live_set[w] = first->live_set[w] | second->live_set[w];
      return;
   }

   if (first) {
      memcpy(b->live_set, first->live_set, n * sizeof(uint32_t));
      return;
   }

   if (second) {
      memcpy(b->live_set, second->live_set, n * sizeof(uint32_t));
      return;
   }

   memset(b->live_set, 0, n * sizeof(uint32_t));
}
/* Build the live intervals of all values by walking the blocks reachable
 * from @b.
 *
 * For each block the live-out set is derived from the successors' live sets
 * (corrected for the successors' phi instructions), every value still live
 * at the end of the block gets a range reaching to b->exit->serial + 1, and
 * a backwards walk over the instructions adds a range ending at the
 * instruction for every source that was not live yet at that point.
 * Successors whose pass_seq is older than the current pass are then
 * processed recursively.
 *
 * NOTE: the live intervals of phi functions start at the first non-phi insn.
 *
 * Always returns 0.
 */
static int
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *i_stop;
   int j, s;
   const int n = (ctx->pc->num_values + 31) / 32;

   /* verify that first block does not have live-in values */
   if (b->num_in == 0)
      for (j = 0; j < n; ++j)
         assert(b->live_set[j] == 0);

   collect_live_values(b, n);

   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
   for (j = 0; j < 2; ++j) {
      if (!b->out[j] || !b->out[j]->phi)
         continue;
      /* also stop at the end of the list: the successor's instruction list
       * may end on a phi, in which case i->next is NULL
       */
      for (i = b->out[j]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         live_set_rem(b, i->def[0]);

         for (s = 0; s < 4; ++s) {
            if (!i->src[s])
               break;
            assert(i->src[s]->value->insn);
            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
               live_set_add(b, i->src[s]->value);
            else
               live_set_rem(b, i->src[s]->value);
         }
      }
   }

   /* remaining live-outs are live until the end */
   if (b->exit) {
      for (j = 0; j < ctx->pc->num_values; ++j) {
         /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
         if (!(b->live_set[j / 32] & (1u << (j % 32))))
            continue;
         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for live value %i: ", j);
         livei_print(&ctx->pc->values[j]);
#endif
      }
   }

   i_stop = b->entry ? b->entry->prev : NULL;

   /* don't have to include phi functions here (will have 0 live range) */
   for (i = b->exit; i != i_stop; i = i->prev) {
      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
      for (j = 0; j < 4; ++j) {
         if (i->def[j])
            live_set_rem(b, i->def[j]);
      }
      if (i->flags_def)
         live_set_rem(b, i->flags_def);

      /* a source that is not live below this instruction dies here */
      for (j = 0; j < 5; ++j) {
         if (i->src[j] && !live_set_test(b, i->src[j])) {
            live_set_add(b, i->src[j]->value);
            add_range(i->src[j]->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
            debug_printf("adding range for source %i (ends living): ",
                         i->src[j]->value->n);
            livei_print(i->src[j]->value);
#endif
         }
      }
      if (i->flags_src && !live_set_test(b, i->flags_src)) {
         live_set_add(b, i->flags_src->value);
         add_range(i->flags_src->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for source %i (ends living): ",
                      i->flags_src->value->n);
         livei_print(i->flags_src->value);
#endif
      }
   }

   b->pass_seq = ctx->pc->pass_seq;

   /* visit each successor once per pass */
   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[0]);

   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[1]);

   return 0;
}
/* Reset @set to an all-zero state, attach it to @pc and fill in the
 * per-register-file entries of set->last.
 *
 * NOTE(review): 'last' presumably holds the highest allocatable register
 * index of each file - confirm against reg_assign().
 */
static INLINE void
nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
{
   memset(set, 0, sizeof(*set));

   set->pc = pc;

   set->last[NV_FILE_GPR] = 255;
   set->last[NV_FILE_OUT] = 127;
   set->last[NV_FILE_FLAGS] = 4;
   set->last[NV_FILE_ADDR] = 4;
}
/* Insert @nval into the circular, doubly linked list headed by @list,
 * keeping the list ordered by ascending livei->bgn.
 *
 * The search runs backwards from the tail, so a value whose interval begins
 * at the same position as existing elements is placed after them.
 */
static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
   struct nv_value *pos = list->prev;
   struct nv_value *after;

   /* skip every element that begins later than the new value */
   while (pos != list && pos->livei->bgn > nval->livei->bgn)
      pos = pos->prev;

   /* now pos begins before or at the same time as nval: link in behind it */
   after = pos->next;
   nval->prev = pos;
   nval->next = after;
   after->prev = nval;
   pos->next = nval;
}
/* Linear scan register assignment over the live intervals of all values
 * defined by the instructions in ctx->insns.
 *
 * Values are kept on four lists: 'unhandled' (ordered by interval start),
 * 'active', 'inactive' and 'handled'. 'free' tracks the registers occupied
 * by the values on the active list.
 *
 * @iter selects what gets a register: the results of vector ops are always
 * assigned (4 registers at once), all other values only when @iter is
 * non-zero.
 *
 * Returns 0; running out of registers reports an error and calls abort().
 */
static int
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
{
struct nv_instruction *i;
struct register_set f, free;
int k, n;
struct nv_value *cur, *val, *tmp[2];
struct nv_value active, inactive, handled, unhandled;
make_empty_list(&active);
make_empty_list(&inactive);
make_empty_list(&handled);
make_empty_list(&unhandled);
nv50_ctor_register_set(ctx->pc, &free);
/* Collect every defined value that has a live interval, ordered by the
 * start of the interval.
 *
 * joined values should have range = NULL and thus not be added;
 * also, fixed memory values won't be added because they're not
 * def'd, just used
 */
for (n = 0; n < ctx->num_insns; ++n) {
i = ctx->insns[n];
for (k = 0; k < 4; ++k) {
if (i->def[k] && i->def[k]->livei)
insert_ordered_tail(&unhandled, i->def[k]);
else
if (0 && i->def[k])
debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
}
if (i->flags_def && i->flags_def->livei)
insert_ordered_tail(&unhandled, i->flags_def);
}
/* sanity check (debug builds only): list is sorted and holds only values
 * that are their own join representative
 */
for (val = unhandled.next; val != unhandled.prev; val = val->next) {
assert(val->join == val);
assert(val->livei->bgn <= val->next->livei->bgn);
}
foreach_s(cur, tmp[0], &unhandled) {
remove_from_list(cur);
/* active values: those that ended before cur begins are done, those that
 * do not cover the start of cur become inactive; both give up their
 * register
 */
foreach_s(val, tmp[1], &active) {
if (livei_end(val) <= cur->livei->bgn) {
reg_release(&free, val);
move_to_head(&handled, val);
} else
if (!livei_contains(val, cur->livei->bgn)) {
reg_release(&free, val);
move_to_head(&inactive, val);
}
}
/* inactive values: ended ones are done, those covering the start of cur
 * become active again and re-occupy their register
 */
foreach_s(val, tmp[1], &inactive) {
if (livei_end(val) <= cur->livei->bgn)
move_to_head(&handled, val);
else
if (livei_contains(val, cur->livei->bgn)) {
reg_occupy(&free, val);
move_to_head(&active, val);
}
}
/* f = registers available to cur: start from a copy of 'free' and also
 * block the registers of overlapping inactive values and of overlapping
 * unhandled values that already have a register (reg.id >= 0)
 */
f = free;
foreach(val, &inactive)
if (livei_have_overlap(val, cur))
reg_occupy(&f, val);
foreach(val, &unhandled)
if (val->reg.id >= 0 && livei_have_overlap(val, cur))
reg_occupy(&f, val);
/* cur has no register yet */
if (cur->reg.id < 0) {
boolean mem = FALSE;
/* NOTE(review): reg_assign() appears to return non-zero on success, so
 * 'mem' is set when no register could be found - confirm
 */
if (nv_is_vector_op(cur->insn->opcode))
mem = !reg_assign(&f, &cur->insn->def[0], 4);
else
if (iter)
mem = !reg_assign(&f, &cur, 1);
if (mem) {
NOUVEAU_ERR("out of registers\n");
abort();
}
}
/* cur is active from here on */
insert_at_head(&active, cur);
reg_occupy(&free, cur);
}
return 0;
}
/* Run register allocation for the code reachable from @root.
 *
 * Stages, in order: phi move generation, live set construction (repeated
 * pc->loop_nesting_bound times), instruction ordering, live interval
 * construction, then alternating value joining and linear scan passes.
 * pc->pass_seq is advanced before each block-walking stage.
 *
 * Returns 0 on success, -1 if the pass context cannot be allocated, or the
 * non-zero result of the first stage that failed.
 *
 * NOTE(review): the live intervals are only released on the success path;
 * an early exit through 'out' skips the livei_release() loop.
 */
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pc_pass *ctx;
   int i, ret;

   NV50_DBGMSG("REGISTER ALLOCATION - entering\n");

   ctx = CALLOC_STRUCT(nv_pc_pass);
   if (!ctx)
      return -1;
   ctx->pc = pc;

   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
   if (!ctx->insns) {
      FREE(ctx);
      return -1;
   }

   pc->pass_seq++;
   ret = pass_generate_phi_movs(ctx, root);
   assert(!ret && "phi movs");
   /* the assert vanishes in release builds, so check explicitly as well */
   if (ret) {
      NOUVEAU_ERR("failed to generate phi moves\n");
      goto out;
   }

   for (i = 0; i < pc->loop_nesting_bound; ++i) {
      pc->pass_seq++;
      ret = pass_build_live_sets(ctx, root);
      assert(!ret && "live sets");
      if (ret) {
         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
         goto out;
      }
   }

   pc->pass_seq++;
   nv_pc_pass_in_order(root, pass_order_instructions, ctx);

   pc->pass_seq++;
   ret = pass_build_intervals(ctx, root);
   assert(!ret && "build intervals");
   if (ret) {
      NOUVEAU_ERR("failed to build live intervals\n");
      goto out;
   }

#ifdef NV50_RA_DEBUG_LIVEI
   for (i = 0; i < pc->num_values; ++i)
      livei_print(&pc->values[i]);
#endif

   ret = pass_join_values(ctx, 0);
   if (ret)
      goto out;
   ret = pass_linear_scan(ctx, 0);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, 1);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, 2);
   if (ret)
      goto out;
   ret = pass_linear_scan(ctx, 1);
   if (ret)
      goto out;

   for (i = 0; i < pc->num_values; ++i)
      livei_release(&pc->values[i]);

   NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");

out:
   FREE(ctx->insns);
   FREE(ctx);
   return ret;
}
/* Run nv_pc_pass1() on every root block of @pc: entries 0 through
 * pc->num_subroutines of pc->root, skipping empty slots.
 *
 * Returns 0 if all of them succeeded, otherwise the first non-zero result;
 * remaining roots are not processed in that case.
 */
int
nv_pc_exec_pass1(struct nv_pc *pc)
{
   int r;

   for (r = 0; r < pc->num_subroutines + 1; ++r) {
      int err;

      if (!pc->root[r])
         continue;

      err = nv_pc_pass1(pc, pc->root[r]);
      if (err)
         return err;
   }

   return 0;
}

File diff suppressed because it is too large Load diff

View file

@ -1,75 +1,131 @@
#ifndef __NV50_PROGRAM_H__
#define __NV50_PROGRAM_H__
/*
* Copyright 2010 Ben Skeggs
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NV50_PROG_H__
#define __NV50_PROG_H__
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
struct nv50_program_exec {
struct nv50_program_exec *next;
#define NV50_CAP_MAX_PROGRAM_TEMPS 64
unsigned inst[2];
struct {
int index;
unsigned mask;
unsigned shift;
} param;
};
struct nv50_varying {
uint8_t id; /* tgsi index */
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
struct nv50_sreg4 {
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
uint8_t id; /* tgsi index */
uint8_t mask : 4;
uint8_t linear : 1;
uint8_t pad : 3;
uint8_t mask;
boolean linear;
ubyte sn, si; /* semantic name & index */
ubyte sn; /* semantic name */
ubyte si; /* semantic index */
};
struct nv50_program {
struct pipe_shader_state pipe;
struct tgsi_shader_info info;
boolean translated;
struct pipe_shader_state pipe;
unsigned type;
struct nv50_program_exec *exec_head;
struct nv50_program_exec *exec_tail;
unsigned exec_size;
struct nouveau_resource *data[1];
unsigned data_start[1];
ubyte type;
boolean translated;
struct nouveau_bo *bo;
struct nouveau_bo *bo;
struct nouveau_stateobj *so;
uint32_t *immd;
unsigned immd_nr;
unsigned param_nr;
uint32_t *code;
unsigned code_size;
unsigned code_start; /* offset inside bo */
uint32_t *immd;
unsigned immd_size;
unsigned parm_size; /* size limit of uniform buffer */
struct {
unsigned high_temp;
unsigned high_result;
ubyte max_gpr; /* REG_ALLOC_TEMP */
ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */
uint32_t attr[2];
uint32_t regs[4];
ubyte in_nr;
ubyte out_nr;
struct nv50_varying in[16];
struct nv50_varying out[16];
/* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
unsigned in_nr, out_nr;
struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS];
struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS];
struct {
uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
ubyte psiz;
ubyte bfc[2];
ubyte edgeflag;
ubyte clpd;
ubyte clpd_nr;
} vp;
/* FP colour inputs, VP/GP back colour outputs */
struct nv50_sreg4 two_side[2];
struct {
uint32_t flags[2]; /* 0x19a8, 196c */
uint32_t interp; /* 0x1988 */
uint32_t colors; /* 0x1904 */
} fp;
/* GP only */
unsigned vert_count;
uint8_t prim_type;
struct {
ubyte primid; /* primitive id output register */
uint8_t vert_count;
uint8_t prim_type; /* point, line strip or tri strip */
} gp;
/* VP & GP only */
uint8_t clpd, clpd_nr;
uint8_t psiz;
uint8_t edgeflag_in;
/* FP & GP only */
uint8_t prim_id;
} cfg;
void *fixups;
unsigned num_fixups;
};
#endif
#define NV50_INTERP_LINEAR (1 << 0)
#define NV50_INTERP_FLAT (1 << 1)
#define NV50_INTERP_CENTROID (1 << 2)
/* Per-subroutine bookkeeping.
 *
 * analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs
 */
struct nv50_subroutine {
unsigned id;
/* NOTE(review): presumably the position of the subroutine within the
 * instruction stream - confirm against the code that fills it in
 */
unsigned pos;
/* function inputs and outputs: one row per TEMP register, four words each
 * (presumably one per vector component - TODO confirm)
 */
uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
};
/* State handed to nv50_generate_code() for translating one program.
 *
 * NOTE(review): the field descriptions below are derived from names and
 * types only; confirm against the code that fills them in.
 */
struct nv50_translation_info {
/* the program this translation belongs to */
struct nv50_program *p;
/* array of TGSI instructions and its length */
unsigned inst_nr;
struct tgsi_full_instruction *insns;
ubyte input_file;
ubyte output_file;
/* per input/output tables with four entries each (presumably one per
 * vector component)
 */
ubyte input_map[PIPE_MAX_SHADER_INPUTS][4];
ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4];
ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
int input_access[PIPE_MAX_SHADER_INPUTS][4];
int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
boolean indirect_inputs;
boolean indirect_outputs;
boolean store_to_memory;
struct tgsi_shader_info scan;
/* 32-bit immediates: data, count, and a parallel ubyte array
 * (presumably the type of each immediate)
 */
uint32_t *immd32;
unsigned immd32_nr;
ubyte *immd32_ty;
ubyte edgeflag_out;
/* array of subroutine descriptions and its length */
struct nv50_subroutine *subr;
unsigned subr_nr;
};
int nv50_generate_code(struct nv50_translation_info *ti);
boolean nv50_program_tx(struct nv50_program *p);
#endif /* __NV50_PROG_H__ */

View file

@ -228,7 +228,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
ctx.idxbuf = NULL;
ctx.vtx_size = 0;
ctx.edgeflag = 0.5f;
ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;
/* map vertex buffers, determine vertex size */
for (i = 0; i < nv50->vtxelt->num_elements; i++) {

View file

@ -448,7 +448,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_DMA_TIC 0x000001a0
#define NV50TCL_DMA_TEXTURE 0x000001a4
#define NV50TCL_DMA_STRMOUT 0x000001a8
#define NV50TCL_DMA_UNK01AC 0x000001ac
#define NV50TCL_DMA_CLIPID 0x000001ac
#define NV50TCL_DMA_COLOR(x) (0x000001c0+((x)*4))
#define NV50TCL_DMA_COLOR__SIZE 0x00000008
#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
@ -665,8 +665,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_DEPTH_RANGE_FAR__SIZE 0x00000010
#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000
#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0
#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff
#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16
#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000
#define NV50TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8))
#define NV50TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004
#define NV50TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8))
#define NV50TCL_CLIPID_REGION_VERT__SIZE 0x00000004
#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
@ -724,14 +736,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
#define NV50TCL_UNK0F84_ADDRESS_HIGH 0x00000f84
#define NV50TCL_UNK0F84_ADDRESS_LOW 0x00000f88
#define NV50TCL_VERTEX_RUNOUT_HIGH 0x00000f84
#define NV50TCL_VERTEX_RUNOUT_LOW 0x00000f88
#define NV50TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4))
#define NV50TCL_DEPTH_BOUNDS__SIZE 0x00000002
#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
#define NV50TCL_MSAA_MASK(x) (0x00000fbc+((x)*4))
#define NV50TCL_MSAA_MASK__SIZE 0x00000004
#define NV50TCL_CLIPID_ADDRESS_HIGH 0x00000fcc
#define NV50TCL_CLIPID_ADDRESS_LOW 0x00000fd0
#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
#define NV50TCL_ZETA_FORMAT 0x00000fe8
@ -861,37 +875,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
@ -899,37 +921,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001380
@ -988,6 +1018,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_FP_START_ID 0x00001414
#define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420
#define NV50TCL_VB_ELEMENT_BASE 0x00001434
#define NV50TCL_INSTANCE_BASE 0x00001438
#define NV50TCL_CODE_CB_FLUSH 0x00001440
#define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8))
#define NV50TCL_BIND_TSC__SIZE 0x00000003
@ -1005,6 +1036,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BIND_TIC_TIC_MASK 0x7ffffe00
#define NV50TCL_STRMOUT_MAP(x) (0x00001480+((x)*4))
#define NV50TCL_STRMOUT_MAP__SIZE 0x00000020
#define NV50TCL_CLIPID_HEIGHT 0x00001504
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0)
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1)
@ -1089,7 +1121,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_GP_BUILTIN_RESULT_EN 0x000015cc
#define NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX (1 << 0)
#define NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX (1 << 16)
#define NV50TCL_MULTISAMPLE_SAMPLES_LOG2 0x000015d0
#define NV50TCL_MULTISAMPLE_MODE 0x000015d0
#define NV50TCL_MULTISAMPLE_MODE_1X 0x00000000
#define NV50TCL_MULTISAMPLE_MODE_2XMS 0x00000001
#define NV50TCL_MULTISAMPLE_MODE_4XMS 0x00000002
#define NV50TCL_MULTISAMPLE_MODE_8XMS 0x00000004
#define NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
#define NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
#define NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
#define NV50TCL_VERTEX_BEGIN 0x000015dc
#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
@ -1105,6 +1144,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY 0x0000000b
#define NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY 0x0000000c
#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY 0x0000000d
#define NV50TCL_VERTEX_BEGIN_PATCHES 0x0000000e
#define NV50TCL_VERTEX_END 0x000015e0
#define NV50TCL_EDGEFLAG_ENABLE 0x000015e4
#define NV50TCL_VB_ELEMENT_U32 0x000015e8
@ -1118,6 +1158,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
#define NV50TCL_VB_ELEMENT_U16_I1_SHIFT 16
#define NV50TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
#define NV50TCL_VERTEX_BASE_HIGH 0x000015f4
#define NV50TCL_VERTEX_BASE_LOW 0x000015f8
#define NV50TCL_VERTEX_DATA 0x00001640
#define NV50TCL_PRIM_RESTART_ENABLE 0x00001644
#define NV50TCL_PRIM_RESTART_INDEX 0x00001648
@ -1503,7 +1545,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VIEWPORT_TRANSFORM_EN 0x0000192c
#define NV50TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c
#define NV50TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c
#define NV50TCL_VIEWPORT_CLIP_MODE 0x00001950
#define NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000
#define NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001
#define NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002
#define NV50TCL_FP_CTRL_UNK196C 0x0000196c
#define NV50TCL_CLIPID_ENABLE 0x0000197c
#define NV50TCL_CLIPID_WIDTH 0x00001980
#define NV50TCL_CLIPID_ID 0x00001984
#define NV50TCL_FP_INTERPOLANT_CTRL 0x00001988
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT 24
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK 0xff000000
@ -1604,19 +1653,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT 25
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x7e000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x0e000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x0e000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x02000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x04000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x0a000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x0c000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x08000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x06000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA (1 << 31)
#define NV50TCL_QUERY_ADDRESS_HIGH 0x00001b00
#define NV50TCL_QUERY_ADDRESS_LOW 0x00001b04
#define NV50TCL_QUERY_COUNTER 0x00001b08
#define NV50TCL_QUERY_SEQUENCE 0x00001b08
#define NV50TCL_QUERY_GET 0x00001b0c

View file

@ -26,6 +26,7 @@
#include "nv50_context.h"
#include "nv50_screen.h"
#include "nv50_resource.h"
#include "nv50_program.h"
#include "nouveau/nouveau_stateobj.h"
@ -34,75 +35,38 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned tex_usage, unsigned geom_flags)
unsigned usage, unsigned geom_flags)
{
if (sample_count > 1)
return FALSE;
if (tex_usage & PIPE_BIND_RENDER_TARGET) {
if (!util_format_s3tc_enabled) {
switch (format) {
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16_UNORM:
return TRUE;
default:
break;
}
} else
if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
switch (format) {
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
return TRUE;
default:
break;
}
} else {
if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
return util_format_s3tc_enabled;
default:
break;
}
}
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_SRGB:
case PIPE_FORMAT_B8G8R8X8_SRGB:
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16_UNORM:
return TRUE;
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
return FALSE;
default:
break;
}
}
return FALSE;
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
if ((nouveau_screen(pscreen)->device->chipset & 0xf0) != 0xa0)
return FALSE;
break;
default:
break;
}
/* transfers & shared are always supported */
usage &= ~(PIPE_BIND_TRANSFER_READ |
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_SHARED);
return (nv50_format_table[format].usage & usage) == usage;
}
static int
@ -142,6 +106,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case PIPE_CAP_INDEP_BLEND_ENABLE:
@ -165,10 +131,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
}
static int
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param)
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
enum pipe_shader_cap param)
{
switch(shader)
{
switch(shader) {
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
@ -186,7 +152,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: /* need stack bo */
return 4;
case PIPE_SHADER_CAP_MAX_INPUTS: /* 128 / 4 with GP */
if(shader == PIPE_SHADER_GEOMETRY)
if (shader == PIPE_SHADER_GEOMETRY)
return 128 / 4;
else
return 64 / 4;
@ -197,7 +163,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum
case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */
return 0;
case PIPE_SHADER_CAP_MAX_TEMPS: /* no spilling atm */
return 128 / 4;
return NV50_CAP_MAX_PROGRAM_TEMPS;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
default:
@ -301,14 +267,23 @@ nv50_screen_relocs(struct nv50_screen *screen)
}
}
#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
#endif
extern int nouveau_device_get_param(struct nouveau_device *dev,
uint64_t param, uint64_t *value);
struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
struct nouveau_channel *chan;
struct pipe_screen *pscreen;
uint64_t value;
unsigned chipset = dev->chipset;
unsigned tesla_class = 0;
unsigned stack_size, local_size, max_warps;
int ret, i;
const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
@ -527,6 +502,41 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12));
OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12));
/* shader stack */
nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
max_warps = util_bitcount(value & 0xffff);
max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
stack_size = max_warps * 64 * 8;
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
stack_size, &screen->stack_bo);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3);
OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, 4);
local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32;
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
local_size, &screen->local_bo);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3);
OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, util_unsigned_logbase2(local_size / 8));
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
BEGIN_RING(chan, screen->tesla,

View file

@ -22,11 +22,12 @@ struct nv50_screen {
struct nouveau_resource *immd_heap;
struct pipe_resource *strm_vbuf[16];
struct nouveau_bo *tic;
struct nouveau_bo *tsc;
struct nouveau_bo *stack_bo; /* control flow stack */
struct nouveau_bo *local_bo; /* l[] memory */
boolean force_push;
};
@ -38,4 +39,13 @@ nv50_screen(struct pipe_screen *screen)
extern void nv50_screen_relocs(struct nv50_screen *);
/* Per-format hardware encodings; nv50_format_table holds one entry per
 * enum pipe_format value and is indexed directly by the format. */
struct nv50_format {
/* value emitted as the render target / depth-stencil surface format;
 * callers assert that it is non-zero before using it */
uint32_t rt;
/* initial value of the first texture image control word (tic[0]) */
uint32_t tic;
/* hardware word stored for a vertex element using this format */
uint32_t vtx;
/* bitmask tested against the requested PIPE_BIND_* usage bits */
uint32_t usage;
};
extern const struct nv50_format nv50_format_table[];
#endif

View file

@ -0,0 +1,626 @@
/*
* Copyright 2008 Ben Skeggs
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "nv50_context.h"
#include "nv50_transfer.h"
/*
 * Upload the contents of a constant buffer resource into hardware constant
 * buffer 'cbi' by streaming the data through the command channel.
 *
 * nv50: context providing the channel and the tesla object
 * buf:  resource holding the constants; nothing is uploaded if it is NULL
 * size: currently unused, the upload length is derived from buf->width0
 * cbi:  constant buffer index, OR'ed into the CB_ADDR value
 *
 * The buffer is mapped for reading and sent in packets of at most 2047
 * 32-bit words. If mapping fails the function returns without uploading.
 */
static void
nv50_transfer_constbuf(struct nv50_context *nv50,
                       struct pipe_resource *buf, unsigned size, unsigned cbi)
{
   struct pipe_context *pipe = &nv50->pipe;
   struct pipe_transfer *transfer;
   struct nouveau_channel *chan = nv50->screen->base.channel;
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   uint32_t *map;
   unsigned count, start;

   /* The caller passes whatever is bound for the shader stage, which may
    * be nothing at all; there is nothing to map or upload in that case. */
   if (!buf)
      return;

   map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
   if (!map)
      return;

   /* number of 32-bit words, rounding the byte size up */
   count = (buf->width0 + 3) / 4;
   start = 0;

   while (count) {
      unsigned nr = count;
      nr = MIN2(nr, 2047);

      /* FIXME: emit relocs for unsuiTed MM */
      BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
      OUT_RING  (chan, (start << 8) | cbi);
      BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
      OUT_RINGp (chan, map, nr);

      count -= nr;
      start += nr;
      map += nr;
   }

   pipe_buffer_unmap(pipe, buf, transfer);
}
/*
 * Upload the data a program needs at run time: its immediates (into the
 * NV50_CB_PMISC constant buffer) and the constant buffer currently bound
 * for the program's shader type.
 *
 * Immediates are sent in packets of at most 2047 32-bit words. For an
 * unknown shader type only the immediates are uploaded.
 */
static void
nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
{
   struct nouveau_channel *chan = nv50->screen->base.channel;
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   unsigned cbi;

   if (p->immd_size) {
      uint32_t *data = p->immd;
      unsigned count = p->immd_size / 4;
      unsigned start = 0;

      while (count) {
         unsigned nr = count;
         nr = MIN2(nr, 2047);

         BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
         OUT_RING  (chan, (start << 8) | NV50_CB_PMISC);
         BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
         OUT_RINGp (chan, data, nr);

         count -= nr;
         start += nr;
         data += nr;
      }
   }

   /* If the state tracker doesn't change the constbuf, and it is first
    * validated with a program that doesn't use it, this check prevents
    * it from even being uploaded. */
   /*
   if (p->parm_size == 0)
      return;
   */

   switch (p->type) {
   case PIPE_SHADER_VERTEX:
      cbi = NV50_CB_PVP;
      break;
   case PIPE_SHADER_FRAGMENT:
      cbi = NV50_CB_PFP;
      break;
   case PIPE_SHADER_GEOMETRY:
      cbi = NV50_CB_PGP;
      break;
   default:
      assert(0);
      /* With assertions compiled out, do not fall through and use an
       * uninitialized constant buffer index. */
      return;
   }

   nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
}
/*
 * Upload the translated code of program p into a buffer object of its own.
 *
 * Does nothing if p already has a buffer object. Otherwise a VRAM bo of
 * p->code_size bytes is created and the code words are written into it
 * through the 2D engine's SIFC data methods, chunk by chunk, followed by a
 * CODE_CB_FLUSH on the tesla object.
 *
 * NOTE(review): a failing nouveau_bo_new() is only caught by assert(); with
 * assertions disabled the function carries on with whatever p->bo holds.
 */
static void
nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
int ret;
unsigned offset;
unsigned size = p->code_size;
uint32_t *data = p->code;
assert(p->translated);
/* TODO: use a single bo (for each type) for shader code */
/* code was already uploaded for this program */
if (p->bo)
return;
/* 0x100 is presumably the requested alignment -- TODO confirm */
ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
assert(!ret);
/* the program has the bo to itself, so its code starts at offset 0 */
offset = p->code_start = 0;
/* describe the destination as a single row of 8-bit texels */
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
OUT_RING (chan, 1);
BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
OUT_RING (chan, 0x40000);
BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
OUT_RING (chan, 0x10000);
OUT_RING (chan, 1);
while (size) {
/* nr counts 32-bit words to send in this chunk */
unsigned nr = size / 4;
if (AVAIL_RING(chan) < 32)
FIRE_RING(chan);
/* 18 presumably accounts for the method words emitted around the
 * data in each iteration -- TODO confirm */
nr = MIN2(nr, AVAIL_RING(chan) - 18);
nr = MIN2(nr, 1792);
/* any chunk but the last is rounded down to a multiple of 64 words */
/* NOTE(review): if fewer than 64 words fit, nr becomes 0 here and the
 * iteration transfers nothing */
if (nr < (size / 4))
nr &= ~0x3f;
/* the code size has to be a multiple of 4 bytes */
assert(!(size & 3));
BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
OUT_RING (chan, 0);
OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
/* the first word after SIFC_WIDTH is the chunk size in bytes */
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
OUT_RING (chan, nr * 4);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
OUT_RING (chan, 0);
OUT_RING (chan, 0);
OUT_RING (chan, 0);
BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
OUT_RINGp (chan, data, nr);
/* advance source pointer (words) and destination offset / remainder (bytes) */
data += nr;
offset += nr * 4;
size -= nr * 4;
}
BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
OUT_RING (chan, 0);
}
/*
 * (Re)build the state object of a vertex program: code address, enabled
 * vertex attributes, result/temp register allocation and start offset.
 * The code is uploaded beforehand so that p->bo exists for the relocations.
 * The new state object replaces the one referenced by p->so.
 */
static void
nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
{
   const unsigned code_rd = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
   struct nouveau_grobj *eng3d = nv50->screen->tesla;
   struct nouveau_stateobj *state;

   state = so_new(5, 7, 2);

   nv50_program_validate_code(nv50, p);

   /* address of the code buffer, high word then low word */
   so_method(state, eng3d, NV50TCL_VP_ADDRESS_HIGH, 2);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_HIGH, 0, 0);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_LOW, 0, 0);

   so_method(state, eng3d, NV50TCL_VP_ATTR_EN_0, 2);
   so_data  (state, p->vp.attrs[0]);
   so_data  (state, p->vp.attrs[1]);

   so_method(state, eng3d, NV50TCL_VP_REG_ALLOC_RESULT, 1);
   so_data  (state, p->max_out);

   so_method(state, eng3d, NV50TCL_VP_REG_ALLOC_TEMP, 1);
   so_data  (state, p->max_gpr);

   so_method(state, eng3d, NV50TCL_VP_START_ID, 1);
   so_data  (state, p->code_start);

   /* hand the object over to the program and drop the local reference */
   so_ref(state, &p->so);
   so_ref(NULL, &state);
}
/*
 * (Re)build the state object of a fragment program: code address, temp
 * register allocation, result count, the two control words kept in
 * p->fp.flags and the start offset. The code is uploaded beforehand so
 * that p->bo exists for the relocations. The new state object replaces
 * the one referenced by p->so.
 */
static void
nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
{
   const unsigned code_rd = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
   struct nouveau_grobj *eng3d = nv50->screen->tesla;
   struct nouveau_stateobj *state;

   state = so_new(6, 7, 2);

   nv50_program_validate_code(nv50, p);

   /* address of the code buffer, high word then low word */
   so_method(state, eng3d, NV50TCL_FP_ADDRESS_HIGH, 2);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_HIGH, 0, 0);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_LOW, 0, 0);

   so_method(state, eng3d, NV50TCL_FP_REG_ALLOC_TEMP, 1);
   so_data  (state, p->max_gpr);

   so_method(state, eng3d, NV50TCL_FP_RESULT_COUNT, 1);
   so_data  (state, p->max_out);

   so_method(state, eng3d, NV50TCL_FP_CONTROL, 1);
   so_data  (state, p->fp.flags[0]);

   so_method(state, eng3d, NV50TCL_FP_CTRL_UNK196C, 1);
   so_data  (state, p->fp.flags[1]);

   so_method(state, eng3d, NV50TCL_FP_START_ID, 1);
   so_data  (state, p->code_start);

   /* hand the object over to the program and drop the local reference */
   so_ref(state, &p->so);
   so_ref(NULL, &state);
}
/*
 * (Re)build the state object of a geometry program: code address, temp and
 * result register allocation, output primitive type, vertex output count
 * and start offset. The code is uploaded beforehand so that p->bo exists
 * for the relocations. The new state object replaces the one referenced
 * by p->so.
 */
static void
nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
{
   const unsigned code_rd = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
   struct nouveau_grobj *eng3d = nv50->screen->tesla;
   struct nouveau_stateobj *state;

   state = so_new(6, 7, 2);

   nv50_program_validate_code(nv50, p);

   /* address of the code buffer, high word then low word */
   so_method(state, eng3d, NV50TCL_GP_ADDRESS_HIGH, 2);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_HIGH, 0, 0);
   so_reloc (state, p->bo, 0, code_rd | NOUVEAU_BO_LOW, 0, 0);

   so_method(state, eng3d, NV50TCL_GP_REG_ALLOC_TEMP, 1);
   so_data  (state, p->max_gpr);

   so_method(state, eng3d, NV50TCL_GP_REG_ALLOC_RESULT, 1);
   so_data  (state, p->max_out);

   so_method(state, eng3d, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
   so_data  (state, p->gp.prim_type);

   so_method(state, eng3d, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
   so_data  (state, p->gp.vert_count);

   so_method(state, eng3d, NV50TCL_GP_START_ID, 1);
   so_data  (state, p->code_start);

   /* hand the object over to the program and drop the local reference */
   so_ref(state, &p->so);
   so_ref(NULL, &state);
}
/*
 * Translate program p with nv50_program_tx() and record the outcome in
 * p->translated. Returns that flag; a failed translation additionally
 * trips the assert when assertions are enabled.
 */
static boolean
nv50_program_validate(struct nv50_program *p)
{
p->translated = nv50_program_tx(p);
assert(p->translated);
return p->translated;
}
/*
 * Validate the bound vertex program.
 *
 * Translates the program and builds its state object on first use,
 * re-uploads its constants when NV50_NEW_VERTPROG_CB is dirty, and returns
 * a new reference to the program's state object when NV50_NEW_VERTPROG is
 * dirty. Returns NULL if translation fails or the program itself is not
 * dirty.
 */
struct nouveau_stateobj *
nv50_vertprog_validate(struct nv50_context *nv50)
{
   struct nv50_program *prog = nv50->vertprog;
   struct nouveau_stateobj *ref = NULL;

   if (!prog->translated) {
      if (!nv50_program_validate(prog))
         return NULL;
      nv50_vp_update_stateobj(nv50, prog);
   }

   if (nv50->dirty & NV50_NEW_VERTPROG_CB)
      nv50_program_validate_data(nv50, prog);

   if (nv50->dirty & NV50_NEW_VERTPROG) {
      nv50_program_validate_code(nv50, prog);
      so_ref(prog->so, &ref);
   }

   return ref;
}
/*
 * Validate the bound fragment program.
 *
 * Translates the program and builds its state object on first use,
 * re-uploads its constants when NV50_NEW_FRAGPROG_CB is dirty, and returns
 * a new reference to the program's state object when NV50_NEW_FRAGPROG is
 * dirty. Returns NULL if translation fails or the program itself is not
 * dirty.
 */
struct nouveau_stateobj *
nv50_fragprog_validate(struct nv50_context *nv50)
{
   struct nv50_program *prog = nv50->fragprog;
   struct nouveau_stateobj *ref = NULL;

   if (!prog->translated) {
      if (!nv50_program_validate(prog))
         return NULL;
      nv50_fp_update_stateobj(nv50, prog);
   }

   if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
      nv50_program_validate_data(nv50, prog);

   if (nv50->dirty & NV50_NEW_FRAGPROG) {
      nv50_program_validate_code(nv50, prog);
      so_ref(prog->so, &ref);
   }

   return ref;
}
/*
 * Validate the bound geometry program, if there is one.
 *
 * Translates the program and builds its state object on first use,
 * re-uploads its constants when NV50_NEW_GEOMPROG_CB is dirty, and returns
 * a new reference to the program's state object when NV50_NEW_GEOMPROG is
 * dirty. Returns NULL if no geometry program is bound, if translation
 * fails, or if the program itself is not dirty.
 */
struct nouveau_stateobj *
nv50_geomprog_validate(struct nv50_context *nv50)
{
   struct nv50_program *p = nv50->geomprog;
   struct nouveau_stateobj *so = NULL;

   /* A geometry program is optional (the linkage validation code tests
    * nv50->geomprog before using it), so there may be nothing to do. */
   if (!p)
      return NULL;

   if (!p->translated) {
      if (nv50_program_validate(p))
         nv50_gp_update_stateobj(nv50, p);
      else
         return NULL;
   }

   if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
      nv50_program_validate_data(nv50, p);

   if (!(nv50->dirty & NV50_NEW_GEOMPROG))
      return NULL;

   nv50_program_validate_code(nv50, p);

   so_ref(p->so, &so);
   return so;
}
/* Fill pntc[] with the point coordinate replacement map for the fragment
 * program inputs and return the value for POINT_SPRITE_CTRL.
 *
 * 'm' is the map slot at which the regular FP inputs start. Each enabled
 * input component occupies one 4-bit field of pntc[]; replaced components
 * get the value (component index + 1), all others stay 0.
 *
 * XXX: this might not work correctly in all cases yet: we assume that
 * an FP generic input that is not written in the VP is gl_PointCoord.
 */
static uint32_t
nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
{
   struct nv50_program *fp = nv50->fragprog;
   struct nv50_program *prev;
   unsigned in_idx;

   memset(pntc, 0, 8 * sizeof(uint32_t));

   /* outputs come from the geometry program if one is bound */
   prev = nv50->geomprog ? nv50->geomprog : nv50->vertprog;

   for (in_idx = 0; in_idx < fp->in_nr; in_idx++) {
      unsigned out_idx, comp;
      unsigned ncomp = util_bitcount(fp->in[in_idx].mask);
      int keep = (fp->in[in_idx].sn != TGSI_SEMANTIC_GENERIC);

      if (!keep) {
         /* look for the output feeding this generic input */
         for (out_idx = 0; out_idx < prev->out_nr; ++out_idx) {
            if (prev->out[out_idx].sn == fp->in[in_idx].sn &&
                prev->out[out_idx].si == fp->in[in_idx].si)
               break;
         }

         /* a written output is only replaced if sprite coords are
          * enabled for its semantic index */
         if (out_idx < prev->out_nr) {
            uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable;

            keep = !(en & (1 << prev->out[out_idx].si));
         }
      }

      if (keep) {
         /* not replaced: just skip over its map slots */
         m += ncomp;
         continue;
      }

      /* this is either PointCoord or replaced by sprite coords */
      for (comp = 0; comp < 4; comp++) {
         if (fp->in[in_idx].mask & (1 << comp)) {
            pntc[m / 8] |= (comp + 1) << ((m % 8) * 4);
            ++m;
         }
      }
   }

   return (nv50->rasterizer->pipe.sprite_coord_mode ==
           PIPE_SPRITE_COORD_LOWER_LEFT) ? 0 : (1 << 4);
}
/*
 * Append the map entries for one (up to) four component varying.
 *
 * map32: result map, accessed byte-wise (one byte per slot)
 * mid:   index of the first free map slot
 * lin:   bitmask with one bit per map slot; the bit is set for every slot
 *        of an input flagged as linear
 * in:    consumer side of the varying; its mask selects which components
 *        get a slot
 * out:   producer side of the varying; its mask says which components are
 *        actually written, and 'hw' is the id of the first written one
 *
 * A slot whose component is written receives the producer's output id.
 * For an unwritten 4th component the slot's current value is OR'ed with 1;
 * other unwritten components keep the value already in the map.
 *
 * Returns the index of the next free map slot.
 */
static int
nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
              struct nv50_varying *in, struct nv50_varying *out)
{
   int c;
   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
   uint8_t *map = (uint8_t *)map32;

   for (c = 0; c < 4; ++c) {
      if (mf & 1) {
         /* unsigned literal: bit 31 must not be shifted into the sign
          * bit of a signed int */
         if (in->linear)
            lin[mid / 32] |= 1u << (mid % 32);
         if (mv & 1)
            map[mid] = oid;
         else
         if (c == 3)
            map[mid] |= 1;
         ++mid;
      }

      /* output ids are only consumed by components that are written */
      oid += mv & 1;
      mf >>= 1;
      mv >>= 1;
   }

   return mid;
}
/*
 * Build the state object linking the outputs of the stage in front of the
 * fragment program (the geometry program if one is bound, otherwise the
 * vertex program) to the fragment program inputs.
 *
 * It computes the byte-wise result map, the MAP_SEMANTIC values, the
 * interpolation control word and the no-perspective bitmap, and, when
 * point quad rasterization is enabled, the point coordinate replacement
 * map. Returns the newly created state object.
 */
struct nouveau_stateobj *
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *vp;
struct nv50_program *fp = nv50->fragprog;
struct nouveau_stateobj *so;
struct nv50_varying dummy;
int i, n, c, m;
uint32_t map[16], lin[4], pntc[8];
uint32_t interp = fp->fp.interp;
uint32_t colors = fp->fp.colors;
uint32_t clip = 0x04;
uint32_t psiz = 0x000;
uint32_t primid = 0;
uint32_t sysval = 0;
/* 'vp' is whichever stage feeds the rasterizer; every map byte starts
 * out as 0x80 with a geometry program and as 0x40 without one */
if (nv50->geomprog) {
vp = nv50->geomprog;
memset(map, 0x80, sizeof(map));
} else {
vp = nv50->vertprog;
memset(map, 0x40, sizeof(map));
}
memset(lin, 0, sizeof(lin));
/* NOTE(review): only dummy.linear and dummy.mask are ever assigned; when
 * dummy is later passed as the 'out' varying its 'hw' field is read
 * uninitialized (never stored, since the mask is 0 then) */
dummy.linear = 0;
dummy.mask = 0xf; /* map all components of HPOS */
m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
/* append the clip distance outputs, if the program has any */
if (vp->vp.clpd < 0x40) {
for (c = 0; c < vp->vp.clpd_nr; ++c) {
map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
++m;
}
clip |= vp->vp.clpd_nr << 8;
}
colors |= m << 8; /* adjust BFC0 id */
/* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
if (nv50->rasterizer->pipe.light_twoside) {
for (i = 0; i < 2; ++i)
m = nv50_vec4_map(map, m, lin,
&fp->in[fp->vp.bfc[i]],
&vp->out[vp->vp.bfc[i]]);
}
colors += m - 4; /* adjust FFC0 id */
interp |= m << 8; /* set mid where 'normal' FP inputs start */
/* map every FP input to the output with the same semantic name and
 * index; inputs without a match are mapped against the empty dummy */
dummy.mask = 0x0;
for (i = 0; i < fp->in_nr; i++) {
for (n = 0; n < vp->out_nr; ++n)
if (vp->out[n].sn == fp->in[i].sn &&
vp->out[n].si == fp->in[i].si)
break;
m = nv50_vec4_map(map, m, lin,
&fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
}
/* PrimitiveID either is replaced by the system value, or
* written by the geometry shader into an output register
*/
if (fp->gp.primid < 0x40) {
/* i is the bit offset of map byte m inside its 32-bit word */
i = (m % 4) * 8;
map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
primid = m++;
}
/* per-vertex point size gets a map slot of its own */
if (nv50->rasterizer->pipe.point_size_per_vertex) {
i = (m % 4) * 8;
map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
psiz = (m++ << 4) | 1;
}
/* now fill the stateobj (at most 28 so_data) */
so = so_new(10, 54, 0);
/* n is the number of 32-bit words that hold the m map bytes */
n = (m + 3) / 4;
/* NOTE(review): map[] holds 64 bytes, but this check only runs after the
 * map has been written */
assert(m <= 64);
if (vp->type == PIPE_SHADER_GEOMETRY) {
so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
so_data (so, m);
so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
so_datap (so, map, n);
} else {
so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
so_data (so, vp->vp.attrs[2]);
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
so_data (so, primid);
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
so_data (so, m);
so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
so_datap (so, map, n);
}
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
so_data (so, colors);
so_data (so, clip);
so_data (so, sysval);
so_data (so, psiz);
so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
so_data (so, interp);
so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
so_datap (so, lin, 4);
if (nv50->rasterizer->pipe.point_quad_rasterization) {
so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
/* bits 8..15 of interp hold the slot where regular FP inputs start */
so_data (so,
nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
so_datap (so, pntc, 8);
}
so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
return so;
}
/*
 * Fill the byte-wise map connecting vertex program outputs to geometry
 * program inputs, starting at slot 'm'.
 *
 * Every component a GP input reads takes one slot: the id of the VP output
 * register if the VP writes that component, otherwise 0x40, or 0x41 for
 * the 4th component. Returns the index of the next free slot.
 */
static int
nv50_vp_gp_mapping(uint32_t *map32, int m,
                   struct nv50_program *vp, struct nv50_program *gp)
{
   uint8_t *slots = (uint8_t *)map32;
   int in_idx;

   for (in_idx = 0; in_idx < gp->in_nr; ++in_idx) {
      uint8_t wanted = gp->in[in_idx].mask;
      uint8_t written = 0;
      uint8_t reg = 0;
      int out_idx, comp;

      /* find the VP output with the same semantic name and index */
      for (out_idx = 0; out_idx < vp->out_nr; ++out_idx) {
         if (vp->out[out_idx].sn != gp->in[in_idx].sn ||
             vp->out[out_idx].si != gp->in[in_idx].si)
            continue;
         written = vp->out[out_idx].mask;
         reg = vp->out[out_idx].hw;
         break;
      }

      for (comp = 0; comp < 4; ++comp) {
         if (wanted & 1) {
            if (written & 1)
               slots[m++] = reg;
            else
               slots[m++] = (comp == 3) ? 0x41 : 0x40;
         }
         /* output ids advance only over components the VP writes */
         reg += written & 1;
         written >>= 1;
         wanted >>= 1;
      }
   }

   return m;
}
/*
 * Build the state object linking vertex program outputs to geometry
 * program inputs: the combined builtin attribute enables, the size of the
 * VP result map and the map itself. Returns NULL when no geometry program
 * is bound.
 */
struct nouveau_stateobj *
nv50_gp_linkage_validate(struct nv50_context *nv50)
{
   struct nv50_program *gp = nv50->geomprog;
   struct nv50_program *vp = nv50->vertprog;
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   struct nouveau_stateobj *so;
   uint32_t map[16];
   int m, nr_words;

   if (gp == NULL)
      return NULL;

   memset(map, 0, sizeof(map));
   m = nv50_vp_gp_mapping(map, 0, vp, gp);

   so = so_new(3, 24 - 3, 0);

   so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
   so_data  (so, vp->vp.attrs[2] | gp->vp.attrs[2]);

   assert(m <= 32);
   so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
   so_data  (so, m);

   /* the map is emitted as whole 32-bit words, four slots per word */
   nr_words = (m + 3) / 4;
   so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), nr_words);
   so_datap (so, map, nr_words);

   return so;
}

View file

@ -48,6 +48,53 @@ nv50_colormask(unsigned mask)
return cmask;
}
/*
 * Translate a PIPE_BLENDFACTOR_* value into the matching
 * NV50TCL_BLEND_FUNC_SRC_RGB_* hardware value. Factors that are not
 * listed translate to ZERO.
 */
static INLINE uint32_t
nv50_blend_func(unsigned factor)
{
	/* ZERO doubles as the result for unknown factors */
	uint32_t hw = NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;

	switch (factor) {
	case PIPE_BLENDFACTOR_ONE:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE;
		break;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR;
		break;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR;
		break;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA;
		break;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA;
		break;
	case PIPE_BLENDFACTOR_DST_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA;
		break;
	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA;
		break;
	case PIPE_BLENDFACTOR_DST_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR;
		break;
	case PIPE_BLENDFACTOR_INV_DST_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR;
		break;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE;
		break;
	case PIPE_BLENDFACTOR_CONST_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR;
		break;
	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR;
		break;
	case PIPE_BLENDFACTOR_CONST_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA;
		break;
	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA;
		break;
	case PIPE_BLENDFACTOR_SRC1_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR;
		break;
	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR;
		break;
	case PIPE_BLENDFACTOR_SRC1_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA;
		break;
	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
		hw = NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA;
		break;
	case PIPE_BLENDFACTOR_ZERO:
	default:
		break;
	}

	return hw;
}
static void *
nv50_blend_state_create(struct pipe_context *pipe,
const struct pipe_blend_state *cso)
@ -80,12 +127,12 @@ nv50_blend_state_create(struct pipe_context *pipe,
if (blend_enabled) {
so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_src_factor));
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_dst_factor));
so_data (so, nv50_blend_func(cso->rt[0].rgb_src_factor));
so_data (so, nv50_blend_func(cso->rt[0].rgb_dst_factor));
so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_src_factor));
so_data (so, nv50_blend_func(cso->rt[0].alpha_src_factor));
so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_dst_factor));
so_data (so, nv50_blend_func(cso->rt[0].alpha_dst_factor));
}
if (cso->logicop_enable == 0 ) {
@ -546,7 +593,6 @@ nv50_vp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_VERTEX;
tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}
@ -578,7 +624,6 @@ nv50_fp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_FRAGMENT;
tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}
@ -610,7 +655,6 @@ nv50_gp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_GEOMETRY;
tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}

View file

@ -56,6 +56,8 @@ validate_fb(struct nv50_context *nv50)
assert(h == fb->cbufs[i]->height);
}
assert(nv50_format_table[fb->cbufs[i]->format].rt);
so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
so_data (so, fb->cbufs[i]->width);
so_data (so, fb->cbufs[i]->height);
@ -65,39 +67,9 @@ validate_fb(struct nv50_context *nv50)
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->cbufs[i]->format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
break;
case PIPE_FORMAT_B8G8R8X8_UNORM:
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
break;
case PIPE_FORMAT_B5G6R5_UNORM:
so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
break;
case PIPE_FORMAT_R16G16B16A16_SNORM:
so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_SNORM);
break;
case PIPE_FORMAT_R16G16B16A16_UNORM:
so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_UNORM);
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
so_data(so, NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT);
break;
case PIPE_FORMAT_R16G16_SNORM:
so_data(so, NV50TCL_RT_FORMAT_R16G16_SNORM);
break;
case PIPE_FORMAT_R16G16_UNORM:
so_data(so, NV50TCL_RT_FORMAT_R16G16_UNORM);
break;
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
util_format_name(fb->cbufs[i]->format));
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
break;
}
so_data(so, nv50_miptree(pt)->
level[fb->cbufs[i]->level].tile_mode << 4);
so_data (so, nv50_format_table[fb->cbufs[i]->format].rt);
so_data (so, nv50_miptree(pt)->
level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
@ -117,33 +89,17 @@ validate_fb(struct nv50_context *nv50)
assert(h == fb->zsbuf->height);
}
assert(nv50_format_table[fb->zsbuf->format].rt);
so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->zsbuf->format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
case PIPE_FORMAT_Z24X8_UNORM:
so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
break;
case PIPE_FORMAT_Z32_FLOAT:
so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
break;
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
util_format_name(fb->zsbuf->format));
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
}
so_data(so, nv50_miptree(pt)->
level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
so_data (so, nv50_format_table[fb->zsbuf->format].rt);
so_data (so, nv50_miptree(pt)->
level[fb->zsbuf->level].tile_mode << 4);
so_data (so, 0x00000000);
so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 1);

View file

@ -29,56 +29,6 @@
#include "util/u_format.h"
#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \
[PIPE_FORMAT_##pf] = ( \
NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
NV50TIC_0_0_FMT_##f)
#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
static const uint32_t nv50_texture_formats[PIPE_FORMAT_COUNT] =
{
_(B8G8R8A8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8),
_(B8G8R8A8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8),
_(B8G8R8X8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
_(B8G8R8X8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8),
_(B5G5R5A1_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5),
_(B4G4R4A4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4),
_(B5G6R5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
_(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
_(L8_SRGB, UNORM, C0, C0, C0, ONE, 8),
_(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
_(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
_(L8A8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
_(L8A8_SRGB, UNORM, C0, C0, C0, C1, 8_8),
_(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
_(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
_(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
_MIXED(S8_USCALED_Z24_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
_MIXED(Z24_UNORM_S8_USCALED, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
_(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32),
_(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16),
_(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
_MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
};
#undef _
#undef _MIXED
static INLINE uint32_t
nv50_tic_swizzle(uint32_t tc, unsigned swz)
{
@ -106,7 +56,7 @@ nv50_tex_construct(struct nv50_sampler_view *view)
struct nv50_miptree *mt = nv50_miptree(view->pipe.texture);
uint32_t swz[4], *tic = view->tic;
tic[0] = nv50_texture_formats[view->pipe.format];
tic[0] = nv50_format_table[view->pipe.format].tic;
swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);

View file

@ -45,24 +45,32 @@
#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
#define NV50TIC_0_0_TYPEA_SINT 0x00018000
#define NV50TIC_0_0_TYPEA_UINT 0x00020000
#define NV50TIC_0_0_TYPEA_SSCALED 0x00028000
#define NV50TIC_0_0_TYPEA_USCALED 0x00030000
#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
#define NV50TIC_0_0_TYPEB_MASK 0x00007000
#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
#define NV50TIC_0_0_TYPEB_SINT 0x00003000
#define NV50TIC_0_0_TYPEB_UINT 0x00004000
#define NV50TIC_0_0_TYPEB_SSCALED 0x00005000
#define NV50TIC_0_0_TYPEB_USCALED 0x00006000
#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
#define NV50TIC_0_0_TYPEG_SINT 0x00000600
#define NV50TIC_0_0_TYPEG_UINT 0x00000800
#define NV50TIC_0_0_TYPEG_SSCALED 0x00000a00
#define NV50TIC_0_0_TYPEG_USCALED 0x00000c00
#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
#define NV50TIC_0_0_TYPER_MASK 0x000001c0
#define NV50TIC_0_0_TYPER_UNORM 0x00000080
#define NV50TIC_0_0_TYPER_SNORM 0x00000040
#define NV50TIC_0_0_TYPER_SINT 0x000000c0
#define NV50TIC_0_0_TYPER_UINT 0x00000100
#define NV50TIC_0_0_TYPER_SSCALED 0x00000140
#define NV50TIC_0_0_TYPER_USCALED 0x00000180
#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
#define NV50TIC_0_0_FMT_MASK 0x0000003f
#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
@ -90,6 +98,7 @@
#define NV50TIC_0_0_FMT_8_24 0x0000002a
#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
#define NV50TIC_0_0_FMT_32_8 0x00000030
#define NV50TIC_0_0_FMT_16_DEPTH 0x0000003a
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0

File diff suppressed because it is too large Load diff

View file

@ -29,96 +29,6 @@
#include "nv50_context.h"
#include "nv50_resource.h"
static INLINE uint32_t
nv50_vbo_type_to_hw(enum pipe_format format)
{
const struct util_format_description *desc;
desc = util_format_description(format);
assert(desc);
switch (desc->channel[0].type) {
case UTIL_FORMAT_TYPE_FLOAT:
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
case UTIL_FORMAT_TYPE_UNSIGNED:
if (desc->channel[0].normalized) {
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
}
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
case UTIL_FORMAT_TYPE_SIGNED:
if (desc->channel[0].normalized) {
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
}
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
/*
case PIPE_FORMAT_TYPE_UINT:
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
case PIPE_FORMAT_TYPE_SINT:
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
default:
return 0;
}
}
static INLINE uint32_t
nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
{
static const uint32_t hw_values[] = {
0, 0, 0, 0,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
0, 0, 0, 0,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
/* we'd also have R11G11B10 and R10G10B10A2 */
assert(nr_c > 0 && nr_c <= 4);
if (size > 32)
return 0;
size >>= (3 - 2);
return hw_values[size + (nr_c - 1)];
}
static INLINE uint32_t
nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
{
uint32_t hw_type, hw_size;
enum pipe_format pf = ve->src_format;
const struct util_format_description *desc;
unsigned size, nr_components;
desc = util_format_description(pf);
assert(desc);
size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
nr_components = util_format_get_nr_components(pf);
hw_type = nv50_vbo_type_to_hw(pf);
hw_size = nv50_vbo_size_to_hw(size, nr_components);
if (!hw_type || !hw_size) {
NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
abort();
return 0x24e80000;
}
if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
hw_size |= (1 << 31); /* no real swizzle bits :-( */
return (hw_type | hw_size);
}
struct instance {
struct nouveau_bo *bo;
unsigned delta;
@ -533,7 +443,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so_data (so, fui(v[1]));
break;
case 1:
if (attrib == nv50->vertprog->cfg.edgeflag_in) {
if (attrib == nv50->vertprog->vp.edgeflag) {
so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
so_data (so, v[0] ? 1 : 0);
}
@ -554,11 +464,8 @@ nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
{
unsigned i;
for (i = 0; i < cso->num_elements; ++i) {
struct pipe_vertex_element *ve = &cso->pipe[i];
cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
}
for (i = 0; i < cso->num_elements; ++i)
cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
}
struct nouveau_stateobj *
@ -574,7 +481,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
nv50->vbo_fifo = 0;
if (nv50->screen->force_push ||
nv50->vertprog->cfg.edgeflag_in < 16)
nv50->vertprog->vp.edgeflag < 16)
nv50->vbo_fifo = 0xffff;
for (i = 0; i < nv50->vtxbuf_nr; i++) {