mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
Merge remote branch 'origin/nv50-compiler'
Conflicts: src/gallium/drivers/nouveau/nouveau_class.h src/gallium/drivers/nv50/nv50_screen.c
This commit is contained in:
commit
26fe16a99b
23 changed files with 9083 additions and 5021 deletions
|
|
@ -8,9 +8,9 @@ C_SOURCES = \
|
|||
nv50_clear.c \
|
||||
nv50_context.c \
|
||||
nv50_draw.c \
|
||||
nv50_formats.c \
|
||||
nv50_miptree.c \
|
||||
nv50_query.c \
|
||||
nv50_program.c \
|
||||
nv50_resource.c \
|
||||
nv50_screen.c \
|
||||
nv50_state.c \
|
||||
|
|
@ -19,6 +19,14 @@ C_SOURCES = \
|
|||
nv50_tex.c \
|
||||
nv50_transfer.c \
|
||||
nv50_vbo.c \
|
||||
nv50_push.c
|
||||
nv50_push.c \
|
||||
nv50_program.c \
|
||||
nv50_shader_state.c \
|
||||
nv50_pc.c \
|
||||
nv50_pc_print.c \
|
||||
nv50_pc_emit.c \
|
||||
nv50_tgsi_to_nc.c \
|
||||
nv50_pc_optimize.c \
|
||||
nv50_pc_regalloc.c
|
||||
|
||||
include ../../Makefile.template
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ nv50 = env.ConvenienceLibrary(
|
|||
'nv50_clear.c',
|
||||
'nv50_context.c',
|
||||
'nv50_draw.c',
|
||||
'nv50_formats.c',
|
||||
'nv50_miptree.c',
|
||||
'nv50_query.c',
|
||||
'nv50_program.c',
|
||||
|
|
|
|||
452
src/gallium/drivers/nv50/nv50_formats.c
Normal file
452
src/gallium/drivers/nv50/nv50_formats.c
Normal file
|
|
@ -0,0 +1,452 @@
|
|||
/*
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nv50_screen.h"
|
||||
#include "nv50_texture.h"
|
||||
#include "nv50_reg.h"
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
/*
 * A_(): initializer fragment for a format usable both as a texture and as a
 * vertex attribute.  Expands to TWO comma-separated values:
 *   1. the texture image control (TIC) word: per-channel swizzle (cr..ca),
 *      per-channel data type (t0..t3) and storage format (sz);
 *   2. the vertex array attribute word: storage format (sz), the data type
 *      of the first channel (t0) in two bit positions, and flag @r in bit 31.
 *
 * All name arguments are token-pasted, so they are never macro-expanded.
 * @r must be 0 or 1; it is shifted as an unsigned value because shifting a
 * signed 1 into bit 31 is undefined behaviour in C.
 */
#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)                     \
   NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 |                   \
   NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 |                   \
   NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 |                   \
   NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 |                   \
   NV50TIC_0_0_FMT_##sz,                                              \
   NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz |                          \
   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 |                            \
   (NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) |                     \
   ((unsigned)(r) << 31)

/*
 * B_(): same TIC word as A_(), but for formats that cannot be used as vertex
 * attributes: the second value (vertex attribute word) is always 0 and @r is
 * accepted only to keep the argument list parallel to A_().
 */
#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)                     \
   NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 |                   \
   NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 |                   \
   NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 |                   \
   NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 |                   \
   NV50TIC_0_0_FMT_##sz, 0
|
||||
|
||||
/* Short local names for the PIPE_BIND_* usage flags listed per format. */
#define VERTEX_BUFFER   PIPE_BIND_VERTEX_BUFFER
#define SAMPLER_VIEW    PIPE_BIND_SAMPLER_VIEW
#define RENDER_TARGET   PIPE_BIND_RENDER_TARGET
#define DEPTH_STENCIL   PIPE_BIND_DEPTH_STENCIL
#define SCANOUT         PIPE_BIND_SCANOUT

/*
 * For vertex buffers: A_() pastes the same size token into both a TIC format
 * name and a vertex attribute format name.  The three-component sizes are
 * aliased to the four-component TIC formats so that those pasted names
 * resolve.
 */
#define NV50TIC_0_0_FMT_8_8_8      NV50TIC_0_0_FMT_8_8_8_8
#define NV50TIC_0_0_FMT_16_16_16   NV50TIC_0_0_FMT_16_16_16_16
#define NV50TIC_0_0_FMT_32_32_32   NV50TIC_0_0_FMT_32_32_32_32
|
||||
|
||||
const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
|
||||
{
|
||||
/* COMMON FORMATS */
|
||||
|
||||
[PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM,
|
||||
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
|
||||
|
||||
[PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM,
|
||||
A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
|
||||
|
||||
[PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB,
|
||||
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB,
|
||||
A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM,
|
||||
B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
|
||||
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
|
||||
|
||||
[PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM,
|
||||
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
|
||||
SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
|
||||
|
||||
[PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
|
||||
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM,
|
||||
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
|
||||
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
|
||||
|
||||
[PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM,
|
||||
A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
|
||||
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
|
||||
|
||||
/* DEPTH/STENCIL FORMATS */
|
||||
|
||||
[PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM,
|
||||
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
[PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
|
||||
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
[PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM,
|
||||
B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
[PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
|
||||
B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
[PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT,
|
||||
B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
[PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
|
||||
NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
|
||||
B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
|
||||
SAMPLER_VIEW | DEPTH_STENCIL },
|
||||
|
||||
/* LUMINANCE, ALPHA, INTENSITY */
|
||||
|
||||
[PIPE_FORMAT_L8_UNORM] = { 0,
|
||||
A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_L8_SRGB] = { 0,
|
||||
A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_I8_UNORM] = { 0,
|
||||
A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM,
|
||||
A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
|
||||
SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_L8A8_UNORM] = { 0,
|
||||
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_L8A8_SRGB] = { 0,
|
||||
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
/* DXT, RGTC */
|
||||
|
||||
[PIPE_FORMAT_DXT1_RGB] = { 0,
|
||||
B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_DXT1_RGBA] = { 0,
|
||||
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_DXT3_RGBA] = { 0,
|
||||
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_DXT5_RGBA] = { 0,
|
||||
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_RGTC1_UNORM] = { 0,
|
||||
B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_RGTC1_SNORM] = { 0,
|
||||
B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_RGTC2_UNORM] = { 0,
|
||||
B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_RGTC2_SNORM] = { 0,
|
||||
B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
/* FLOAT 16 */
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT,
|
||||
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT,
|
||||
A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT,
|
||||
A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT,
|
||||
A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
/* FLOAT 32 */
|
||||
|
||||
[PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT,
|
||||
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT,
|
||||
A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT,
|
||||
A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT,
|
||||
A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
/* ODD FORMATS */
|
||||
|
||||
[PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT,
|
||||
B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
|
||||
SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
|
||||
B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0),
|
||||
SAMPLER_VIEW },
|
||||
|
||||
/* SNORM 32 */
|
||||
|
||||
[PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
|
||||
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32B32_SNORM] = { 0,
|
||||
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32_SNORM] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32_SNORM] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* UNORM 32 */
|
||||
|
||||
[PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
|
||||
A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32B32_UNORM] = { 0,
|
||||
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32_UNORM] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32_UNORM] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* SNORM 16 */
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM,
|
||||
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16G16B16_SNORM] = { 0,
|
||||
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM,
|
||||
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16_SNORM] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* UNORM 16 */
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM,
|
||||
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16G16B16_UNORM] = { 0,
|
||||
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM,
|
||||
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R16_UNORM] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* SNORM 8 */
|
||||
|
||||
[PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM,
|
||||
A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8_SNORM] = { 0,
|
||||
A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM,
|
||||
A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM,
|
||||
A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
/* UNORM 8 */
|
||||
|
||||
[PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM,
|
||||
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB,
|
||||
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
|
||||
SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM,
|
||||
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB,
|
||||
A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
|
||||
SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM,
|
||||
A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
[PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM,
|
||||
A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
|
||||
|
||||
/* SSCALED 32 */
|
||||
|
||||
[PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32_SSCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32_SSCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* USCALED 32 */
|
||||
|
||||
[PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32B32_USCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32G32_USCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R32_USCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* SSCALED 16 */
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16_SSCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16_SSCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* USCALED 16 */
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16B16_USCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16G16_USCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R16_USCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* SSCALED 8 */
|
||||
|
||||
[PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8G8_SSCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8_SSCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
/* USCALED 8 */
|
||||
|
||||
[PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
|
||||
A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8G8B8_USCALED] = { 0,
|
||||
A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8G8_USCALED] = { 0,
|
||||
A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
|
||||
[PIPE_FORMAT_R8_USCALED] = { 0,
|
||||
A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
|
||||
VERTEX_BUFFER | SAMPLER_VIEW },
|
||||
};
|
||||
|
|
@ -159,6 +159,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp
|
|||
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
|
||||
tile_flags = 0x2800;
|
||||
break;
|
||||
case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
|
||||
tile_flags = 0xe000;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
tile_flags = 0x7400;
|
||||
|
|
|
|||
804
src/gallium/drivers/nv50/nv50_pc.c
Normal file
804
src/gallium/drivers/nv50/nv50_pc.c
Normal file
|
|
@ -0,0 +1,804 @@
|
|||
/*
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
/* #define NV50PC_DEBUG */
|
||||
|
||||
#include "nv50_pc.h"
|
||||
#include "nv50_program.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* returns TRUE if operands 0 and 1 can be swapped */
|
||||
boolean
|
||||
nv_op_commutative(uint opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case NV_OP_ADD:
|
||||
case NV_OP_MUL:
|
||||
case NV_OP_MAD:
|
||||
case NV_OP_AND:
|
||||
case NV_OP_OR:
|
||||
case NV_OP_XOR:
|
||||
case NV_OP_MIN:
|
||||
case NV_OP_MAX:
|
||||
case NV_OP_SAD:
|
||||
return TRUE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* return operand to which the address register applies */
|
||||
int
|
||||
nv50_indirect_opnd(struct nv_instruction *i)
|
||||
{
|
||||
if (!i->src[4])
|
||||
return -1;
|
||||
|
||||
switch (i->opcode) {
|
||||
case NV_OP_MOV:
|
||||
case NV_OP_LDA:
|
||||
case NV_OP_STA:
|
||||
return 0;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
boolean
|
||||
nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
|
||||
{
|
||||
if (nvi->flags_src || nvi->flags_def)
|
||||
return FALSE;
|
||||
|
||||
switch (nvi->opcode) {
|
||||
case NV_OP_ADD:
|
||||
case NV_OP_MUL:
|
||||
case NV_OP_AND:
|
||||
case NV_OP_OR:
|
||||
case NV_OP_XOR:
|
||||
case NV_OP_SHL:
|
||||
case NV_OP_SHR:
|
||||
return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
|
||||
(nvi->def[0]->reg.file == NV_FILE_GPR);
|
||||
case NV_OP_MOV:
|
||||
assert(s == 0);
|
||||
return (nvi->def[0]->reg.file == NV_FILE_GPR);
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
boolean
|
||||
nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 3 && nvi->src[i]; ++i)
|
||||
if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
|
||||
return FALSE;
|
||||
|
||||
switch (nvi->opcode) {
|
||||
case NV_OP_ABS:
|
||||
case NV_OP_ADD:
|
||||
case NV_OP_CEIL:
|
||||
case NV_OP_FLOOR:
|
||||
case NV_OP_TRUNC:
|
||||
case NV_OP_CVT:
|
||||
case NV_OP_NEG:
|
||||
case NV_OP_MAD:
|
||||
case NV_OP_MUL:
|
||||
case NV_OP_SAT:
|
||||
case NV_OP_SUB:
|
||||
case NV_OP_MAX:
|
||||
case NV_OP_MIN:
|
||||
if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
|
||||
value->reg.file == NV_FILE_MEM_P))
|
||||
return TRUE;
|
||||
if (value->reg.file < NV_FILE_MEM_C(0) ||
|
||||
value->reg.file > NV_FILE_MEM_C(15))
|
||||
return FALSE;
|
||||
return (s == 1) ||
|
||||
((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
|
||||
case NV_OP_MOV:
|
||||
assert(s == 0);
|
||||
return /* TRUE */ FALSE; /* don't turn MOVs into loads */
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return whether this instruction can be executed conditionally. */
|
||||
boolean
|
||||
nv50_nvi_can_predicate(struct nv_instruction *nvi)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (nvi->flags_src)
|
||||
return FALSE;
|
||||
for (i = 0; i < 4 && nvi->src[i]; ++i)
|
||||
if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
|
||||
return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
ubyte
|
||||
nv50_supported_src_mods(uint opcode, int s)
|
||||
{
|
||||
switch (opcode) {
|
||||
case NV_OP_ABS:
|
||||
return NV_MOD_NEG | NV_MOD_ABS; /* obviously */
|
||||
case NV_OP_ADD:
|
||||
case NV_OP_MUL:
|
||||
case NV_OP_MAD:
|
||||
return NV_MOD_NEG;
|
||||
case NV_OP_DFDX:
|
||||
case NV_OP_DFDY:
|
||||
assert(s == 0);
|
||||
return NV_MOD_NEG;
|
||||
case NV_OP_MAX:
|
||||
case NV_OP_MIN:
|
||||
return NV_MOD_ABS;
|
||||
case NV_OP_CVT:
|
||||
case NV_OP_LG2:
|
||||
case NV_OP_NEG:
|
||||
case NV_OP_PREEX2:
|
||||
case NV_OP_PRESIN:
|
||||
case NV_OP_RCP:
|
||||
case NV_OP_RSQ:
|
||||
return NV_MOD_ABS | NV_MOD_NEG;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* We may want an opcode table. */
|
||||
boolean
|
||||
nv50_op_can_write_flags(uint opcode)
|
||||
{
|
||||
if (nv_is_vector_op(opcode))
|
||||
return FALSE;
|
||||
switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */
|
||||
case NV_OP_PHI:
|
||||
case NV_OP_MOV:
|
||||
case NV_OP_LINTERP:
|
||||
case NV_OP_PINTERP:
|
||||
case NV_OP_LDA:
|
||||
return FALSE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2)
|
||||
return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int
|
||||
nv_nvi_refcount(struct nv_instruction *nvi)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
rc = nvi->flags_def ? nvi->flags_def->refc : 0;
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if (!nvi->def[i])
|
||||
return rc;
|
||||
rc += nvi->def[i]->refc;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
|
||||
struct nv_value *new_val)
|
||||
{
|
||||
int i, n;
|
||||
|
||||
if (old_val == new_val)
|
||||
return old_val->refc;
|
||||
|
||||
for (i = 0, n = 0; i < pc->num_refs; ++i) {
|
||||
if (pc->refs[i]->value == old_val) {
|
||||
++n;
|
||||
nv_reference(pc, &pc->refs[i], new_val);
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
struct nv_value *
|
||||
nvcg_find_constant(struct nv_ref *ref)
|
||||
{
|
||||
struct nv_value *src;
|
||||
|
||||
if (!ref)
|
||||
return NULL;
|
||||
|
||||
src = ref->value;
|
||||
while (src->insn && src->insn->opcode == NV_OP_MOV) {
|
||||
assert(!src->insn->src[0]->mod);
|
||||
src = src->insn->src[0]->value;
|
||||
}
|
||||
if ((src->reg.file == NV_FILE_IMM) ||
|
||||
(src->insn && src->insn->opcode == NV_OP_LDA &&
|
||||
src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
|
||||
src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
|
||||
return src;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct nv_value *
|
||||
nvcg_find_immediate(struct nv_ref *ref)
|
||||
{
|
||||
struct nv_value *src = nvcg_find_constant(ref);
|
||||
|
||||
return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
nv_pc_free_refs(struct nv_pc *pc)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < pc->num_refs; i += 64)
|
||||
FREE(pc->refs[i]);
|
||||
FREE(pc->refs);
|
||||
}
|
||||
|
||||
static const char *
|
||||
edge_name(ubyte type)
|
||||
{
|
||||
switch (type) {
|
||||
case CFG_EDGE_FORWARD: return "forward";
|
||||
case CFG_EDGE_BACK: return "back";
|
||||
case CFG_EDGE_LOOP_ENTER: return "loop";
|
||||
case CFG_EDGE_LOOP_LEAVE: return "break";
|
||||
case CFG_EDGE_FAKE: return "fake";
|
||||
default:
|
||||
return "?";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
|
||||
{
|
||||
struct nv_basic_block *bb[64], *bbb[16], *b;
|
||||
int j, p, pp;
|
||||
|
||||
bb[0] = root;
|
||||
p = 1;
|
||||
pp = 0;
|
||||
|
||||
while (p > 0) {
|
||||
b = bb[--p];
|
||||
b->priv = 0;
|
||||
|
||||
for (j = 1; j >= 0; --j) {
|
||||
if (!b->out[j])
|
||||
continue;
|
||||
|
||||
switch (b->out_kind[j]) {
|
||||
case CFG_EDGE_BACK:
|
||||
continue;
|
||||
case CFG_EDGE_FORWARD:
|
||||
case CFG_EDGE_FAKE:
|
||||
if (++b->out[j]->priv == b->out[j]->num_in)
|
||||
bb[p++] = b->out[j];
|
||||
break;
|
||||
case CFG_EDGE_LOOP_ENTER:
|
||||
bb[p++] = b->out[j];
|
||||
break;
|
||||
case CFG_EDGE_LOOP_LEAVE:
|
||||
bbb[pp++] = b->out[j];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
f(priv, b);
|
||||
|
||||
if (!p) {
|
||||
p = pp;
|
||||
for (; pp > 0; --pp)
|
||||
bb[pp - 1] = bbb[pp - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nv_do_print_function(void *priv, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_instruction *i = b->phi;
|
||||
|
||||
debug_printf("=== BB %i ", b->id);
|
||||
if (b->out[0])
|
||||
debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
|
||||
if (b->out[1])
|
||||
debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
|
||||
debug_printf("===\n");
|
||||
|
||||
i = b->phi;
|
||||
if (!i)
|
||||
i = b->entry;
|
||||
for (; i; i = i->next)
|
||||
nv_print_instruction(i);
|
||||
}
|
||||
|
||||
void
|
||||
nv_print_function(struct nv_basic_block *root)
|
||||
{
|
||||
if (root->subroutine)
|
||||
debug_printf("SUBROUTINE %i\n", root->subroutine);
|
||||
else
|
||||
debug_printf("MAIN\n");
|
||||
|
||||
nv_pc_pass_in_order(root, nv_do_print_function, root);
|
||||
}
|
||||
|
||||
void
|
||||
nv_print_program(struct nv_pc *pc)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < pc->num_subroutines + 1; ++i)
|
||||
if (pc->root[i])
|
||||
nv_print_function(pc->root[i]);
|
||||
}
|
||||
|
||||
#ifdef NV50PC_DEBUG
|
||||
static void
|
||||
nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
|
||||
{
|
||||
int i;
|
||||
|
||||
b->pass_seq = pc->pass_seq;
|
||||
|
||||
fprintf(f, "\t%i [shape=box]\n", b->id);
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (!b->out[i])
|
||||
continue;
|
||||
switch (b->out_kind[i]) {
|
||||
case CFG_EDGE_FORWARD:
|
||||
fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
|
||||
break;
|
||||
case CFG_EDGE_LOOP_ENTER:
|
||||
fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
|
||||
break;
|
||||
case CFG_EDGE_LOOP_LEAVE:
|
||||
fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
|
||||
break;
|
||||
case CFG_EDGE_BACK:
|
||||
fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
|
||||
continue;
|
||||
case CFG_EDGE_FAKE:
|
||||
fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
if (b->out[i]->pass_seq < pc->pass_seq)
|
||||
nv_do_print_cfgraph(pc, f, b->out[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
|
||||
static void
|
||||
nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
|
||||
{
|
||||
FILE *f;
|
||||
|
||||
f = fopen(filepath, "a");
|
||||
if (!f)
|
||||
return;
|
||||
|
||||
fprintf(f, "digraph G {\n");
|
||||
|
||||
++pc->pass_seq;
|
||||
|
||||
nv_do_print_cfgraph(pc, f, pc->root[subr]);
|
||||
|
||||
fprintf(f, "}\n");
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
#endif
|
||||
|
||||
static INLINE void
|
||||
nvcg_show_bincode(struct nv_pc *pc)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < pc->bin_size / 4; ++i) {
|
||||
debug_printf("0x%08x ", pc->emit[i]);
|
||||
if ((i % 16) == 15)
|
||||
debug_printf("\n");
|
||||
}
|
||||
debug_printf("\n");
|
||||
}
|
||||
|
||||
static int
|
||||
nv50_emit_program(struct nv_pc *pc)
|
||||
{
|
||||
uint32_t *code = pc->emit;
|
||||
int n;
|
||||
|
||||
NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);
|
||||
|
||||
for (n = 0; n < pc->num_blocks; ++n) {
|
||||
struct nv_instruction *i;
|
||||
struct nv_basic_block *b = pc->bb_list[n];
|
||||
|
||||
for (i = b->entry; i; i = i->next) {
|
||||
nv50_emit_instruction(pc, i);
|
||||
|
||||
pc->bin_pos += 1 + (pc->emit[0] & 1);
|
||||
pc->emit += 1 + (pc->emit[0] & 1);
|
||||
}
|
||||
}
|
||||
assert(pc->emit == &code[pc->bin_size / 4]);
|
||||
|
||||
/* XXX: we can do better than this ... */
|
||||
if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
|
||||
pc->emit[0] = 0xf0000001;
|
||||
pc->emit[1] = 0xe0000000;
|
||||
pc->bin_size += 8;
|
||||
}
|
||||
|
||||
pc->emit = code;
|
||||
code[pc->bin_size / 4 - 1] |= 1;
|
||||
|
||||
#ifdef NV50PC_DEBUG
|
||||
nvcg_show_bincode(pc);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
nv50_generate_code(struct nv50_translation_info *ti)
|
||||
{
|
||||
struct nv_pc *pc;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
pc = CALLOC_STRUCT(nv_pc);
|
||||
if (!pc)
|
||||
return 1;
|
||||
|
||||
pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
|
||||
if (!pc->root) {
|
||||
FREE(pc);
|
||||
return 1;
|
||||
}
|
||||
pc->num_subroutines = ti->subr_nr;
|
||||
|
||||
ret = nv50_tgsi_to_nc(pc, ti);
|
||||
if (ret)
|
||||
goto out;
|
||||
#ifdef NV50PC_DEBUG
|
||||
nv_print_program(pc);
|
||||
#endif
|
||||
|
||||
pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
|
||||
|
||||
/* optimization */
|
||||
ret = nv_pc_exec_pass0(pc);
|
||||
if (ret)
|
||||
goto out;
|
||||
#ifdef NV50PC_DEBUG
|
||||
nv_print_program(pc);
|
||||
#endif
|
||||
|
||||
/* register allocation */
|
||||
ret = nv_pc_exec_pass1(pc);
|
||||
if (ret)
|
||||
goto out;
|
||||
#ifdef NV50PC_DEBUG
|
||||
nv_print_program(pc);
|
||||
nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
|
||||
#endif
|
||||
|
||||
/* prepare for emission */
|
||||
ret = nv_pc_exec_pass2(pc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
|
||||
if (!pc->emit) {
|
||||
ret = 3;
|
||||
goto out;
|
||||
}
|
||||
ret = nv50_emit_program(pc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ti->p->code_size = pc->bin_size;
|
||||
ti->p->code = pc->emit;
|
||||
|
||||
ti->p->immd_size = pc->immd_count * 4;
|
||||
ti->p->immd = pc->immd_buf;
|
||||
|
||||
/* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
|
||||
ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1);
|
||||
|
||||
ti->p->fixups = pc->fixups;
|
||||
ti->p->num_fixups = pc->num_fixups;
|
||||
|
||||
NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
|
||||
|
||||
out:
|
||||
nv_pc_free_refs(pc);
|
||||
|
||||
for (i = 0; i < pc->num_blocks; ++i)
|
||||
FREE(pc->bb_list[i]);
|
||||
if (pc->root)
|
||||
FREE(pc->root);
|
||||
if (ret) { /* on success, these will be referenced by nv50_program */
|
||||
if (pc->emit)
|
||||
FREE(pc->emit);
|
||||
if (pc->immd_buf)
|
||||
FREE(pc->immd_buf);
|
||||
if (pc->fixups)
|
||||
FREE(pc->fixups);
|
||||
}
|
||||
FREE(pc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
|
||||
{
|
||||
if (!b->phi) {
|
||||
i->prev = NULL;
|
||||
b->phi = i;
|
||||
i->next = b->entry;
|
||||
if (b->entry) {
|
||||
assert(!b->entry->prev && b->exit);
|
||||
b->entry->prev = i;
|
||||
} else {
|
||||
b->entry = i;
|
||||
b->exit = i;
|
||||
}
|
||||
} else {
|
||||
assert(b->entry);
|
||||
if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
|
||||
assert(b->entry == b->exit);
|
||||
b->entry->next = i;
|
||||
i->prev = b->entry;
|
||||
b->entry = i;
|
||||
b->exit = i;
|
||||
} else { /* insert before entry */
|
||||
assert(b->entry->prev && b->exit);
|
||||
i->next = b->entry;
|
||||
i->prev = b->entry->prev;
|
||||
b->entry->prev = i;
|
||||
i->prev->next = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
|
||||
{
|
||||
if (i->opcode == NV_OP_PHI) {
|
||||
nvbb_insert_phi(b, i);
|
||||
} else {
|
||||
i->prev = b->exit;
|
||||
if (b->exit)
|
||||
b->exit->next = i;
|
||||
b->exit = i;
|
||||
if (!b->entry)
|
||||
b->entry = i;
|
||||
else
|
||||
if (i->prev && i->prev->opcode == NV_OP_PHI)
|
||||
b->entry = i;
|
||||
}
|
||||
|
||||
i->bb = b;
|
||||
b->num_instructions++;
|
||||
}
|
||||
|
||||
void
|
||||
nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
|
||||
{
|
||||
if (!at->next) {
|
||||
nvbb_insert_tail(at->bb, ni);
|
||||
return;
|
||||
}
|
||||
ni->next = at->next;
|
||||
ni->prev = at;
|
||||
ni->next->prev = ni;
|
||||
ni->prev->next = ni;
|
||||
}
|
||||
|
||||
void
|
||||
nv_nvi_delete(struct nv_instruction *nvi)
|
||||
{
|
||||
struct nv_basic_block *b = nvi->bb;
|
||||
int j;
|
||||
|
||||
/* debug_printf("REM: "); nv_print_instruction(nvi); */
|
||||
|
||||
for (j = 0; j < 5; ++j)
|
||||
nv_reference(NULL, &nvi->src[j], NULL);
|
||||
nv_reference(NULL, &nvi->flags_src, NULL);
|
||||
|
||||
if (nvi->next)
|
||||
nvi->next->prev = nvi->prev;
|
||||
else {
|
||||
assert(nvi == b->exit);
|
||||
b->exit = nvi->prev;
|
||||
}
|
||||
|
||||
if (nvi->prev)
|
||||
nvi->prev->next = nvi->next;
|
||||
|
||||
if (nvi == b->entry) {
|
||||
/* PHIs don't get hooked to b->entry */
|
||||
b->entry = nvi->next;
|
||||
assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
|
||||
}
|
||||
|
||||
if (nvi == b->phi) {
|
||||
if (nvi->opcode != NV_OP_PHI)
|
||||
NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");
|
||||
|
||||
assert(!nvi->prev);
|
||||
if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
|
||||
b->phi = NULL;
|
||||
else
|
||||
b->phi = nvi->next;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
|
||||
{
|
||||
struct nv_basic_block *b = i1->bb;
|
||||
|
||||
assert(i1->opcode != NV_OP_PHI &&
|
||||
i2->opcode != NV_OP_PHI);
|
||||
assert(i1->next == i2);
|
||||
|
||||
if (b->exit == i2)
|
||||
b->exit = i1;
|
||||
|
||||
if (b->entry == i1)
|
||||
b->entry = i2;
|
||||
|
||||
i2->prev = i1->prev;
|
||||
i1->next = i2->next;
|
||||
i2->next = i1;
|
||||
i1->prev = i2;
|
||||
|
||||
if (i2->prev)
|
||||
i2->prev->next = i2;
|
||||
if (i1->next)
|
||||
i1->next->prev = i1;
|
||||
}
|
||||
|
||||
void
|
||||
nvbb_attach_block(struct nv_basic_block *parent,
|
||||
struct nv_basic_block *b, ubyte edge_kind)
|
||||
{
|
||||
assert(b->num_in < 8);
|
||||
|
||||
if (parent->out[0]) {
|
||||
assert(!parent->out[1]);
|
||||
parent->out[1] = b;
|
||||
parent->out_kind[1] = edge_kind;
|
||||
} else {
|
||||
parent->out[0] = b;
|
||||
parent->out_kind[0] = edge_kind;
|
||||
}
|
||||
|
||||
b->in[b->num_in] = parent;
|
||||
b->in_kind[b->num_in++] = edge_kind;
|
||||
}
|
||||
|
||||
/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
|
||||
|
||||
boolean
|
||||
nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
|
||||
{
|
||||
int j;
|
||||
|
||||
if (b == d)
|
||||
return TRUE;
|
||||
|
||||
for (j = 0; j < b->num_in; ++j)
|
||||
if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d))
|
||||
return FALSE;
|
||||
|
||||
return j ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
/* check if @bf (future) can be reached from @bp (past), stop at @bt */
|
||||
boolean
|
||||
nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
|
||||
struct nv_basic_block *bt)
|
||||
{
|
||||
struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
|
||||
int i, p, n;
|
||||
|
||||
p = 0;
|
||||
n = 1;
|
||||
q[0] = bp;
|
||||
|
||||
while (p < n) {
|
||||
b = q[p++];
|
||||
|
||||
if (b == bf)
|
||||
break;
|
||||
if (b == bt)
|
||||
continue;
|
||||
assert(n <= (1024 - 2));
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
|
||||
q[n] = b->out[i];
|
||||
q[n++]->priv = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (--n; n >= 0; --n)
|
||||
q[n]->priv = 0;
|
||||
|
||||
return (b == bf);
|
||||
}
|
||||
|
||||
static struct nv_basic_block *
|
||||
nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
|
||||
{
|
||||
struct nv_basic_block *out;
|
||||
int i;
|
||||
|
||||
if (!nvbb_dominated_by(df, b)) {
|
||||
for (i = 0; i < df->num_in; ++i) {
|
||||
if (df->in_kind[i] == CFG_EDGE_BACK)
|
||||
continue;
|
||||
if (nvbb_dominated_by(df->in[i], b))
|
||||
return df;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < 2 && df->out[i]; ++i) {
|
||||
if (df->out_kind[i] == CFG_EDGE_BACK)
|
||||
continue;
|
||||
if ((out = nvbb_find_dom_frontier(b, df->out[i])))
|
||||
return out;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct nv_basic_block *
|
||||
nvbb_dom_frontier(struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_basic_block *df;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2 && b->out[i]; ++i)
|
||||
if ((df = nvbb_find_dom_frontier(b, b->out[i])))
|
||||
return df;
|
||||
return NULL;
|
||||
}
|
||||
514
src/gallium/drivers/nv50/nv50_pc.h
Normal file
514
src/gallium/drivers/nv50/nv50_pc.h
Normal file
|
|
@ -0,0 +1,514 @@
|
|||
/*
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NV50_COMPILER_H__
|
||||
#define __NV50_COMPILER_H__
|
||||
|
||||
#ifdef NV50PC_DEBUG
|
||||
# define NV50_DBGMSG(args...) debug_printf(args)
|
||||
#else
|
||||
# define NV50_DBGMSG(args...)
|
||||
#endif
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#define NV_OP_PHI 0
|
||||
#define NV_OP_EXTRACT 1
|
||||
#define NV_OP_COMBINE 2
|
||||
#define NV_OP_LDA 3
|
||||
#define NV_OP_STA 4
|
||||
#define NV_OP_MOV 5
|
||||
#define NV_OP_ADD 6
|
||||
#define NV_OP_SUB 7
|
||||
#define NV_OP_NEG 8
|
||||
#define NV_OP_MUL 9
|
||||
#define NV_OP_MAD 10
|
||||
#define NV_OP_CVT 11
|
||||
#define NV_OP_SAT 12
|
||||
#define NV_OP_NOT 13
|
||||
#define NV_OP_AND 14
|
||||
#define NV_OP_OR 15
|
||||
#define NV_OP_XOR 16
|
||||
#define NV_OP_SHL 17
|
||||
#define NV_OP_SHR 18
|
||||
#define NV_OP_RCP 19
|
||||
#define NV_OP_UNDEF 20
|
||||
#define NV_OP_RSQ 21
|
||||
#define NV_OP_LG2 22
|
||||
#define NV_OP_SIN 23
|
||||
#define NV_OP_COS 24
|
||||
#define NV_OP_EX2 25
|
||||
#define NV_OP_PRESIN 26
|
||||
#define NV_OP_PREEX2 27
|
||||
#define NV_OP_MIN 28
|
||||
#define NV_OP_MAX 29
|
||||
#define NV_OP_SET 30
|
||||
#define NV_OP_SAD 31
|
||||
#define NV_OP_KIL 32
|
||||
#define NV_OP_BRA 33
|
||||
#define NV_OP_CALL 34
|
||||
#define NV_OP_RET 35
|
||||
#define NV_OP_BREAK 36
|
||||
#define NV_OP_BREAKADDR 37
|
||||
#define NV_OP_JOINAT 38
|
||||
#define NV_OP_TEX 39
|
||||
#define NV_OP_TXB 40
|
||||
#define NV_OP_TXL 41
|
||||
#define NV_OP_TXF 42
|
||||
#define NV_OP_TXQ 43
|
||||
#define NV_OP_DFDX 44
|
||||
#define NV_OP_DFDY 45
|
||||
#define NV_OP_QUADOP 46
|
||||
#define NV_OP_LINTERP 47
|
||||
#define NV_OP_PINTERP 48
|
||||
#define NV_OP_ABS 49
|
||||
#define NV_OP_CEIL 50
|
||||
#define NV_OP_FLOOR 51
|
||||
#define NV_OP_TRUNC 52
|
||||
#define NV_OP_NOP 53
|
||||
#define NV_OP_SELECT 54
|
||||
#define NV_OP_EXPORT 55
|
||||
#define NV_OP_JOIN 56
|
||||
#define NV_OP_COUNT 57
|
||||
|
||||
/*
 * Register / memory file identifiers (struct nv_reg::file).
 * The macro arguments are parenthesized so that expressions such as
 * NV_FILE_MEM_C(a ? 1 : 2) expand as intended.
 */
#define NV_FILE_GPR   0
#define NV_FILE_OUT   1
#define NV_FILE_ADDR  2
#define NV_FILE_FLAGS 3
#define NV_FILE_IMM   16
#define NV_FILE_MEM_S 32
#define NV_FILE_MEM_P 33
#define NV_FILE_MEM_V 34
#define NV_FILE_MEM_L 48
#define NV_FILE_MEM_G(i) (64 + (i))
#define NV_FILE_MEM_C(i) (80 + (i))
|
||||
|
||||
#define NV_MOD_NEG 1
|
||||
#define NV_MOD_ABS 2
|
||||
#define NV_MOD_NOT 4
|
||||
#define NV_MOD_SAT 8
|
||||
|
||||
/*
 * Value type codes.  The low nibble selects the base type; NV_TYPE_VEC puts
 * a component count into the bits above it.
 */
#define NV_TYPE_U8  0x00
#define NV_TYPE_S8  0x01
#define NV_TYPE_U16 0x02
#define NV_TYPE_S16 0x03
#define NV_TYPE_U32 0x04
#define NV_TYPE_S32 0x05
#define NV_TYPE_P32 0x07
#define NV_TYPE_F32 0x09
#define NV_TYPE_F64 0x0b
/* (n) is parenthesized so that any expression can be passed as the count */
#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4))
#define NV_TYPE_LO  0x00
#define NV_TYPE_HI  0x80
#define NV_TYPE_ANY 0xff

#define NV_TYPE_ISINT(t) ((t) <= 5)
#define NV_TYPE_ISFLT(t) ((t) & 0x08)
|
||||
|
||||
/* $cX registers contain 4 bits: OCSZ (Z is bit 0) */
|
||||
#define NV_CC_FL 0x0
|
||||
#define NV_CC_LT 0x1
|
||||
#define NV_CC_EQ 0x2
|
||||
#define NV_CC_LE 0x3
|
||||
#define NV_CC_GT 0x4
|
||||
#define NV_CC_NE 0x5
|
||||
#define NV_CC_GE 0x6
|
||||
#define NV_CC_U 0x8
|
||||
#define NV_CC_TR 0xf
|
||||
#define NV_CC_O 0x10
|
||||
#define NV_CC_C 0x11
|
||||
#define NV_CC_A 0x12
|
||||
#define NV_CC_S 0x13
|
||||
|
||||
#define NV_PC_MAX_INSTRUCTIONS 2048
|
||||
#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
|
||||
|
||||
#define NV_PC_MAX_BASIC_BLOCKS 1024
|
||||
|
||||
static INLINE boolean
|
||||
nv_is_vector_op(uint opcode)
|
||||
{
|
||||
return (opcode >= NV_OP_TEX) && (opcode <= NV_OP_TXQ);
|
||||
}
|
||||
|
||||
static INLINE uint
|
||||
nv_type_order(ubyte type)
|
||||
{
|
||||
switch (type & 0xf) {
|
||||
case NV_TYPE_U8:
|
||||
case NV_TYPE_S8:
|
||||
return 0;
|
||||
case NV_TYPE_U16:
|
||||
case NV_TYPE_S16:
|
||||
return 1;
|
||||
case NV_TYPE_U32:
|
||||
case NV_TYPE_F32:
|
||||
case NV_TYPE_S32:
|
||||
case NV_TYPE_P32:
|
||||
return 2;
|
||||
case NV_TYPE_F64:
|
||||
return 3;
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
static INLINE uint
|
||||
nv_type_sizeof(ubyte type)
|
||||
{
|
||||
if (type & 0xf0)
|
||||
return (1 << nv_type_order(type)) * (type >> 4);
|
||||
return 1 << nv_type_order(type);
|
||||
}
|
||||
|
||||
static INLINE uint
|
||||
nv_type_sizeof_base(ubyte type)
|
||||
{
|
||||
return 1 << nv_type_order(type);
|
||||
}
|
||||
|
||||
struct nv_reg {
|
||||
int id;
|
||||
ubyte file;
|
||||
ubyte type; /* type of generating instruction's result */
|
||||
ubyte as_type; /* default type for new references to this value */
|
||||
union {
|
||||
float f32;
|
||||
double f64;
|
||||
int32_t s32;
|
||||
uint32_t u32;
|
||||
} imm;
|
||||
};
|
||||
|
||||
/* One [bgn, end] interval in a singly linked list of ranges
 * (used for nv_value::livei). */
struct nv_range {
   struct nv_range *next; /* next interval of the list, or NULL */
   int bgn;
   int end;
};
|
||||
|
||||
struct nv_value {
|
||||
struct nv_reg reg;
|
||||
struct nv_instruction *insn;
|
||||
struct nv_value *join;
|
||||
int n;
|
||||
struct nv_range *livei;
|
||||
int refc;
|
||||
|
||||
struct nv_value *next;
|
||||
struct nv_value *prev;
|
||||
};
|
||||
|
||||
struct nv_ref {
|
||||
struct nv_value *value;
|
||||
ubyte mod;
|
||||
ubyte typecast;
|
||||
ubyte flags; /* not used yet */
|
||||
};
|
||||
|
||||
struct nv_basic_block;
|
||||
|
||||
struct nv_instruction {
|
||||
struct nv_instruction *next;
|
||||
struct nv_instruction *prev;
|
||||
uint opcode;
|
||||
int serial;
|
||||
struct nv_value *def[4];
|
||||
struct nv_value *flags_def;
|
||||
struct nv_ref *src[5];
|
||||
struct nv_ref *flags_src;
|
||||
struct nv_basic_block *bb;
|
||||
struct nv_basic_block *target; /* target block of control flow insn */
|
||||
ubyte cc;
|
||||
ubyte set_cond : 4;
|
||||
ubyte fixed : 1; /* don't optimize away */
|
||||
ubyte is_terminator : 1;
|
||||
ubyte is_join : 1;
|
||||
ubyte is_long : 1; /* for emission */
|
||||
/* */
|
||||
ubyte saturate : 1;
|
||||
ubyte centroid : 1;
|
||||
ubyte flat : 1;
|
||||
ubyte lanes : 4;
|
||||
ubyte tex_live : 1;
|
||||
/* */
|
||||
ubyte tex_t; /* TIC binding */
|
||||
ubyte tex_s; /* TSC binding */
|
||||
ubyte tex_argc : 3;
|
||||
ubyte tex_cube : 1;
|
||||
ubyte tex_mask : 4;
|
||||
/* */
|
||||
ubyte quadop;
|
||||
};
|
||||
|
||||
#define CFG_EDGE_FORWARD 0
|
||||
#define CFG_EDGE_BACK 1
|
||||
#define CFG_EDGE_LOOP_ENTER 2
|
||||
#define CFG_EDGE_LOOP_LEAVE 4
|
||||
#define CFG_EDGE_FAKE 8
|
||||
|
||||
/* 'WALL' edge means where reachability check doesn't follow */
|
||||
/* 'LOOP' edge means just having to do with loops */
|
||||
#define IS_LOOP_EDGE(k) ((k) & 7)
|
||||
#define IS_WALL_EDGE(k) ((k) & 9)
|
||||
|
||||
struct nv_basic_block {
|
||||
struct nv_instruction *entry; /* first non-phi instruction */
|
||||
struct nv_instruction *exit;
|
||||
struct nv_instruction *phi; /* very first instruction */
|
||||
int num_instructions;
|
||||
|
||||
struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
|
||||
struct nv_basic_block *in[8]; /* hope that suffices */
|
||||
uint num_in;
|
||||
ubyte out_kind[2];
|
||||
ubyte in_kind[8];
|
||||
|
||||
int id;
|
||||
int subroutine;
|
||||
uint priv; /* reset to 0 after you're done */
|
||||
uint pass_seq;
|
||||
|
||||
uint32_t bin_pos; /* position, size in emitted code */
|
||||
uint32_t bin_size;
|
||||
|
||||
uint32_t live_set[NV_PC_MAX_VALUES / 32];
|
||||
};
|
||||
|
||||
#define NV_FIXUP_CFLOW_RELOC 0
|
||||
#define NV_FIXUP_PARAM_RELOC 1
|
||||
|
||||
struct nv_fixup {
|
||||
ubyte type;
|
||||
ubyte shift;
|
||||
uint32_t mask;
|
||||
uint32_t data;
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
static INLINE void
|
||||
nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
val = bin[fixup->offset / 4] & ~fixup->mask;
|
||||
data = (fixup->shift < 0) ? (data >> fixup->shift) : (data << fixup->shift);
|
||||
val |= (fixup->data + data) & fixup->mask;
|
||||
bin[fixup->offset / 4] = val;
|
||||
}
|
||||
|
||||
struct nv50_translation_info;
|
||||
|
||||
struct nv_pc {
|
||||
struct nv_basic_block **root;
|
||||
struct nv_basic_block *current_block;
|
||||
struct nv_basic_block *parent_block;
|
||||
|
||||
int loop_nesting_bound;
|
||||
uint pass_seq;
|
||||
|
||||
struct nv_value values[NV_PC_MAX_VALUES];
|
||||
struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
|
||||
struct nv_ref **refs;
|
||||
struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
|
||||
int num_values;
|
||||
int num_instructions;
|
||||
int num_refs;
|
||||
int num_blocks;
|
||||
int num_subroutines;
|
||||
|
||||
int max_reg[4];
|
||||
|
||||
uint32_t *immd_buf; /* populated on emit */
|
||||
unsigned immd_count;
|
||||
|
||||
uint32_t *emit;
|
||||
unsigned bin_size;
|
||||
unsigned bin_pos;
|
||||
|
||||
struct nv_fixup *fixups;
|
||||
int num_fixups;
|
||||
|
||||
/* optimization enables */
|
||||
boolean opt_reload_elim;
|
||||
};
|
||||
|
||||
void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *);
|
||||
void nvi_insert_after(struct nv_instruction *, struct nv_instruction *);
|
||||
|
||||
static INLINE struct nv_instruction *
|
||||
nv_alloc_instruction(struct nv_pc *pc, uint opcode)
|
||||
{
|
||||
struct nv_instruction *insn;
|
||||
|
||||
insn = &pc->instructions[pc->num_instructions++];
|
||||
assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
|
||||
|
||||
insn->cc = NV_CC_TR;
|
||||
insn->opcode = opcode;
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
static INLINE struct nv_instruction *
|
||||
new_instruction(struct nv_pc *pc, uint opcode)
|
||||
{
|
||||
struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
|
||||
|
||||
nvbb_insert_tail(pc->current_block, insn);
|
||||
return insn;
|
||||
}
|
||||
|
||||
static INLINE struct nv_instruction *
|
||||
new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
|
||||
{
|
||||
struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
|
||||
|
||||
nvi_insert_after(at, insn);
|
||||
return insn;
|
||||
}
|
||||
|
||||
static INLINE struct nv_value *
|
||||
new_value(struct nv_pc *pc, ubyte file, ubyte type)
|
||||
{
|
||||
struct nv_value *value = &pc->values[pc->num_values];
|
||||
|
||||
assert(pc->num_values < NV_PC_MAX_VALUES - 1);
|
||||
|
||||
value->n = pc->num_values++;
|
||||
value->join = value;
|
||||
value->reg.id = -1;
|
||||
value->reg.file = file;
|
||||
value->reg.type = value->reg.as_type = type;
|
||||
return value;
|
||||
}
|
||||
|
||||
static INLINE struct nv_value *
|
||||
new_value_like(struct nv_pc *pc, struct nv_value *like)
|
||||
{
|
||||
struct nv_value *val = new_value(pc, like->reg.file, like->reg.type);
|
||||
val->reg.as_type = like->reg.as_type;
|
||||
return val;
|
||||
}
|
||||
|
||||
static INLINE struct nv_ref *
|
||||
new_ref(struct nv_pc *pc, struct nv_value *val)
|
||||
{
|
||||
int i;
|
||||
struct nv_ref *ref;
|
||||
|
||||
if ((pc->num_refs % 64) == 0) {
|
||||
const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
|
||||
const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
|
||||
|
||||
pc->refs = REALLOC(pc->refs, old_size, new_size);
|
||||
|
||||
ref = CALLOC(64, sizeof(struct nv_ref));
|
||||
for (i = 0; i < 64; ++i)
|
||||
pc->refs[pc->num_refs + i] = &ref[i];
|
||||
}
|
||||
|
||||
ref = pc->refs[pc->num_refs++];
|
||||
ref->value = val;
|
||||
ref->typecast = val->reg.as_type;
|
||||
|
||||
++val->refc;
|
||||
return ref;
|
||||
}
|
||||
|
||||
static INLINE struct nv_basic_block *
|
||||
new_basic_block(struct nv_pc *pc)
|
||||
{
|
||||
struct nv_basic_block *bb;
|
||||
|
||||
if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
|
||||
return NULL;
|
||||
|
||||
bb = CALLOC_STRUCT(nv_basic_block);
|
||||
|
||||
bb->id = pc->num_blocks;
|
||||
pc->bb_list[pc->num_blocks++] = bb;
|
||||
return bb;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv_reference(struct nv_pc *pc, struct nv_ref **d, struct nv_value *s)
|
||||
{
|
||||
if (*d)
|
||||
--(*d)->value->refc;
|
||||
|
||||
if (s) {
|
||||
if (!*d)
|
||||
*d = new_ref(pc, s);
|
||||
else {
|
||||
(*d)->value = s;
|
||||
++(s->refc);
|
||||
}
|
||||
} else {
|
||||
*d = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* nv50_pc_emit.c */
|
||||
void nv50_emit_instruction(struct nv_pc *, struct nv_instruction *);
|
||||
|
||||
/* nv50_pc_print.c */
|
||||
const char *nv_opcode_name(uint opcode);
|
||||
void nv_print_instruction(struct nv_instruction *);
|
||||
|
||||
/* nv50_pc.c */
|
||||
|
||||
void nv_print_function(struct nv_basic_block *root);
|
||||
void nv_print_program(struct nv_pc *);
|
||||
|
||||
boolean nv_op_commutative(uint opcode);
|
||||
int nv50_indirect_opnd(struct nv_instruction *);
|
||||
boolean nv50_nvi_can_use_imm(struct nv_instruction *, int s);
|
||||
boolean nv50_nvi_can_predicate(struct nv_instruction *);
|
||||
boolean nv50_nvi_can_load(struct nv_instruction *, int s, struct nv_value *);
|
||||
boolean nv50_op_can_write_flags(uint opcode);
|
||||
ubyte nv50_supported_src_mods(uint opcode, int s);
|
||||
int nv_nvi_refcount(struct nv_instruction *);
|
||||
void nv_nvi_delete(struct nv_instruction *);
|
||||
void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
|
||||
void nvbb_attach_block(struct nv_basic_block *parent,
|
||||
struct nv_basic_block *, ubyte edge_kind);
|
||||
boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
|
||||
boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
|
||||
struct nv_basic_block *);
|
||||
struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
|
||||
int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
|
||||
struct nv_value *new_val);
|
||||
struct nv_value *nvcg_find_immediate(struct nv_ref *);
|
||||
struct nv_value *nvcg_find_constant(struct nv_ref *);
|
||||
|
||||
typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
|
||||
|
||||
void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
|
||||
|
||||
int nv_pc_exec_pass0(struct nv_pc *pc);
|
||||
int nv_pc_exec_pass1(struct nv_pc *pc);
|
||||
int nv_pc_exec_pass2(struct nv_pc *pc);
|
||||
|
||||
int nv50_tgsi_to_nc(struct nv_pc *, struct nv50_translation_info *);
|
||||
|
||||
#endif /* __NV50_COMPILER_H__ */
|
||||
1216
src/gallium/drivers/nv50/nv50_pc_emit.c
Normal file
1216
src/gallium/drivers/nv50/nv50_pc_emit.c
Normal file
File diff suppressed because it is too large
Load diff
1154
src/gallium/drivers/nv50/nv50_pc_optimize.c
Normal file
1154
src/gallium/drivers/nv50/nv50_pc_optimize.c
Normal file
File diff suppressed because it is too large
Load diff
320
src/gallium/drivers/nv50/nv50_pc_print.c
Normal file
320
src/gallium/drivers/nv50/nv50_pc_print.c
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
/*
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nv50_context.h"
|
||||
#include "nv50_pc.h"
|
||||
|
||||
#define NVXX_DEBUG 0
|
||||
|
||||
#define PRINT(args...) debug_printf(args)
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
|
||||
#endif
|
||||
|
||||
static const char *norm = "\x1b[00m";
|
||||
static const char *gree = "\x1b[32m";
|
||||
static const char *blue = "\x1b[34m";
|
||||
static const char *cyan = "\x1b[36m";
|
||||
static const char *orng = "\x1b[33m";
|
||||
static const char *mgta = "\x1b[35m";
|
||||
|
||||
/*
 * Printable opcode names, indexed by NV_OP_*.  The extra last entry is what
 * nv_opcode_name() returns for out-of-range opcodes.
 */
static const char *nv_opcode_names[NV_OP_COUNT + 1] = {
   /*  0 */ "phi", "extract", "combine", "lda",
   /*  4 */ "sta", "mov", "add", "sub",
   /*  8 */ "neg", "mul", "mad", "cvt",
   /* 12 */ "sat", "not", "and", "or",
   /* 16 */ "xor", "shl", "shr", "rcp",
   /* 20 */ "undef", "rsqrt", "lg2", "sin",
   /* 24 */ "cos", "ex2", "presin", "preex2",
   /* 28 */ "min", "max", "set", "sad",
   /* 32 */ "kil", "bra", "call", "ret",
   /* 36 */ "break", "breakaddr", "joinat", "tex",
   /* 40 */ "texbias", "texlod", "texfetch", "texsize",
   /* 44 */ "dfdx", "dfdy", "quadop", "linterp",
   /* 48 */ "pinterp", "abs", "ceil", "floor",
   /* 52 */ "trunc", "nop", "select", "export",
   /* 56 */ "join",
   /* 57 */ "BAD_OP"
};
|
||||
|
||||
/* Printable condition code names, indexed by the condition code value
 * (20 entries, see nv_cond_name()). */
static const char *nv_cond_names[] =
{
   /* 0x00 */ "never", "lt",  "eq",  "le",
   /* 0x04 */ "gt",    "ne",  "ge",  "",
   /* 0x08 */ "never", "ltu", "equ", "leu",
   /* 0x0c */ "gtu",   "neu", "geu", "",
   /* 0x10 */ "o",     "c",   "a",   "s"
};
|
||||
|
||||
/*
 * Printable names of source modifier combinations, indexed by the NV_MOD_*
 * bit mask (NEG = 1, ABS = 2, NOT = 4, SAT = 8).  Index 9 is the fallback
 * that nv_modifier_string() clamps to.
 *
 * Every entry needs its own comma: adjacent string literals are silently
 * concatenated, which would shorten the table and shift later entries.
 */
static const char *nv_modifier_strings[] =
{
   "",            /* 0 */
   "neg",         /* 1 */
   "abs",         /* 2 */
   "neg abs",     /* 3 */
   "not",         /* 4 */
   "not neg",     /* 5 */
   "not abs",     /* 6 */
   "not neg abs", /* 7 */
   "sat",         /* 8 */
   "BAD_MOD"      /* 9 */
};
|
||||
|
||||
const char *
|
||||
nv_opcode_name(uint opcode)
|
||||
{
|
||||
return nv_opcode_names[MIN2(opcode, ARRAY_SIZE(nv_opcode_names) - 1)];
|
||||
}
|
||||
|
||||
static INLINE const char *
|
||||
nv_type_name(ubyte type)
|
||||
{
|
||||
switch (type) {
|
||||
case NV_TYPE_U16: return "u16";
|
||||
case NV_TYPE_S16: return "s16";
|
||||
case NV_TYPE_F32: return "f32";
|
||||
case NV_TYPE_U32: return "u32";
|
||||
case NV_TYPE_S32: return "s32";
|
||||
case NV_TYPE_P32: return "p32";
|
||||
case NV_TYPE_F64: return "f64";
|
||||
default:
|
||||
return "BAD_TYPE";
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE const char *
|
||||
nv_cond_name(ubyte cc)
|
||||
{
|
||||
return nv_cond_names[MIN2(cc, 19)];
|
||||
}
|
||||
|
||||
static INLINE const char *
|
||||
nv_modifier_string(ubyte mod)
|
||||
{
|
||||
return nv_modifier_strings[MIN2(mod, 9)];
|
||||
}
|
||||
|
||||
static INLINE int
|
||||
nv_value_id(struct nv_value *value)
|
||||
{
|
||||
if (value->join->reg.id >= 0)
|
||||
return value->join->reg.id;
|
||||
return value->n;
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
nv_value_allocated(struct nv_value *value)
|
||||
{
|
||||
return (value->reg.id >= 0) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv_print_address(const char c, int buf, struct nv_value *a, int offset)
|
||||
{
|
||||
const char ac = (a && nv_value_allocated(a)) ? '$' : '%';
|
||||
|
||||
if (buf >= 0)
|
||||
PRINT(" %s%c%i[", cyan, c, buf);
|
||||
else
|
||||
PRINT(" %s%c[", cyan, c);
|
||||
if (a)
|
||||
PRINT("%s%ca%i%s+", mgta, ac, nv_value_id(a), cyan);
|
||||
PRINT("%s0x%x%s]", orng, offset, cyan);
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv_print_cond(struct nv_instruction *nvi)
|
||||
{
|
||||
char pfx = nv_value_allocated(nvi->flags_src->value->join) ? '$' : '%';
|
||||
|
||||
PRINT("%s%s %s%cc%i ",
|
||||
gree, nv_cond_name(nvi->cc),
|
||||
mgta, pfx, nv_value_id(nvi->flags_src->value));
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
|
||||
{
|
||||
char reg_pfx = '$';
|
||||
|
||||
if (type == NV_TYPE_ANY)
|
||||
type = value->reg.type;
|
||||
|
||||
if (value->reg.file != NV_FILE_FLAGS)
|
||||
PRINT(" %s%s", gree, nv_type_name(type));
|
||||
|
||||
if (!nv_value_allocated(value->join))
|
||||
reg_pfx = '%';
|
||||
|
||||
switch (value->reg.file) {
|
||||
case NV_FILE_GPR:
|
||||
PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_OUT:
|
||||
PRINT(" %s%co%i", mgta, reg_pfx, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_ADDR:
|
||||
PRINT(" %s%ca%i", mgta, reg_pfx, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_FLAGS:
|
||||
PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_L:
|
||||
nv_print_address('l', -1, ind, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_S:
|
||||
nv_print_address('s', -1, ind, 4 * nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_P:
|
||||
nv_print_address('p', -1, ind, 4 * nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_V:
|
||||
nv_print_address('v', -1, ind, 4 * nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_IMM:
|
||||
switch (type) {
|
||||
case NV_TYPE_U16:
|
||||
case NV_TYPE_S16:
|
||||
PRINT(" %s0x%04x", orng, value->reg.imm.u32);
|
||||
break;
|
||||
case NV_TYPE_F32:
|
||||
PRINT(" %s%f", orng, value->reg.imm.f32);
|
||||
break;
|
||||
case NV_TYPE_F64:
|
||||
PRINT(" %s%f", orng, value->reg.imm.f64);
|
||||
break;
|
||||
case NV_TYPE_U32:
|
||||
case NV_TYPE_S32:
|
||||
case NV_TYPE_P32:
|
||||
PRINT(" %s0x%08x", orng, value->reg.imm.u32);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (value->reg.file >= NV_FILE_MEM_G(0) &&
|
||||
value->reg.file <= NV_FILE_MEM_G(15))
|
||||
nv_print_address('g', value->reg.file - NV_FILE_MEM_G(0), ind,
|
||||
nv_value_id(value) * 4);
|
||||
else
|
||||
if (value->reg.file >= NV_FILE_MEM_C(0) &&
|
||||
value->reg.file <= NV_FILE_MEM_C(15))
|
||||
nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), ind,
|
||||
nv_value_id(value) * 4);
|
||||
else
|
||||
NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv_print_ref(struct nv_ref *ref, struct nv_value *ind)
|
||||
{
|
||||
nv_print_value(ref->value, ind, ref->typecast);
|
||||
}
|
||||
|
||||
void
|
||||
nv_print_instruction(struct nv_instruction *i)
|
||||
{
|
||||
int j;
|
||||
|
||||
PRINT("%i: ", i->serial);
|
||||
|
||||
if (i->flags_src)
|
||||
nv_print_cond(i);
|
||||
|
||||
PRINT("%s", gree);
|
||||
if (i->opcode == NV_OP_SET)
|
||||
PRINT("set %s", nv_cond_name(i->set_cond));
|
||||
else
|
||||
if (i->saturate)
|
||||
PRINT("sat %s", nv_opcode_name(i->opcode));
|
||||
else
|
||||
PRINT("%s", nv_opcode_name(i->opcode));
|
||||
|
||||
if (i->flags_def)
|
||||
nv_print_value(i->flags_def, NULL, NV_TYPE_ANY);
|
||||
|
||||
/* Only STORE & STA can write to MEM, and they do not def
|
||||
* anything, so the address is thus part of the source.
|
||||
*/
|
||||
if (i->def[0])
|
||||
nv_print_value(i->def[0], NULL, NV_TYPE_ANY);
|
||||
else
|
||||
if (i->target)
|
||||
PRINT(" %s(BB:%i)", orng, i->target->id);
|
||||
else
|
||||
PRINT(" #");
|
||||
|
||||
for (j = 0; j < 4; ++j) {
|
||||
if (!i->src[j])
|
||||
continue;
|
||||
|
||||
if (i->src[j]->mod)
|
||||
PRINT(" %s%s", gree, nv_modifier_string(i->src[j]->mod));
|
||||
|
||||
nv_print_ref(i->src[j],
|
||||
(j == nv50_indirect_opnd(i)) ?
|
||||
i->src[4]->value : NULL);
|
||||
}
|
||||
PRINT(" %s%c\n", norm, i->is_long ? 'l' : 's');
|
||||
}
|
||||
962
src/gallium/drivers/nv50/nv50_pc_regalloc.c
Normal file
962
src/gallium/drivers/nv50/nv50_pc_regalloc.c
Normal file
|
|
@ -0,0 +1,962 @@
|
|||
/*
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
/* #define NV50PC_DEBUG */
|
||||
|
||||
/* #define NV50_RA_DEBUG_LIVEI */
|
||||
/* #define NV50_RA_DEBUG_LIVE_SETS */
|
||||
/* #define NV50_RA_DEBUG_JOIN */
|
||||
|
||||
#include "nv50_context.h"
|
||||
#include "nv50_pc.h"
|
||||
|
||||
#include "util/u_simple_list.h"
|
||||
|
||||
#define NUM_REGISTER_FILES 4
|
||||
|
||||
struct register_set {
|
||||
struct nv_pc *pc;
|
||||
|
||||
uint32_t last[NUM_REGISTER_FILES];
|
||||
uint32_t bits[NUM_REGISTER_FILES][8];
|
||||
};
|
||||
|
||||
struct nv_pc_pass {
|
||||
struct nv_pc *pc;
|
||||
|
||||
struct nv_instruction **insns;
|
||||
int num_insns;
|
||||
|
||||
uint pass_seq;
|
||||
};
|
||||
|
||||
static void
|
||||
ranges_coalesce(struct nv_range *range)
|
||||
{
|
||||
while (range->next && range->end >= range->next->bgn) {
|
||||
struct nv_range *rnn = range->next->next;
|
||||
assert(range->bgn <= range->next->bgn);
|
||||
range->end = MAX2(range->end, range->next->end);
|
||||
FREE(range->next);
|
||||
range->next = rnn;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
|
||||
{
|
||||
struct nv_range *range, **nextp = &val->livei;
|
||||
|
||||
for (range = val->livei; range; range = range->next) {
|
||||
if (end < range->bgn)
|
||||
break; /* insert before */
|
||||
|
||||
if (bgn > range->end) {
|
||||
nextp = &range->next;
|
||||
continue; /* insert after */
|
||||
}
|
||||
|
||||
/* overlap */
|
||||
if (bgn < range->bgn) {
|
||||
range->bgn = bgn;
|
||||
if (end > range->end)
|
||||
range->end = end;
|
||||
ranges_coalesce(range);
|
||||
return TRUE;
|
||||
}
|
||||
if (end > range->end) {
|
||||
range->end = end;
|
||||
ranges_coalesce(range);
|
||||
return TRUE;
|
||||
}
|
||||
assert(bgn >= range->bgn);
|
||||
assert(end <= range->end);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (!new_range)
|
||||
new_range = CALLOC_STRUCT(nv_range);
|
||||
|
||||
new_range->bgn = bgn;
|
||||
new_range->end = end;
|
||||
new_range->next = range;
|
||||
*(nextp) = new_range;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void
|
||||
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
|
||||
{
|
||||
int bgn;
|
||||
|
||||
if (!val->insn) /* ignore non-def values */
|
||||
return;
|
||||
assert(b->entry->serial <= b->exit->serial);
|
||||
assert(b->phi->serial <= end);
|
||||
assert(b->exit->serial + 1 >= end);
|
||||
|
||||
bgn = val->insn->serial;
|
||||
if (bgn < b->entry->serial || bgn > b->exit->serial)
|
||||
bgn = b->entry->serial;
|
||||
|
||||
assert(bgn <= end);
|
||||
|
||||
add_range_ex(val, bgn, end, NULL);
|
||||
}
|
||||
|
||||
#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
/* Debug helper: dump the live interval list of a value. */
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r;

   debug_printf("livei %i: ", a->n);
   for (r = a->livei; r; r = r->next)
      debug_printf("[%i, %i) ", r->bgn, r->end);
   debug_printf("\n");
}
#endif
|
||||
|
||||
static void
|
||||
livei_unify(struct nv_value *dst, struct nv_value *src)
|
||||
{
|
||||
struct nv_range *range, *next;
|
||||
|
||||
for (range = src->livei; range; range = next) {
|
||||
next = range->next;
|
||||
if (add_range_ex(dst, range->bgn, range->end, range))
|
||||
FREE(range);
|
||||
}
|
||||
src->livei = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
livei_release(struct nv_value *val)
|
||||
{
|
||||
struct nv_range *range, *next;
|
||||
|
||||
for (range = val->livei; range; range = next) {
|
||||
next = range->next;
|
||||
FREE(range);
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
livei_have_overlap(struct nv_value *a, struct nv_value *b)
|
||||
{
|
||||
struct nv_range *r_a, *r_b;
|
||||
|
||||
for (r_a = a->livei; r_a; r_a = r_a->next) {
|
||||
for (r_b = b->livei; r_b; r_b = r_b->next) {
|
||||
if (r_b->bgn < r_a->end &&
|
||||
r_b->end > r_a->bgn)
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
livei_end(struct nv_value *a)
|
||||
{
|
||||
struct nv_range *r = a->livei;
|
||||
|
||||
assert(r);
|
||||
while (r->next)
|
||||
r = r->next;
|
||||
return r->end;
|
||||
}
|
||||
|
||||
static boolean
|
||||
livei_contains(struct nv_value *a, int pos)
|
||||
{
|
||||
struct nv_range *r;
|
||||
|
||||
for (r = a->livei; r && r->bgn <= pos; r = r->next)
|
||||
if (r->end > pos)
|
||||
return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static boolean
|
||||
reg_assign(struct register_set *set, struct nv_value **def, int n)
|
||||
{
|
||||
int i, id, s;
|
||||
uint m;
|
||||
int f = def[0]->reg.file;
|
||||
|
||||
s = n << (nv_type_order(def[0]->reg.type) - 1);
|
||||
m = (1 << s) - 1;
|
||||
|
||||
id = set->last[f];
|
||||
|
||||
for (i = 0; i * 32 < set->last[f]; ++i) {
|
||||
if (set->bits[f][i] == 0xffffffff)
|
||||
continue;
|
||||
|
||||
for (id = 0; id < 32; id += s)
|
||||
if (!(set->bits[f][i] & (m << id)))
|
||||
break;
|
||||
if (id < 32)
|
||||
break;
|
||||
}
|
||||
if (i * 32 + id > set->last[f])
|
||||
return FALSE;
|
||||
|
||||
set->bits[f][i] |= m << id;
|
||||
|
||||
id += i * 32;
|
||||
|
||||
set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
|
||||
|
||||
id >>= nv_type_order(def[0]->reg.type) - 1;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
if (def[i]->livei)
|
||||
def[i]->reg.id = id++;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
reg_occupy(struct register_set *set, struct nv_value *val)
|
||||
{
|
||||
int s, id = val->reg.id, f = val->reg.file;
|
||||
uint m;
|
||||
|
||||
if (id < 0)
|
||||
return;
|
||||
s = nv_type_order(val->reg.type) - 1;
|
||||
id <<= s;
|
||||
m = (1 << (1 << s)) - 1;
|
||||
|
||||
set->bits[f][id / 32] |= m << (id % 32);
|
||||
|
||||
if (set->pc->max_reg[f] < id)
|
||||
set->pc->max_reg[f] = id;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
reg_release(struct register_set *set, struct nv_value *val)
|
||||
{
|
||||
int s, id = val->reg.id, f = val->reg.file;
|
||||
uint m;
|
||||
|
||||
if (id < 0)
|
||||
return;
|
||||
|
||||
s = nv_type_order(val->reg.type) - 1;
|
||||
id <<= s;
|
||||
m = (1 << (1 << s)) - 1;
|
||||
|
||||
set->bits[f][id / 32] &= ~(m << (id % 32));
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
|
||||
{
|
||||
int i;
|
||||
struct nv_value *val;
|
||||
|
||||
if (a->reg.file != b->reg.file ||
|
||||
nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
|
||||
return FALSE;
|
||||
|
||||
if (a->join->reg.id == b->join->reg.id)
|
||||
return TRUE;
|
||||
|
||||
#if 1
|
||||
/* either a or b or both have been assigned */
|
||||
|
||||
if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
|
||||
return FALSE;
|
||||
else
|
||||
if (b->join->reg.id >= 0) {
|
||||
if (a->join->reg.id >= 0)
|
||||
return FALSE;
|
||||
val = a;
|
||||
a = b;
|
||||
b = val;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->pc->num_values; ++i) {
|
||||
val = &ctx->pc->values[i];
|
||||
|
||||
if (val->join->reg.id != a->join->reg.id)
|
||||
continue;
|
||||
if (val->join != a->join && livei_have_overlap(val->join, b->join))
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
#endif
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
|
||||
{
|
||||
int j;
|
||||
struct nv_value *bjoin = b->join;
|
||||
|
||||
if (b->join->reg.id >= 0)
|
||||
a->join->reg.id = b->join->reg.id;
|
||||
|
||||
livei_unify(a->join, b->join);
|
||||
|
||||
#ifdef NV50_RA_DEBUG_JOIN
|
||||
debug_printf("joining %i to %i\n", b->n, a->n);
|
||||
#endif
|
||||
|
||||
/* make a->join the new representative */
|
||||
for (j = 0; j < ctx->pc->num_values; ++j)
|
||||
if (ctx->pc->values[j].join == bjoin)
|
||||
ctx->pc->values[j].join = a->join;
|
||||
|
||||
assert(b->join == a->join);
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
|
||||
{
|
||||
if (!join_allowed(ctx, a, b)) {
|
||||
#ifdef NV50_RA_DEBUG_JOIN
|
||||
debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
if (livei_have_overlap(a->join, b->join)) {
|
||||
#ifdef NV50_RA_DEBUG_JOIN
|
||||
debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
|
||||
livei_print(a);
|
||||
livei_print(b);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
do_join_values(ctx, a, b);
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
|
||||
{
|
||||
int i = 0, n = 0;
|
||||
|
||||
for (; i < 2; ++i)
|
||||
if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
|
||||
++n;
|
||||
|
||||
return (b->num_in > 1) && (n == 2);
|
||||
}
|
||||
|
||||
static int
|
||||
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
|
||||
struct nv_basic_block *tb)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) {
|
||||
if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb))
|
||||
continue;
|
||||
/* NOTE: back-edges are ignored by the reachable-by check */
|
||||
if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb,
|
||||
phi->src[i]->value->insn->bb, tb))
|
||||
j = i;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
/* For each operand of each PHI in b, generate a new value by inserting a MOV
|
||||
* at the end of the block it is coming from and replace the operand with its
|
||||
* result. This eliminates liveness conflicts and enables us to let values be
|
||||
* copied to the right register if such a conflict exists nonetheless.
|
||||
*
|
||||
* These MOVs are also crucial in making sure the live intervals of phi srces
|
||||
* are extended until the end of the loop, since they are not included in the
|
||||
* live-in sets.
|
||||
*/
|
||||
static int
|
||||
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_instruction *i, *ni;
|
||||
struct nv_value *val;
|
||||
struct nv_basic_block *p, *pn;
|
||||
int n, j;
|
||||
|
||||
b->pass_seq = ctx->pc->pass_seq;
|
||||
|
||||
for (n = 0; n < b->num_in; ++n) {
|
||||
p = pn = b->in[n];
|
||||
assert(p);
|
||||
|
||||
if (need_new_else_block(b, p)) {
|
||||
pn = new_basic_block(ctx->pc);
|
||||
|
||||
if (p->out[0] == b)
|
||||
p->out[0] = pn;
|
||||
else
|
||||
p->out[1] = pn;
|
||||
|
||||
if (p->exit->target == b) /* target to new else-block */
|
||||
p->exit->target = pn;
|
||||
|
||||
b->in[n] = pn;
|
||||
|
||||
pn->out[0] = b;
|
||||
pn->in[0] = p;
|
||||
pn->num_in = 1;
|
||||
}
|
||||
ctx->pc->current_block = pn;
|
||||
|
||||
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
|
||||
if ((j = phi_opnd_for_bb(i, p, b)) < 0)
|
||||
continue;
|
||||
val = i->src[j]->value;
|
||||
|
||||
if (i->src[j]->flags) {
|
||||
val = val->insn->src[0]->value;
|
||||
while (j < 4 && i->src[j])
|
||||
++j;
|
||||
assert(j < 4);
|
||||
}
|
||||
|
||||
ni = new_instruction(ctx->pc, NV_OP_MOV);
|
||||
|
||||
/* TODO: insert instruction at correct position in the first place */
|
||||
if (ni->prev && ni->prev->target)
|
||||
nv_nvi_permute(ni->prev, ni);
|
||||
|
||||
ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
|
||||
ni->def[0]->insn = ni;
|
||||
ni->src[0] = new_ref(ctx->pc, val);
|
||||
|
||||
nv_reference(ctx->pc, &i->src[j], ni->def[0]);
|
||||
|
||||
i->src[j]->flags = 1;
|
||||
}
|
||||
|
||||
if (pn != p && pn->exit) {
|
||||
ctx->pc->current_block = b->in[n ? 0 : 1];
|
||||
ni = new_instruction(ctx->pc, NV_OP_BRA);
|
||||
ni->target = b;
|
||||
ni->is_terminator = 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; ++j)
|
||||
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
|
||||
pass_generate_phi_movs(ctx, b->out[j]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pass_join_values(struct nv_pc_pass *ctx, int iter)
|
||||
{
|
||||
int c, n;
|
||||
|
||||
for (n = 0; n < ctx->num_insns; ++n) {
|
||||
struct nv_instruction *i = ctx->insns[n];
|
||||
|
||||
switch (i->opcode) {
|
||||
case NV_OP_PHI:
|
||||
if (iter != 2)
|
||||
break;
|
||||
for (c = 0; c < 4 && i->src[c]; ++c)
|
||||
try_join_values(ctx, i->def[0], i->src[c]->value);
|
||||
break;
|
||||
case NV_OP_MOV:
|
||||
if ((iter == 2) && i->src[0]->value->insn &&
|
||||
!nv_is_vector_op(i->src[0]->value->join->insn->opcode))
|
||||
try_join_values(ctx, i->def[0], i->src[0]->value);
|
||||
break;
|
||||
case NV_OP_SELECT:
|
||||
if (iter != 1)
|
||||
break;
|
||||
for (c = 0; c < 4 && i->src[c]; ++c) {
|
||||
assert(join_allowed(ctx, i->def[0], i->src[c]->value));
|
||||
do_join_values(ctx, i->def[0], i->src[c]->value);
|
||||
}
|
||||
break;
|
||||
case NV_OP_TEX:
|
||||
case NV_OP_TXB:
|
||||
case NV_OP_TXL:
|
||||
case NV_OP_TXQ:
|
||||
if (iter)
|
||||
break;
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!i->src[c])
|
||||
break;
|
||||
do_join_values(ctx, i->def[c], i->src[c]->value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Order the instructions so that live intervals can be expressed in numbers. */
|
||||
static void
|
||||
pass_order_instructions(void *priv, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
|
||||
struct nv_instruction *i;
|
||||
|
||||
b->pass_seq = ctx->pc->pass_seq;
|
||||
|
||||
assert(!b->exit || !b->exit->next);
|
||||
for (i = b->phi; i; i = i->next) {
|
||||
i->serial = ctx->num_insns;
|
||||
ctx->insns[ctx->num_insns++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Debug helper: list the numbers of all def'd values in the live set of b.
 * Compiles to nothing unless NV50_RA_DEBUG_LIVE_SETS is defined.
 */
static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
      if (!(b->live_set[j / 32] & (1u << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}
|
||||
|
||||
static INLINE void
|
||||
live_set_add(struct nv_basic_block *b, struct nv_value *val)
|
||||
{
|
||||
if (!val->insn) /* don't add non-def values */
|
||||
return;
|
||||
b->live_set[val->n / 32] |= 1 << (val->n % 32);
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
live_set_rem(struct nv_basic_block *b, struct nv_value *val)
|
||||
{
|
||||
b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
|
||||
{
|
||||
int n = ref->value->n;
|
||||
return b->live_set[n / 32] & (1 << (n % 32));
|
||||
}
|
||||
|
||||
/* The live set of a block contains those values that are live immediately
|
||||
* before the beginning of the block, so do a backwards scan.
|
||||
*/
|
||||
static int
|
||||
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_instruction *i;
|
||||
int j, n, ret = 0;
|
||||
|
||||
if (b->pass_seq >= ctx->pc->pass_seq)
|
||||
return 0;
|
||||
b->pass_seq = ctx->pc->pass_seq;
|
||||
|
||||
/* slight hack for undecidedness: set phi = entry if it's undefined */
|
||||
if (!b->phi)
|
||||
b->phi = b->entry;
|
||||
|
||||
for (n = 0; n < 2; ++n) {
|
||||
if (!b->out[n] || b->out[n] == b)
|
||||
continue;
|
||||
ret = pass_build_live_sets(ctx, b->out[n]);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (n == 0) {
|
||||
for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
|
||||
b->live_set[j] = b->out[n]->live_set[j];
|
||||
} else {
|
||||
for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
|
||||
b->live_set[j] |= b->out[n]->live_set[j];
|
||||
}
|
||||
}
|
||||
|
||||
if (!b->entry)
|
||||
return 0;
|
||||
|
||||
bb_live_set_print(ctx->pc, b);
|
||||
|
||||
for (i = b->exit; i != b->entry->prev; i = i->prev) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (!i->def[j])
|
||||
break;
|
||||
live_set_rem(b, i->def[j]);
|
||||
}
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (!i->src[j])
|
||||
break;
|
||||
live_set_add(b, i->src[j]->value);
|
||||
}
|
||||
if (i->src[4])
|
||||
live_set_add(b, i->src[4]->value);
|
||||
if (i->flags_def)
|
||||
live_set_rem(b, i->flags_def);
|
||||
if (i->flags_src)
|
||||
live_set_add(b, i->flags_src->value);
|
||||
}
|
||||
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
|
||||
live_set_rem(b, i->def[0]);
|
||||
|
||||
bb_live_set_print(ctx->pc, b);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void collect_live_values(struct nv_basic_block *b, const int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (b->out[0]) {
|
||||
if (b->out[1]) { /* what to do about back-edges ? */
|
||||
for (i = 0; i < n; ++i)
|
||||
b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
|
||||
} else {
|
||||
memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
|
||||
}
|
||||
} else
|
||||
if (b->out[1]) {
|
||||
memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
|
||||
} else {
|
||||
memset(b->live_set, 0, n * sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
|
||||
static int
|
||||
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_instruction *i, *i_stop;
|
||||
int j, s;
|
||||
const int n = (ctx->pc->num_values + 31) / 32;
|
||||
|
||||
/* verify that first block does not have live-in values */
|
||||
if (b->num_in == 0)
|
||||
for (j = 0; j < n; ++j)
|
||||
assert(b->live_set[j] == 0);
|
||||
|
||||
collect_live_values(b, n);
|
||||
|
||||
/* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
|
||||
for (j = 0; j < 2; ++j) {
|
||||
if (!b->out[j] || !b->out[j]->phi)
|
||||
continue;
|
||||
for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
|
||||
live_set_rem(b, i->def[0]);
|
||||
|
||||
for (s = 0; s < 4; ++s) {
|
||||
if (!i->src[s])
|
||||
break;
|
||||
assert(i->src[s]->value->insn);
|
||||
if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
|
||||
live_set_add(b, i->src[s]->value);
|
||||
else
|
||||
live_set_rem(b, i->src[s]->value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* remaining live-outs are live until the end */
|
||||
if (b->exit) {
|
||||
for (j = 0; j < ctx->pc->num_values; ++j) {
|
||||
if (!(b->live_set[j / 32] & (1 << (j % 32))))
|
||||
continue;
|
||||
add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
|
||||
#ifdef NV50_RA_DEBUG_LIVEI
|
||||
debug_printf("adding range for live value %i: ", j);
|
||||
livei_print(&ctx->pc->values[j]);
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
i_stop = b->entry ? b->entry->prev : NULL;
|
||||
|
||||
/* don't have to include phi functions here (will have 0 live range) */
|
||||
for (i = b->exit; i != i_stop; i = i->prev) {
|
||||
assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
|
||||
for (j = 0; j < 4; ++j) {
|
||||
if (i->def[j])
|
||||
live_set_rem(b, i->def[j]);
|
||||
}
|
||||
if (i->flags_def)
|
||||
live_set_rem(b, i->flags_def);
|
||||
|
||||
for (j = 0; j < 5; ++j) {
|
||||
if (i->src[j] && !live_set_test(b, i->src[j])) {
|
||||
live_set_add(b, i->src[j]->value);
|
||||
add_range(i->src[j]->value, b, i->serial);
|
||||
#ifdef NV50_RA_DEBUG_LIVEI
|
||||
debug_printf("adding range for source %i (ends living): ",
|
||||
i->src[j]->value->n);
|
||||
livei_print(i->src[j]->value);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (i->flags_src && !live_set_test(b, i->flags_src)) {
|
||||
live_set_add(b, i->flags_src->value);
|
||||
add_range(i->flags_src->value, b, i->serial);
|
||||
#ifdef NV50_RA_DEBUG_LIVEI
|
||||
debug_printf("adding range for source %i (ends living): ",
|
||||
i->flags_src->value->n);
|
||||
livei_print(i->flags_src->value);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
b->pass_seq = ctx->pc->pass_seq;
|
||||
|
||||
if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
|
||||
pass_build_intervals(ctx, b->out[0]);
|
||||
|
||||
if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
|
||||
pass_build_intervals(ctx, b->out[1]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
|
||||
{
|
||||
memset(set, 0, sizeof(*set));
|
||||
|
||||
set->last[NV_FILE_GPR] = 255;
|
||||
set->last[NV_FILE_OUT] = 127;
|
||||
set->last[NV_FILE_FLAGS] = 4;
|
||||
set->last[NV_FILE_ADDR] = 4;
|
||||
|
||||
set->pc = pc;
|
||||
}
|
||||
|
||||
static void
|
||||
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
|
||||
{
|
||||
struct nv_value *elem = list->prev;
|
||||
|
||||
for (elem = list->prev;
|
||||
elem != list && elem->livei->bgn > nval->livei->bgn;
|
||||
elem = elem->prev);
|
||||
/* now elem begins before or at the same time as val */
|
||||
|
||||
nval->prev = elem;
|
||||
nval->next = elem->next;
|
||||
elem->next->prev = nval;
|
||||
elem->next = nval;
|
||||
}
|
||||
|
||||
static int
|
||||
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
|
||||
{
|
||||
struct nv_instruction *i;
|
||||
struct register_set f, free;
|
||||
int k, n;
|
||||
struct nv_value *cur, *val, *tmp[2];
|
||||
struct nv_value active, inactive, handled, unhandled;
|
||||
|
||||
make_empty_list(&active);
|
||||
make_empty_list(&inactive);
|
||||
make_empty_list(&handled);
|
||||
make_empty_list(&unhandled);
|
||||
|
||||
nv50_ctor_register_set(ctx->pc, &free);
|
||||
|
||||
/* joined values should have range = NULL and thus not be added;
|
||||
* also, fixed memory values won't be added because they're not
|
||||
* def'd, just used
|
||||
*/
|
||||
for (n = 0; n < ctx->num_insns; ++n) {
|
||||
i = ctx->insns[n];
|
||||
|
||||
for (k = 0; k < 4; ++k) {
|
||||
if (i->def[k] && i->def[k]->livei)
|
||||
insert_ordered_tail(&unhandled, i->def[k]);
|
||||
else
|
||||
if (0 && i->def[k])
|
||||
debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
|
||||
}
|
||||
if (i->flags_def && i->flags_def->livei)
|
||||
insert_ordered_tail(&unhandled, i->flags_def);
|
||||
}
|
||||
|
||||
for (val = unhandled.next; val != unhandled.prev; val = val->next) {
|
||||
assert(val->join == val);
|
||||
assert(val->livei->bgn <= val->next->livei->bgn);
|
||||
}
|
||||
|
||||
foreach_s(cur, tmp[0], &unhandled) {
|
||||
remove_from_list(cur);
|
||||
|
||||
foreach_s(val, tmp[1], &active) {
|
||||
if (livei_end(val) <= cur->livei->bgn) {
|
||||
reg_release(&free, val);
|
||||
move_to_head(&handled, val);
|
||||
} else
|
||||
if (!livei_contains(val, cur->livei->bgn)) {
|
||||
reg_release(&free, val);
|
||||
move_to_head(&inactive, val);
|
||||
}
|
||||
}
|
||||
|
||||
foreach_s(val, tmp[1], &inactive) {
|
||||
if (livei_end(val) <= cur->livei->bgn)
|
||||
move_to_head(&handled, val);
|
||||
else
|
||||
if (livei_contains(val, cur->livei->bgn)) {
|
||||
reg_occupy(&free, val);
|
||||
move_to_head(&active, val);
|
||||
}
|
||||
}
|
||||
|
||||
f = free;
|
||||
|
||||
foreach(val, &inactive)
|
||||
if (livei_have_overlap(val, cur))
|
||||
reg_occupy(&f, val);
|
||||
|
||||
foreach(val, &unhandled)
|
||||
if (val->reg.id >= 0 && livei_have_overlap(val, cur))
|
||||
reg_occupy(&f, val);
|
||||
|
||||
if (cur->reg.id < 0) {
|
||||
boolean mem = FALSE;
|
||||
|
||||
if (nv_is_vector_op(cur->insn->opcode))
|
||||
mem = !reg_assign(&f, &cur->insn->def[0], 4);
|
||||
else
|
||||
if (iter)
|
||||
mem = !reg_assign(&f, &cur, 1);
|
||||
|
||||
if (mem) {
|
||||
NOUVEAU_ERR("out of registers\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
insert_at_head(&active, cur);
|
||||
reg_occupy(&free, cur);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
|
||||
{
|
||||
struct nv_pc_pass *ctx;
|
||||
int i, ret;
|
||||
|
||||
NV50_DBGMSG("REGISTER ALLOCATION - entering\n");
|
||||
|
||||
ctx = CALLOC_STRUCT(nv_pc_pass);
|
||||
if (!ctx)
|
||||
return -1;
|
||||
ctx->pc = pc;
|
||||
|
||||
ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
|
||||
if (!ctx->insns) {
|
||||
FREE(ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pc->pass_seq++;
|
||||
ret = pass_generate_phi_movs(ctx, root);
|
||||
assert(!ret);
|
||||
|
||||
for (i = 0; i < pc->loop_nesting_bound; ++i) {
|
||||
pc->pass_seq++;
|
||||
ret = pass_build_live_sets(ctx, root);
|
||||
assert(!ret && "live sets");
|
||||
if (ret) {
|
||||
NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
pc->pass_seq++;
|
||||
nv_pc_pass_in_order(root, pass_order_instructions, ctx);
|
||||
|
||||
pc->pass_seq++;
|
||||
ret = pass_build_intervals(ctx, root);
|
||||
assert(!ret && "build intervals");
|
||||
if (ret) {
|
||||
NOUVEAU_ERR("failed to build live intervals\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef NV50_RA_DEBUG_LIVEI
|
||||
for (i = 0; i < pc->num_values; ++i)
|
||||
livei_print(&pc->values[i]);
|
||||
#endif
|
||||
|
||||
ret = pass_join_values(ctx, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = pass_linear_scan(ctx, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = pass_join_values(ctx, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = pass_join_values(ctx, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = pass_linear_scan(ctx, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < pc->num_values; ++i)
|
||||
livei_release(&pc->values[i]);
|
||||
|
||||
NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");
|
||||
|
||||
out:
|
||||
FREE(ctx->insns);
|
||||
FREE(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
nv_pc_exec_pass1(struct nv_pc *pc)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < pc->num_subroutines + 1; ++i)
|
||||
if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,75 +1,131 @@
|
|||
#ifndef __NV50_PROGRAM_H__
|
||||
#define __NV50_PROGRAM_H__
|
||||
/*
|
||||
* Copyright 2010 Ben Skeggs
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NV50_PROG_H__
|
||||
#define __NV50_PROG_H__
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
struct nv50_program_exec {
|
||||
struct nv50_program_exec *next;
|
||||
#define NV50_CAP_MAX_PROGRAM_TEMPS 64
|
||||
|
||||
unsigned inst[2];
|
||||
struct {
|
||||
int index;
|
||||
unsigned mask;
|
||||
unsigned shift;
|
||||
} param;
|
||||
};
|
||||
struct nv50_varying {
|
||||
uint8_t id; /* tgsi index */
|
||||
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
|
||||
|
||||
struct nv50_sreg4 {
|
||||
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
|
||||
uint8_t id; /* tgsi index */
|
||||
uint8_t mask : 4;
|
||||
uint8_t linear : 1;
|
||||
uint8_t pad : 3;
|
||||
|
||||
uint8_t mask;
|
||||
boolean linear;
|
||||
|
||||
ubyte sn, si; /* semantic name & index */
|
||||
ubyte sn; /* semantic name */
|
||||
ubyte si; /* semantic index */
|
||||
};
|
||||
|
||||
struct nv50_program {
|
||||
struct pipe_shader_state pipe;
|
||||
struct tgsi_shader_info info;
|
||||
boolean translated;
|
||||
struct pipe_shader_state pipe;
|
||||
|
||||
unsigned type;
|
||||
struct nv50_program_exec *exec_head;
|
||||
struct nv50_program_exec *exec_tail;
|
||||
unsigned exec_size;
|
||||
struct nouveau_resource *data[1];
|
||||
unsigned data_start[1];
|
||||
ubyte type;
|
||||
boolean translated;
|
||||
|
||||
struct nouveau_bo *bo;
|
||||
struct nouveau_bo *bo;
|
||||
struct nouveau_stateobj *so;
|
||||
|
||||
uint32_t *immd;
|
||||
unsigned immd_nr;
|
||||
unsigned param_nr;
|
||||
uint32_t *code;
|
||||
unsigned code_size;
|
||||
unsigned code_start; /* offset inside bo */
|
||||
uint32_t *immd;
|
||||
unsigned immd_size;
|
||||
unsigned parm_size; /* size limit of uniform buffer */
|
||||
|
||||
struct {
|
||||
unsigned high_temp;
|
||||
unsigned high_result;
|
||||
ubyte max_gpr; /* REG_ALLOC_TEMP */
|
||||
ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */
|
||||
|
||||
uint32_t attr[2];
|
||||
uint32_t regs[4];
|
||||
ubyte in_nr;
|
||||
ubyte out_nr;
|
||||
struct nv50_varying in[16];
|
||||
struct nv50_varying out[16];
|
||||
|
||||
/* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
|
||||
unsigned in_nr, out_nr;
|
||||
struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS];
|
||||
struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS];
|
||||
struct {
|
||||
uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
|
||||
ubyte psiz;
|
||||
ubyte bfc[2];
|
||||
ubyte edgeflag;
|
||||
ubyte clpd;
|
||||
ubyte clpd_nr;
|
||||
} vp;
|
||||
|
||||
/* FP colour inputs, VP/GP back colour outputs */
|
||||
struct nv50_sreg4 two_side[2];
|
||||
struct {
|
||||
uint32_t flags[2]; /* 0x19a8, 196c */
|
||||
uint32_t interp; /* 0x1988 */
|
||||
uint32_t colors; /* 0x1904 */
|
||||
} fp;
|
||||
|
||||
/* GP only */
|
||||
unsigned vert_count;
|
||||
uint8_t prim_type;
|
||||
struct {
|
||||
ubyte primid; /* primitive id output register */
|
||||
uint8_t vert_count;
|
||||
uint8_t prim_type; /* point, line strip or tri strip */
|
||||
} gp;
|
||||
|
||||
/* VP & GP only */
|
||||
uint8_t clpd, clpd_nr;
|
||||
uint8_t psiz;
|
||||
uint8_t edgeflag_in;
|
||||
|
||||
/* FP & GP only */
|
||||
uint8_t prim_id;
|
||||
} cfg;
|
||||
void *fixups;
|
||||
unsigned num_fixups;
|
||||
};
|
||||
|
||||
#endif
|
||||
#define NV50_INTERP_LINEAR (1 << 0)
|
||||
#define NV50_INTERP_FLAT (1 << 1)
|
||||
#define NV50_INTERP_CENTROID (1 << 2)
|
||||
|
||||
/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
|
||||
/*
 * Per-subroutine record; struct nv50_translation_info holds an array of
 * these in its subr / subr_nr members.
 *
 * NOTE(review): argv/retv are sized [NV50_CAP_MAX_PROGRAM_TEMPS][4], which
 * presumably means one entry per TEMP register and per vector component -
 * confirm against the code that fills them in.
 */
struct nv50_subroutine {
|
||||
unsigned id; /* NOTE(review): presumably the subroutine's identifier - confirm */
|
||||
unsigned pos; /* NOTE(review): presumably its position in the instruction stream - confirm */
|
||||
/* function inputs and outputs */
|
||||
uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
|
||||
uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
|
||||
};
|
||||
|
||||
/*
 * State describing one TGSI program being translated; this is the argument
 * type of nv50_generate_code().
 *
 * The field notes below are reviewer assumptions derived from names and
 * array dimensions only; the code that fills this structure is not visible
 * here, so treat them as TODO-confirm.
 */
struct nv50_translation_info {
|
||||
struct nv50_program *p; /* the nv50_program this translation belongs to */
|
||||
unsigned inst_nr; /* presumably the number of entries in insns - confirm */
|
||||
struct tgsi_full_instruction *insns;
|
||||
ubyte input_file;
|
||||
ubyte output_file;
|
||||
/* the [..][4] maps are presumably indexed by TGSI register and component - confirm */
ubyte input_map[PIPE_MAX_SHADER_INPUTS][4];
|
||||
ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4];
|
||||
ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; /* presumably NV50_INTERP_* flags per input - confirm */
|
||||
int input_access[PIPE_MAX_SHADER_INPUTS][4];
|
||||
int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
|
||||
boolean indirect_inputs;
|
||||
boolean indirect_outputs;
|
||||
boolean store_to_memory;
|
||||
struct tgsi_shader_info scan;
|
||||
uint32_t *immd32;
|
||||
unsigned immd32_nr; /* presumably the element count of immd32 / immd32_ty - confirm */
|
||||
ubyte *immd32_ty;
|
||||
ubyte edgeflag_out;
|
||||
struct nv50_subroutine *subr;
|
||||
unsigned subr_nr; /* presumably the number of entries in subr - confirm */
|
||||
};
|
||||
|
||||
int nv50_generate_code(struct nv50_translation_info *ti);
|
||||
boolean nv50_program_tx(struct nv50_program *p);
|
||||
|
||||
#endif /* __NV50_PROG_H__ */
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
|
|||
ctx.idxbuf = NULL;
|
||||
ctx.vtx_size = 0;
|
||||
ctx.edgeflag = 0.5f;
|
||||
ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
|
||||
ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;
|
||||
|
||||
/* map vertex buffers, determine vertex size */
|
||||
for (i = 0; i < nv50->vtxelt->num_elements; i++) {
|
||||
|
|
|
|||
|
|
@ -448,7 +448,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_DMA_TIC 0x000001a0
|
||||
#define NV50TCL_DMA_TEXTURE 0x000001a4
|
||||
#define NV50TCL_DMA_STRMOUT 0x000001a8
|
||||
#define NV50TCL_DMA_UNK01AC 0x000001ac
|
||||
#define NV50TCL_DMA_CLIPID 0x000001ac
|
||||
#define NV50TCL_DMA_COLOR(x) (0x000001c0+((x)*4))
|
||||
#define NV50TCL_DMA_COLOR__SIZE 0x00000008
|
||||
#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
|
||||
|
|
@ -665,8 +665,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_DEPTH_RANGE_FAR__SIZE 0x00000010
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16
|
||||
#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16
|
||||
#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000
|
||||
#define NV50TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8))
|
||||
#define NV50TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004
|
||||
#define NV50TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8))
|
||||
#define NV50TCL_CLIPID_REGION_VERT__SIZE 0x00000004
|
||||
#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
|
||||
#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
|
||||
#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
|
||||
|
|
@ -724,14 +736,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
|
||||
#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
|
||||
#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
|
||||
#define NV50TCL_UNK0F84_ADDRESS_HIGH 0x00000f84
|
||||
#define NV50TCL_UNK0F84_ADDRESS_LOW 0x00000f88
|
||||
#define NV50TCL_VERTEX_RUNOUT_HIGH 0x00000f84
|
||||
#define NV50TCL_VERTEX_RUNOUT_LOW 0x00000f88
|
||||
#define NV50TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4))
|
||||
#define NV50TCL_DEPTH_BOUNDS__SIZE 0x00000002
|
||||
#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
|
||||
#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
|
||||
#define NV50TCL_MSAA_MASK(x) (0x00000fbc+((x)*4))
|
||||
#define NV50TCL_MSAA_MASK__SIZE 0x00000004
|
||||
#define NV50TCL_CLIPID_ADDRESS_HIGH 0x00000fcc
|
||||
#define NV50TCL_CLIPID_ADDRESS_LOW 0x00000fd0
|
||||
#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
|
||||
#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
|
||||
#define NV50TCL_ZETA_FORMAT 0x00000fe8
|
||||
|
|
@ -861,37 +875,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
|
||||
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902
|
||||
#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902
|
||||
#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
|
||||
#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
|
||||
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
|
||||
#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
|
||||
|
|
@ -899,37 +921,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
|
||||
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902
|
||||
#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
|
||||
#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
|
||||
#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
|
||||
#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
|
||||
#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001380
|
||||
|
|
@ -988,6 +1018,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_FP_START_ID 0x00001414
|
||||
#define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420
|
||||
#define NV50TCL_VB_ELEMENT_BASE 0x00001434
|
||||
#define NV50TCL_INSTANCE_BASE 0x00001438
|
||||
#define NV50TCL_CODE_CB_FLUSH 0x00001440
|
||||
#define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8))
|
||||
#define NV50TCL_BIND_TSC__SIZE 0x00000003
|
||||
|
|
@ -1005,6 +1036,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_BIND_TIC_TIC_MASK 0x7ffffe00
|
||||
#define NV50TCL_STRMOUT_MAP(x) (0x00001480+((x)*4))
|
||||
#define NV50TCL_STRMOUT_MAP__SIZE 0x00000020
|
||||
#define NV50TCL_CLIPID_HEIGHT 0x00001504
|
||||
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510
|
||||
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0)
|
||||
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1)
|
||||
|
|
@ -1089,7 +1121,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_GP_BUILTIN_RESULT_EN 0x000015cc
|
||||
#define NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX (1 << 0)
|
||||
#define NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX (1 << 16)
|
||||
#define NV50TCL_MULTISAMPLE_SAMPLES_LOG2 0x000015d0
|
||||
#define NV50TCL_MULTISAMPLE_MODE 0x000015d0
|
||||
#define NV50TCL_MULTISAMPLE_MODE_1X 0x00000000
|
||||
#define NV50TCL_MULTISAMPLE_MODE_2XMS 0x00000001
|
||||
#define NV50TCL_MULTISAMPLE_MODE_4XMS 0x00000002
|
||||
#define NV50TCL_MULTISAMPLE_MODE_8XMS 0x00000004
|
||||
#define NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
|
||||
#define NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
|
||||
#define NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
|
||||
#define NV50TCL_VERTEX_BEGIN 0x000015dc
|
||||
#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
|
||||
#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
|
||||
|
|
@ -1105,6 +1144,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY 0x0000000b
|
||||
#define NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY 0x0000000c
|
||||
#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY 0x0000000d
|
||||
#define NV50TCL_VERTEX_BEGIN_PATCHES 0x0000000e
|
||||
#define NV50TCL_VERTEX_END 0x000015e0
|
||||
#define NV50TCL_EDGEFLAG_ENABLE 0x000015e4
|
||||
#define NV50TCL_VB_ELEMENT_U32 0x000015e8
|
||||
|
|
@ -1118,6 +1158,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
|
||||
#define NV50TCL_VB_ELEMENT_U16_I1_SHIFT 16
|
||||
#define NV50TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
|
||||
#define NV50TCL_VERTEX_BASE_HIGH 0x000015f4
|
||||
#define NV50TCL_VERTEX_BASE_LOW 0x000015f8
|
||||
#define NV50TCL_VERTEX_DATA 0x00001640
|
||||
#define NV50TCL_PRIM_RESTART_ENABLE 0x00001644
|
||||
#define NV50TCL_PRIM_RESTART_INDEX 0x00001648
|
||||
|
|
@ -1503,7 +1545,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_VIEWPORT_TRANSFORM_EN 0x0000192c
|
||||
#define NV50TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c
|
||||
#define NV50TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c
|
||||
#define NV50TCL_VIEWPORT_CLIP_MODE 0x00001950
|
||||
#define NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000
|
||||
#define NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001
|
||||
#define NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002
|
||||
#define NV50TCL_FP_CTRL_UNK196C 0x0000196c
|
||||
#define NV50TCL_CLIPID_ENABLE 0x0000197c
|
||||
#define NV50TCL_CLIPID_WIDTH 0x00001980
|
||||
#define NV50TCL_CLIPID_ID 0x00001984
|
||||
#define NV50TCL_FP_INTERPOLANT_CTRL 0x00001988
|
||||
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT 24
|
||||
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK 0xff000000
|
||||
|
|
@ -1604,19 +1653,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT 25
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x7e000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x0e000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x0e000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x02000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x04000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x0a000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x0c000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x08000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x06000000
|
||||
#define NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA (1 << 31)
|
||||
#define NV50TCL_QUERY_ADDRESS_HIGH 0x00001b00
|
||||
#define NV50TCL_QUERY_ADDRESS_LOW 0x00001b04
|
||||
#define NV50TCL_QUERY_COUNTER 0x00001b08
|
||||
#define NV50TCL_QUERY_SEQUENCE 0x00001b08
|
||||
#define NV50TCL_QUERY_GET 0x00001b0c
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#include "nv50_context.h"
|
||||
#include "nv50_screen.h"
|
||||
#include "nv50_resource.h"
|
||||
#include "nv50_program.h"
|
||||
|
||||
#include "nouveau/nouveau_stateobj.h"
|
||||
|
||||
|
|
@ -34,75 +35,38 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
|
|||
enum pipe_format format,
|
||||
enum pipe_texture_target target,
|
||||
unsigned sample_count,
|
||||
unsigned tex_usage, unsigned geom_flags)
|
||||
unsigned usage, unsigned geom_flags)
|
||||
{
|
||||
if (sample_count > 1)
|
||||
return FALSE;
|
||||
|
||||
if (tex_usage & PIPE_BIND_RENDER_TARGET) {
|
||||
if (!util_format_s3tc_enabled) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
case PIPE_FORMAT_R16G16B16A16_SNORM:
|
||||
case PIPE_FORMAT_R16G16B16A16_UNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
case PIPE_FORMAT_R16G16_SNORM:
|
||||
case PIPE_FORMAT_R16G16_UNORM:
|
||||
return TRUE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else
|
||||
if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
|
||||
return TRUE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_DXT1_RGB:
|
||||
case PIPE_FORMAT_DXT1_RGBA:
|
||||
case PIPE_FORMAT_DXT3_RGBA:
|
||||
case PIPE_FORMAT_DXT5_RGBA:
|
||||
return util_format_s3tc_enabled;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8A8_SRGB:
|
||||
case PIPE_FORMAT_B8G8R8X8_SRGB:
|
||||
case PIPE_FORMAT_B5G5R5A1_UNORM:
|
||||
case PIPE_FORMAT_B4G4R4A4_UNORM:
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
case PIPE_FORMAT_A8_UNORM:
|
||||
case PIPE_FORMAT_I8_UNORM:
|
||||
case PIPE_FORMAT_L8A8_UNORM:
|
||||
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
case PIPE_FORMAT_R16G16B16A16_SNORM:
|
||||
case PIPE_FORMAT_R16G16B16A16_UNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
case PIPE_FORMAT_R16G16_SNORM:
|
||||
case PIPE_FORMAT_R16G16_UNORM:
|
||||
return TRUE;
|
||||
case PIPE_FORMAT_DXT1_RGB:
|
||||
case PIPE_FORMAT_DXT1_RGBA:
|
||||
case PIPE_FORMAT_DXT3_RGBA:
|
||||
case PIPE_FORMAT_DXT5_RGBA:
|
||||
return FALSE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
if ((nouveau_screen(pscreen)->device->chipset & 0xf0) != 0xa0)
|
||||
return FALSE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* transfers & shared are always supported */
|
||||
usage &= ~(PIPE_BIND_TRANSFER_READ |
|
||||
PIPE_BIND_TRANSFER_WRITE |
|
||||
PIPE_BIND_SHARED);
|
||||
|
||||
return (nv50_format_table[format].usage & usage) == usage;
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -142,6 +106,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
|
||||
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
|
||||
return 1;
|
||||
case PIPE_CAP_TEXTURE_SWIZZLE:
|
||||
return 1;
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
return 1;
|
||||
case PIPE_CAP_INDEP_BLEND_ENABLE:
|
||||
|
|
@ -165,10 +131,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
}
|
||||
|
||||
static int
|
||||
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param)
|
||||
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
switch(shader)
|
||||
{
|
||||
switch(shader) {
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
case PIPE_SHADER_VERTEX:
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
|
|
@ -186,7 +152,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum
|
|||
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: /* need stack bo */
|
||||
return 4;
|
||||
case PIPE_SHADER_CAP_MAX_INPUTS: /* 128 / 4 with GP */
|
||||
if(shader == PIPE_SHADER_GEOMETRY)
|
||||
if (shader == PIPE_SHADER_GEOMETRY)
|
||||
return 128 / 4;
|
||||
else
|
||||
return 64 / 4;
|
||||
|
|
@ -197,7 +163,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum
|
|||
case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS: /* no spilling atm */
|
||||
return 128 / 4;
|
||||
return NV50_CAP_MAX_PROGRAM_TEMPS;
|
||||
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
|
||||
return 1;
|
||||
default:
|
||||
|
|
@ -301,14 +267,23 @@ nv50_screen_relocs(struct nv50_screen *screen)
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
|
||||
# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
|
||||
#endif
|
||||
|
||||
extern int nouveau_device_get_param(struct nouveau_device *dev,
|
||||
uint64_t param, uint64_t *value);
|
||||
|
||||
struct pipe_screen *
|
||||
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
||||
{
|
||||
struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
|
||||
struct nouveau_channel *chan;
|
||||
struct pipe_screen *pscreen;
|
||||
uint64_t value;
|
||||
unsigned chipset = dev->chipset;
|
||||
unsigned tesla_class = 0;
|
||||
unsigned stack_size, local_size, max_warps;
|
||||
int ret, i;
|
||||
const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
|
||||
|
||||
|
|
@ -527,6 +502,41 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
|||
OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12));
|
||||
OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12));
|
||||
|
||||
/* shader stack */
|
||||
nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
|
||||
|
||||
max_warps = util_bitcount(value & 0xffff);
|
||||
max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
|
||||
|
||||
stack_size = max_warps * 64 * 8;
|
||||
|
||||
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
|
||||
stack_size, &screen->stack_bo);
|
||||
if (ret) {
|
||||
nv50_screen_destroy(pscreen);
|
||||
return NULL;
|
||||
}
|
||||
BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3);
|
||||
OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RING (chan, 4);
|
||||
|
||||
local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32;
|
||||
|
||||
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
|
||||
local_size, &screen->local_bo);
|
||||
if (ret) {
|
||||
nv50_screen_destroy(pscreen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
|
||||
|
||||
BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3);
|
||||
OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RING (chan, util_unsigned_logbase2(local_size / 8));
|
||||
|
||||
/* Vertex array limits - max them out */
|
||||
for (i = 0; i < 16; i++) {
|
||||
BEGIN_RING(chan, screen->tesla,
|
||||
|
|
|
|||
|
|
@ -22,11 +22,12 @@ struct nv50_screen {
|
|||
|
||||
struct nouveau_resource *immd_heap;
|
||||
|
||||
struct pipe_resource *strm_vbuf[16];
|
||||
|
||||
struct nouveau_bo *tic;
|
||||
struct nouveau_bo *tsc;
|
||||
|
||||
struct nouveau_bo *stack_bo; /* control flow stack */
|
||||
struct nouveau_bo *local_bo; /* l[] memory */
|
||||
|
||||
boolean force_push;
|
||||
};
|
||||
|
||||
|
|
@ -38,4 +39,13 @@ nv50_screen(struct pipe_screen *screen)
|
|||
|
||||
extern void nv50_screen_relocs(struct nv50_screen *);
|
||||
|
||||
/*
 * Element type of nv50_format_table[].
 *
 * nv50_screen_is_format_supported() indexes the table by enum pipe_format
 * and accepts a format when (usage & requested) == requested, so `usage`
 * is a bitmask of the bind flags supported for that format.
 *
 * NOTE(review): rt / tic / vtx are presumably the hardware format codes for
 * render-target, texture and vertex-attribute use respectively - confirm
 * against the table definition in nv50_formats.c.
 */
struct nv50_format {
|
||||
uint32_t rt;
|
||||
uint32_t tic;
|
||||
uint32_t vtx;
|
||||
uint32_t usage; /* bitmask tested against the requested PIPE_BIND_* flags */
|
||||
};
|
||||
|
||||
extern const struct nv50_format nv50_format_table[];
|
||||
|
||||
#endif
|
||||
|
|
|
|||
626
src/gallium/drivers/nv50/nv50_shader_state.c
Normal file
626
src/gallium/drivers/nv50/nv50_shader_state.c
Normal file
|
|
@ -0,0 +1,626 @@
|
|||
/*
|
||||
* Copyright 2008 Ben Skeggs
|
||||
* Copyright 2010 Christoph Bumiller
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
||||
#include "nv50_context.h"
|
||||
#include "nv50_transfer.h"
|
||||
|
||||
static void
|
||||
nv50_transfer_constbuf(struct nv50_context *nv50,
|
||||
struct pipe_resource *buf, unsigned size, unsigned cbi)
|
||||
{
|
||||
struct pipe_context *pipe = &nv50->pipe;
|
||||
struct pipe_transfer *transfer;
|
||||
struct nouveau_channel *chan = nv50->screen->base.channel;
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
uint32_t *map;
|
||||
unsigned count, start;
|
||||
|
||||
map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
|
||||
if (!map)
|
||||
return;
|
||||
|
||||
count = (buf->width0 + 3) / 4;
|
||||
start = 0;
|
||||
|
||||
while (count) {
|
||||
unsigned nr = count;
|
||||
nr = MIN2(nr, 2047);
|
||||
|
||||
/* FIXME: emit relocs for unsuiTed MM */
|
||||
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
|
||||
OUT_RING (chan, (start << 8) | cbi);
|
||||
BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
|
||||
OUT_RINGp (chan, map, nr);
|
||||
|
||||
count -= nr;
|
||||
start += nr;
|
||||
map += nr;
|
||||
}
|
||||
|
||||
pipe_buffer_unmap(pipe, buf, transfer);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
|
||||
{
|
||||
struct nouveau_channel *chan = nv50->screen->base.channel;
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
unsigned cbi;
|
||||
|
||||
if (p->immd_size) {
|
||||
uint32_t *data = p->immd;
|
||||
unsigned count = p->immd_size / 4;
|
||||
unsigned start = 0;
|
||||
|
||||
while (count) {
|
||||
unsigned nr = count;
|
||||
nr = MIN2(nr, 2047);
|
||||
|
||||
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
|
||||
OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
|
||||
BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
|
||||
OUT_RINGp (chan, data, nr);
|
||||
|
||||
count -= nr;
|
||||
start += nr;
|
||||
data += nr;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the state tracker doesn't change the constbuf, and it is first
|
||||
* validated with a program that doesn't use it, this check prevents
|
||||
* it from even being uploaded. */
|
||||
/*
|
||||
if (p->parm_size == 0)
|
||||
return;
|
||||
*/
|
||||
|
||||
switch (p->type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
cbi = NV50_CB_PVP;
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
cbi = NV50_CB_PFP;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
cbi = NV50_CB_PGP;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
|
||||
{
|
||||
struct nouveau_channel *chan = nv50->screen->base.channel;
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
|
||||
int ret;
|
||||
unsigned offset;
|
||||
unsigned size = p->code_size;
|
||||
uint32_t *data = p->code;
|
||||
|
||||
assert(p->translated);
|
||||
|
||||
/* TODO: use a single bo (for each type) for shader code */
|
||||
if (p->bo)
|
||||
return;
|
||||
ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
|
||||
assert(!ret);
|
||||
|
||||
offset = p->code_start = 0;
|
||||
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
|
||||
OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
|
||||
OUT_RING (chan, 1);
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
|
||||
OUT_RING (chan, 0x40000);
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
|
||||
OUT_RING (chan, 0x10000);
|
||||
OUT_RING (chan, 1);
|
||||
|
||||
while (size) {
|
||||
unsigned nr = size / 4;
|
||||
|
||||
if (AVAIL_RING(chan) < 32)
|
||||
FIRE_RING(chan);
|
||||
|
||||
nr = MIN2(nr, AVAIL_RING(chan) - 18);
|
||||
nr = MIN2(nr, 1792);
|
||||
if (nr < (size / 4))
|
||||
nr &= ~0x3f;
|
||||
assert(!(size & 3));
|
||||
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
|
||||
OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
|
||||
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
|
||||
OUT_RING (chan, nr * 4);
|
||||
OUT_RING (chan, 1);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, 1);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, 1);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, 0);
|
||||
OUT_RING (chan, 0);
|
||||
|
||||
BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
|
||||
OUT_RINGp (chan, data, nr);
|
||||
|
||||
data += nr;
|
||||
offset += nr * 4;
|
||||
size -= nr * 4;
|
||||
}
|
||||
|
||||
BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
|
||||
OUT_RING (chan, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
|
||||
{
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nouveau_stateobj *so = so_new(5, 7, 2);
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_HIGH, 0, 0);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_LOW, 0, 0);
|
||||
so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
|
||||
so_data (so, p->vp.attrs[0]);
|
||||
so_data (so, p->vp.attrs[1]);
|
||||
so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
|
||||
so_data (so, p->max_out);
|
||||
so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
|
||||
so_data (so, p->max_gpr);
|
||||
so_method(so, tesla, NV50TCL_VP_START_ID, 1);
|
||||
so_data (so, p->code_start);
|
||||
|
||||
so_ref(so, &p->so);
|
||||
so_ref(NULL, &so);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
|
||||
{
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nouveau_stateobj *so = so_new(6, 7, 2);
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_HIGH, 0, 0);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_LOW, 0, 0);
|
||||
so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
|
||||
so_data (so, p->max_gpr);
|
||||
so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
|
||||
so_data (so, p->max_out);
|
||||
so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
|
||||
so_data (so, p->fp.flags[0]);
|
||||
so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
|
||||
so_data (so, p->fp.flags[1]);
|
||||
so_method(so, tesla, NV50TCL_FP_START_ID, 1);
|
||||
so_data (so, p->code_start);
|
||||
|
||||
so_ref(so, &p->so);
|
||||
so_ref(NULL, &so);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
|
||||
{
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nouveau_stateobj *so = so_new(6, 7, 2);
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_HIGH, 0, 0);
|
||||
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_LOW, 0, 0);
|
||||
so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
|
||||
so_data (so, p->max_gpr);
|
||||
so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
|
||||
so_data (so, p->max_out);
|
||||
so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
|
||||
so_data (so, p->gp.prim_type);
|
||||
so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
|
||||
so_data (so, p->gp.vert_count);
|
||||
so_method(so, tesla, NV50TCL_GP_START_ID, 1);
|
||||
so_data (so, p->code_start);
|
||||
|
||||
so_ref(so, &p->so);
|
||||
so_ref(NULL, &so);
|
||||
}
|
||||
|
||||
/* Translate program p with nv50_program_tx() and record the result in
 * p->translated. Returns that flag; a failed translation trips the assert
 * in debug builds and yields a false return otherwise.
 */
static boolean
|
||||
nv50_program_validate(struct nv50_program *p)
|
||||
{
|
||||
p->translated = nv50_program_tx(p);
|
||||
assert(p->translated);
|
||||
return p->translated;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
nv50_vertprog_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nv50_program *p = nv50->vertprog;
|
||||
struct nouveau_stateobj *so = NULL;
|
||||
|
||||
if (!p->translated) {
|
||||
if (nv50_program_validate(p))
|
||||
nv50_vp_update_stateobj(nv50, p);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nv50->dirty & NV50_NEW_VERTPROG_CB)
|
||||
nv50_program_validate_data(nv50, p);
|
||||
|
||||
if (!(nv50->dirty & NV50_NEW_VERTPROG))
|
||||
return NULL;
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_ref(p->so, &so);
|
||||
return so;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
nv50_fragprog_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nv50_program *p = nv50->fragprog;
|
||||
struct nouveau_stateobj *so = NULL;
|
||||
|
||||
if (!p->translated) {
|
||||
if (nv50_program_validate(p))
|
||||
nv50_fp_update_stateobj(nv50, p);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
|
||||
nv50_program_validate_data(nv50, p);
|
||||
|
||||
if (!(nv50->dirty & NV50_NEW_FRAGPROG))
|
||||
return NULL;
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_ref(p->so, &so);
|
||||
return so;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
nv50_geomprog_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nv50_program *p = nv50->geomprog;
|
||||
struct nouveau_stateobj *so = NULL;
|
||||
|
||||
if (!p->translated) {
|
||||
if (nv50_program_validate(p))
|
||||
nv50_gp_update_stateobj(nv50, p);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
|
||||
nv50_program_validate_data(nv50, p);
|
||||
|
||||
if (!(nv50->dirty & NV50_NEW_GEOMPROG))
|
||||
return NULL;
|
||||
|
||||
nv50_program_validate_code(nv50, p);
|
||||
|
||||
so_ref(p->so, &so);
|
||||
return so;
|
||||
}
|
||||
|
||||
/* XXX: this might not work correctly in all cases yet: we assume that
|
||||
* an FP generic input that is not written in the VP is gl_PointCoord.
|
||||
*/
|
||||
static uint32_t
|
||||
nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
|
||||
{
|
||||
struct nv50_program *vp = nv50->vertprog;
|
||||
struct nv50_program *fp = nv50->fragprog;
|
||||
unsigned i, c;
|
||||
|
||||
memset(pntc, 0, 8 * sizeof(uint32_t));
|
||||
|
||||
if (nv50->geomprog)
|
||||
vp = nv50->geomprog;
|
||||
|
||||
for (i = 0; i < fp->in_nr; i++) {
|
||||
unsigned j, n = util_bitcount(fp->in[i].mask);
|
||||
|
||||
if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
|
||||
m += n;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = 0; j < vp->out_nr; ++j)
|
||||
if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
|
||||
break;
|
||||
|
||||
if (j < vp->out_nr) {
|
||||
uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable;
|
||||
|
||||
if (!(en & (1 << vp->out[j].si))) {
|
||||
m += n;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* this is either PointCoord or replaced by sprite coords */
|
||||
for (c = 0; c < 4; c++) {
|
||||
if (!(fp->in[i].mask & (1 << c)))
|
||||
continue;
|
||||
pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
|
||||
++m;
|
||||
}
|
||||
}
|
||||
if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
|
||||
return 0;
|
||||
return (1 << 4);
|
||||
}
|
||||
|
||||
static int
|
||||
nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
|
||||
struct nv50_varying *in, struct nv50_varying *out)
|
||||
{
|
||||
int c;
|
||||
uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
|
||||
uint8_t *map = (uint8_t *)map32;
|
||||
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (mf & 1) {
|
||||
if (in->linear)
|
||||
lin[mid / 32] |= 1 << (mid % 32);
|
||||
if (mv & 1)
|
||||
map[mid] = oid;
|
||||
else
|
||||
if (c == 3)
|
||||
map[mid] |= 1;
|
||||
++mid;
|
||||
}
|
||||
|
||||
oid += mv & 1;
|
||||
mf >>= 1;
|
||||
mv >>= 1;
|
||||
}
|
||||
|
||||
return mid;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
nv50_fp_linkage_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nv50_program *vp;
|
||||
struct nv50_program *fp = nv50->fragprog;
|
||||
struct nouveau_stateobj *so;
|
||||
struct nv50_varying dummy;
|
||||
int i, n, c, m;
|
||||
|
||||
uint32_t map[16], lin[4], pntc[8];
|
||||
|
||||
uint32_t interp = fp->fp.interp;
|
||||
uint32_t colors = fp->fp.colors;
|
||||
uint32_t clip = 0x04;
|
||||
uint32_t psiz = 0x000;
|
||||
uint32_t primid = 0;
|
||||
uint32_t sysval = 0;
|
||||
|
||||
if (nv50->geomprog) {
|
||||
vp = nv50->geomprog;
|
||||
memset(map, 0x80, sizeof(map));
|
||||
} else {
|
||||
vp = nv50->vertprog;
|
||||
memset(map, 0x40, sizeof(map));
|
||||
}
|
||||
memset(lin, 0, sizeof(lin));
|
||||
|
||||
dummy.linear = 0;
|
||||
dummy.mask = 0xf; /* map all components of HPOS */
|
||||
m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
|
||||
|
||||
if (vp->vp.clpd < 0x40) {
|
||||
for (c = 0; c < vp->vp.clpd_nr; ++c) {
|
||||
map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
|
||||
++m;
|
||||
}
|
||||
clip |= vp->vp.clpd_nr << 8;
|
||||
}
|
||||
|
||||
colors |= m << 8; /* adjust BFC0 id */
|
||||
|
||||
/* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
|
||||
if (nv50->rasterizer->pipe.light_twoside) {
|
||||
for (i = 0; i < 2; ++i)
|
||||
m = nv50_vec4_map(map, m, lin,
|
||||
&fp->in[fp->vp.bfc[i]],
|
||||
&vp->out[vp->vp.bfc[i]]);
|
||||
}
|
||||
|
||||
colors += m - 4; /* adjust FFC0 id */
|
||||
interp |= m << 8; /* set mid where 'normal' FP inputs start */
|
||||
|
||||
dummy.mask = 0x0;
|
||||
for (i = 0; i < fp->in_nr; i++) {
|
||||
for (n = 0; n < vp->out_nr; ++n)
|
||||
if (vp->out[n].sn == fp->in[i].sn &&
|
||||
vp->out[n].si == fp->in[i].si)
|
||||
break;
|
||||
|
||||
m = nv50_vec4_map(map, m, lin,
|
||||
&fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
|
||||
}
|
||||
|
||||
/* PrimitiveID either is replaced by the system value, or
|
||||
* written by the geometry shader into an output register
|
||||
*/
|
||||
if (fp->gp.primid < 0x40) {
|
||||
i = (m % 4) * 8;
|
||||
map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
|
||||
primid = m++;
|
||||
}
|
||||
|
||||
if (nv50->rasterizer->pipe.point_size_per_vertex) {
|
||||
i = (m % 4) * 8;
|
||||
map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
|
||||
psiz = (m++ << 4) | 1;
|
||||
}
|
||||
|
||||
/* now fill the stateobj (at most 28 so_data) */
|
||||
so = so_new(10, 54, 0);
|
||||
|
||||
n = (m + 3) / 4;
|
||||
assert(m <= 64);
|
||||
if (vp->type == PIPE_SHADER_GEOMETRY) {
|
||||
so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
|
||||
so_data (so, m);
|
||||
so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
|
||||
so_datap (so, map, n);
|
||||
} else {
|
||||
so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
|
||||
so_data (so, vp->vp.attrs[2]);
|
||||
|
||||
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
|
||||
so_data (so, primid);
|
||||
|
||||
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
|
||||
so_data (so, m);
|
||||
so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
|
||||
so_datap (so, map, n);
|
||||
}
|
||||
|
||||
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
|
||||
so_data (so, colors);
|
||||
so_data (so, clip);
|
||||
so_data (so, sysval);
|
||||
so_data (so, psiz);
|
||||
|
||||
so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
|
||||
so_data (so, interp);
|
||||
|
||||
so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
|
||||
so_datap (so, lin, 4);
|
||||
|
||||
if (nv50->rasterizer->pipe.point_quad_rasterization) {
|
||||
so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
|
||||
so_data (so,
|
||||
nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
|
||||
|
||||
so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
|
||||
so_datap (so, pntc, 8);
|
||||
}
|
||||
|
||||
so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
|
||||
so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static int
|
||||
nv50_vp_gp_mapping(uint32_t *map32, int m,
|
||||
struct nv50_program *vp, struct nv50_program *gp)
|
||||
{
|
||||
uint8_t *map = (uint8_t *)map32;
|
||||
int i, j, c;
|
||||
|
||||
for (i = 0; i < gp->in_nr; ++i) {
|
||||
uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
|
||||
|
||||
for (j = 0; j < vp->out_nr; ++j) {
|
||||
if (vp->out[j].sn == gp->in[i].sn &&
|
||||
vp->out[j].si == gp->in[i].si) {
|
||||
mv = vp->out[j].mask;
|
||||
oid = vp->out[j].hw;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
|
||||
if (mg & mv & 1)
|
||||
map[m++] = oid;
|
||||
else
|
||||
if (mg & 1)
|
||||
map[m++] = (c == 3) ? 0x41 : 0x40;
|
||||
oid += mv & 1;
|
||||
}
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
nv50_gp_linkage_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||
struct nouveau_stateobj *so;
|
||||
struct nv50_program *vp = nv50->vertprog;
|
||||
struct nv50_program *gp = nv50->geomprog;
|
||||
uint32_t map[16];
|
||||
int m = 0;
|
||||
|
||||
if (!gp)
|
||||
return NULL;
|
||||
memset(map, 0, sizeof(map));
|
||||
|
||||
m = nv50_vp_gp_mapping(map, m, vp, gp);
|
||||
|
||||
so = so_new(3, 24 - 3, 0);
|
||||
|
||||
so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
|
||||
so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
|
||||
|
||||
assert(m <= 32);
|
||||
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
|
||||
so_data (so, m);
|
||||
|
||||
m = (m + 3) / 4;
|
||||
so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
|
||||
so_datap (so, map, m);
|
||||
|
||||
return so;
|
||||
}
|
||||
|
|
@ -48,6 +48,53 @@ nv50_colormask(unsigned mask)
|
|||
return cmask;
|
||||
}
|
||||
|
||||
static INLINE uint32_t
|
||||
nv50_blend_func(unsigned factor)
|
||||
{
|
||||
switch (factor) {
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA;
|
||||
case PIPE_BLENDFACTOR_DST_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA;
|
||||
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA;
|
||||
case PIPE_BLENDFACTOR_DST_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR;
|
||||
case PIPE_BLENDFACTOR_INV_DST_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE;
|
||||
case PIPE_BLENDFACTOR_CONST_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR;
|
||||
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR;
|
||||
case PIPE_BLENDFACTOR_CONST_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA;
|
||||
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA;
|
||||
case PIPE_BLENDFACTOR_SRC1_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR;
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR;
|
||||
case PIPE_BLENDFACTOR_SRC1_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA;
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA;
|
||||
default:
|
||||
return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
nv50_blend_state_create(struct pipe_context *pipe,
|
||||
const struct pipe_blend_state *cso)
|
||||
|
|
@ -80,12 +127,12 @@ nv50_blend_state_create(struct pipe_context *pipe,
|
|||
if (blend_enabled) {
|
||||
so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
|
||||
so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
|
||||
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_src_factor));
|
||||
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_dst_factor));
|
||||
so_data (so, nv50_blend_func(cso->rt[0].rgb_src_factor));
|
||||
so_data (so, nv50_blend_func(cso->rt[0].rgb_dst_factor));
|
||||
so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
|
||||
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_src_factor));
|
||||
so_data (so, nv50_blend_func(cso->rt[0].alpha_src_factor));
|
||||
so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
|
||||
so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_dst_factor));
|
||||
so_data (so, nv50_blend_func(cso->rt[0].alpha_dst_factor));
|
||||
}
|
||||
|
||||
if (cso->logicop_enable == 0 ) {
|
||||
|
|
@ -546,7 +593,6 @@ nv50_vp_state_create(struct pipe_context *pipe,
|
|||
|
||||
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
|
||||
p->type = PIPE_SHADER_VERTEX;
|
||||
tgsi_scan_shader(p->pipe.tokens, &p->info);
|
||||
return (void *)p;
|
||||
}
|
||||
|
||||
|
|
@ -578,7 +624,6 @@ nv50_fp_state_create(struct pipe_context *pipe,
|
|||
|
||||
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
|
||||
p->type = PIPE_SHADER_FRAGMENT;
|
||||
tgsi_scan_shader(p->pipe.tokens, &p->info);
|
||||
return (void *)p;
|
||||
}
|
||||
|
||||
|
|
@ -610,7 +655,6 @@ nv50_gp_state_create(struct pipe_context *pipe,
|
|||
|
||||
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
|
||||
p->type = PIPE_SHADER_GEOMETRY;
|
||||
tgsi_scan_shader(p->pipe.tokens, &p->info);
|
||||
return (void *)p;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ validate_fb(struct nv50_context *nv50)
|
|||
assert(h == fb->cbufs[i]->height);
|
||||
}
|
||||
|
||||
assert(nv50_format_table[fb->cbufs[i]->format].rt);
|
||||
|
||||
so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
|
||||
so_data (so, fb->cbufs[i]->width);
|
||||
so_data (so, fb->cbufs[i]->height);
|
||||
|
|
@ -65,39 +67,9 @@ validate_fb(struct nv50_context *nv50)
|
|||
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
|
||||
so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
|
||||
switch (fb->cbufs[i]->format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_R16G16B16A16_SNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_SNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_R16G16B16A16_UNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT);
|
||||
break;
|
||||
case PIPE_FORMAT_R16G16_SNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R16G16_SNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_R16G16_UNORM:
|
||||
so_data(so, NV50TCL_RT_FORMAT_R16G16_UNORM);
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("AIIII unknown format %s\n",
|
||||
util_format_name(fb->cbufs[i]->format));
|
||||
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
|
||||
break;
|
||||
}
|
||||
so_data(so, nv50_miptree(pt)->
|
||||
level[fb->cbufs[i]->level].tile_mode << 4);
|
||||
so_data (so, nv50_format_table[fb->cbufs[i]->format].rt);
|
||||
so_data (so, nv50_miptree(pt)->
|
||||
level[fb->cbufs[i]->level].tile_mode << 4);
|
||||
so_data(so, 0x00000000);
|
||||
|
||||
so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
|
||||
|
|
@ -117,33 +89,17 @@ validate_fb(struct nv50_context *nv50)
|
|||
assert(h == fb->zsbuf->height);
|
||||
}
|
||||
|
||||
assert(nv50_format_table[fb->zsbuf->format].rt);
|
||||
|
||||
so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
|
||||
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
|
||||
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
|
||||
switch (fb->zsbuf->format) {
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
|
||||
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
|
||||
so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
|
||||
break;
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("AIIII unknown format %s\n",
|
||||
util_format_name(fb->zsbuf->format));
|
||||
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
|
||||
break;
|
||||
}
|
||||
so_data(so, nv50_miptree(pt)->
|
||||
level[fb->zsbuf->level].tile_mode << 4);
|
||||
so_data(so, 0x00000000);
|
||||
so_data (so, nv50_format_table[fb->zsbuf->format].rt);
|
||||
so_data (so, nv50_miptree(pt)->
|
||||
level[fb->zsbuf->level].tile_mode << 4);
|
||||
so_data (so, 0x00000000);
|
||||
|
||||
so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
|
||||
so_data (so, 1);
|
||||
|
|
|
|||
|
|
@ -29,56 +29,6 @@
|
|||
|
||||
#include "util/u_format.h"
|
||||
|
||||
#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \
|
||||
[PIPE_FORMAT_##pf] = ( \
|
||||
NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
|
||||
NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
|
||||
NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
|
||||
NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
|
||||
NV50TIC_0_0_FMT_##f)
|
||||
|
||||
#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
|
||||
|
||||
static const uint32_t nv50_texture_formats[PIPE_FORMAT_COUNT] =
|
||||
{
|
||||
_(B8G8R8A8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8),
|
||||
_(B8G8R8A8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8),
|
||||
_(B8G8R8X8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
|
||||
_(B8G8R8X8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8),
|
||||
_(B5G5R5A1_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5),
|
||||
_(B4G4R4A4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4),
|
||||
|
||||
_(B5G6R5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
|
||||
|
||||
_(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
|
||||
_(L8_SRGB, UNORM, C0, C0, C0, ONE, 8),
|
||||
_(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
|
||||
_(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
|
||||
|
||||
_(L8A8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
|
||||
_(L8A8_SRGB, UNORM, C0, C0, C0, C1, 8_8),
|
||||
|
||||
_(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
|
||||
_(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
|
||||
_(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
|
||||
_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
|
||||
|
||||
_MIXED(S8_USCALED_Z24_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
|
||||
_MIXED(Z24_UNORM_S8_USCALED, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
|
||||
|
||||
_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
|
||||
_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
|
||||
_(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32),
|
||||
|
||||
_(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16),
|
||||
_(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
|
||||
|
||||
_MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
|
||||
};
|
||||
|
||||
#undef _
|
||||
#undef _MIXED
|
||||
|
||||
static INLINE uint32_t
|
||||
nv50_tic_swizzle(uint32_t tc, unsigned swz)
|
||||
{
|
||||
|
|
@ -106,7 +56,7 @@ nv50_tex_construct(struct nv50_sampler_view *view)
|
|||
struct nv50_miptree *mt = nv50_miptree(view->pipe.texture);
|
||||
uint32_t swz[4], *tic = view->tic;
|
||||
|
||||
tic[0] = nv50_texture_formats[view->pipe.format];
|
||||
tic[0] = nv50_format_table[view->pipe.format].tic;
|
||||
|
||||
swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
|
||||
swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
|
||||
|
|
|
|||
|
|
@ -45,24 +45,32 @@
|
|||
#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
|
||||
#define NV50TIC_0_0_TYPEA_SINT 0x00018000
|
||||
#define NV50TIC_0_0_TYPEA_UINT 0x00020000
|
||||
#define NV50TIC_0_0_TYPEA_SSCALED 0x00028000
|
||||
#define NV50TIC_0_0_TYPEA_USCALED 0x00030000
|
||||
#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
|
||||
#define NV50TIC_0_0_TYPEB_MASK 0x00007000
|
||||
#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
|
||||
#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
|
||||
#define NV50TIC_0_0_TYPEB_SINT 0x00003000
|
||||
#define NV50TIC_0_0_TYPEB_UINT 0x00004000
|
||||
#define NV50TIC_0_0_TYPEB_SSCALED 0x00005000
|
||||
#define NV50TIC_0_0_TYPEB_USCALED 0x00006000
|
||||
#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
|
||||
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
|
||||
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
|
||||
#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
|
||||
#define NV50TIC_0_0_TYPEG_SINT 0x00000600
|
||||
#define NV50TIC_0_0_TYPEG_UINT 0x00000800
|
||||
#define NV50TIC_0_0_TYPEG_SSCALED 0x00000a00
|
||||
#define NV50TIC_0_0_TYPEG_USCALED 0x00000c00
|
||||
#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
|
||||
#define NV50TIC_0_0_TYPER_MASK 0x000001c0
|
||||
#define NV50TIC_0_0_TYPER_UNORM 0x00000080
|
||||
#define NV50TIC_0_0_TYPER_SNORM 0x00000040
|
||||
#define NV50TIC_0_0_TYPER_SINT 0x000000c0
|
||||
#define NV50TIC_0_0_TYPER_UINT 0x00000100
|
||||
#define NV50TIC_0_0_TYPER_SSCALED 0x00000140
|
||||
#define NV50TIC_0_0_TYPER_USCALED 0x00000180
|
||||
#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
|
||||
#define NV50TIC_0_0_FMT_MASK 0x0000003f
|
||||
#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
|
||||
|
|
@ -90,6 +98,7 @@
|
|||
#define NV50TIC_0_0_FMT_8_24 0x0000002a
|
||||
#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
|
||||
#define NV50TIC_0_0_FMT_32_8 0x00000030
|
||||
#define NV50TIC_0_0_FMT_16_DEPTH 0x0000003a
|
||||
|
||||
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
|
||||
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
|
||||
|
|
|
|||
2050
src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
Normal file
2050
src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -29,96 +29,6 @@
|
|||
#include "nv50_context.h"
|
||||
#include "nv50_resource.h"
|
||||
|
||||
static INLINE uint32_t
|
||||
nv50_vbo_type_to_hw(enum pipe_format format)
|
||||
{
|
||||
const struct util_format_description *desc;
|
||||
|
||||
desc = util_format_description(format);
|
||||
assert(desc);
|
||||
|
||||
switch (desc->channel[0].type) {
|
||||
case UTIL_FORMAT_TYPE_FLOAT:
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
|
||||
case UTIL_FORMAT_TYPE_UNSIGNED:
|
||||
if (desc->channel[0].normalized) {
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
|
||||
}
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
|
||||
case UTIL_FORMAT_TYPE_SIGNED:
|
||||
if (desc->channel[0].normalized) {
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
|
||||
}
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
|
||||
/*
|
||||
case PIPE_FORMAT_TYPE_UINT:
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
|
||||
case PIPE_FORMAT_TYPE_SINT:
|
||||
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE uint32_t
|
||||
nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
|
||||
{
|
||||
static const uint32_t hw_values[] = {
|
||||
0, 0, 0, 0,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
|
||||
0, 0, 0, 0,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
|
||||
NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
|
||||
|
||||
/* we'd also have R11G11B10 and R10G10B10A2 */
|
||||
|
||||
assert(nr_c > 0 && nr_c <= 4);
|
||||
|
||||
if (size > 32)
|
||||
return 0;
|
||||
size >>= (3 - 2);
|
||||
|
||||
return hw_values[size + (nr_c - 1)];
|
||||
}
|
||||
|
||||
static INLINE uint32_t
|
||||
nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
|
||||
{
|
||||
uint32_t hw_type, hw_size;
|
||||
enum pipe_format pf = ve->src_format;
|
||||
const struct util_format_description *desc;
|
||||
unsigned size, nr_components;
|
||||
|
||||
desc = util_format_description(pf);
|
||||
assert(desc);
|
||||
|
||||
size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
|
||||
nr_components = util_format_get_nr_components(pf);
|
||||
|
||||
hw_type = nv50_vbo_type_to_hw(pf);
|
||||
hw_size = nv50_vbo_size_to_hw(size, nr_components);
|
||||
|
||||
if (!hw_type || !hw_size) {
|
||||
NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
|
||||
abort();
|
||||
return 0x24e80000;
|
||||
}
|
||||
|
||||
if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
|
||||
hw_size |= (1 << 31); /* no real swizzle bits :-( */
|
||||
|
||||
return (hw_type | hw_size);
|
||||
}
|
||||
|
||||
struct instance {
|
||||
struct nouveau_bo *bo;
|
||||
unsigned delta;
|
||||
|
|
@ -533,7 +443,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
|
|||
so_data (so, fui(v[1]));
|
||||
break;
|
||||
case 1:
|
||||
if (attrib == nv50->vertprog->cfg.edgeflag_in) {
|
||||
if (attrib == nv50->vertprog->vp.edgeflag) {
|
||||
so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
|
||||
so_data (so, v[0] ? 1 : 0);
|
||||
}
|
||||
|
|
@ -554,11 +464,8 @@ nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
|
|||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cso->num_elements; ++i) {
|
||||
struct pipe_vertex_element *ve = &cso->pipe[i];
|
||||
|
||||
cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
|
||||
}
|
||||
for (i = 0; i < cso->num_elements; ++i)
|
||||
cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
|
||||
}
|
||||
|
||||
struct nouveau_stateobj *
|
||||
|
|
@ -574,7 +481,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
|
|||
|
||||
nv50->vbo_fifo = 0;
|
||||
if (nv50->screen->force_push ||
|
||||
nv50->vertprog->cfg.edgeflag_in < 16)
|
||||
nv50->vertprog->vp.edgeflag < 16)
|
||||
nv50->vbo_fifo = 0xffff;
|
||||
|
||||
for (i = 0; i < nv50->vtxbuf_nr; i++) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue