mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
Merge remote-tracking branch 'origin/master' into vulkan
This commit is contained in:
commit
59f5728995
358 changed files with 12431 additions and 4750 deletions
|
|
@ -21,13 +21,8 @@
|
|||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# use c99 compiler by default
|
||||
ifeq ($(LOCAL_CC),)
|
||||
ifeq ($(LOCAL_IS_HOST_MODULE),true)
|
||||
LOCAL_CC := $(HOST_CC) -std=c99 -D_GNU_SOURCE
|
||||
else
|
||||
LOCAL_CC := $(TARGET_CC) -std=c99
|
||||
endif
|
||||
LOCAL_CFLAGS += -D_GNU_SOURCE
|
||||
endif
|
||||
|
||||
LOCAL_C_INCLUDES += \
|
||||
|
|
@ -37,6 +32,7 @@ LOCAL_C_INCLUDES += \
|
|||
MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
|
||||
# define ANDROID_VERSION (e.g., 4.0.x => 0x0400)
|
||||
LOCAL_CFLAGS += \
|
||||
-Wno-unused-parameter \
|
||||
-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
|
||||
-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \
|
||||
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
|
||||
|
|
@ -60,6 +56,10 @@ LOCAL_CFLAGS += \
|
|||
-fvisibility=hidden \
|
||||
-Wno-sign-compare
|
||||
|
||||
# mesa requires at least c99 compiler
|
||||
LOCAL_CONLYFLAGS += \
|
||||
-std=c99
|
||||
|
||||
ifeq ($(strip $(MESA_ENABLE_ASM)),true)
|
||||
ifeq ($(TARGET_ARCH),x86)
|
||||
LOCAL_CFLAGS += \
|
||||
|
|
|
|||
2
VERSION
2
VERSION
|
|
@ -1 +1 @@
|
|||
11.2.0-devel
|
||||
11.3.0-devel
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
|
|||
LIBDRM_INTEL_REQUIRED=2.4.61
|
||||
LIBDRM_NVVIEUX_REQUIRED=2.4.66
|
||||
LIBDRM_NOUVEAU_REQUIRED=2.4.66
|
||||
LIBDRM_FREEDRENO_REQUIRED=2.4.65
|
||||
LIBDRM_FREEDRENO_REQUIRED=2.4.67
|
||||
DRI2PROTO_REQUIRED=2.6
|
||||
DRI3PROTO_REQUIRED=1.0
|
||||
PRESENTPROTO_REQUIRED=1.0
|
||||
|
|
@ -2297,6 +2297,9 @@ dnl in LLVM_LIBS.
|
|||
|
||||
if test "x$MESA_LLVM" != x0; then
|
||||
|
||||
if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
|
||||
AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
|
||||
fi
|
||||
LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
|
||||
|
||||
dnl llvm-config may not give the right answer when llvm is built as a
|
||||
|
|
|
|||
10
docs/GL3.txt
10
docs/GL3.txt
|
|
@ -215,10 +215,10 @@ GLES3.1, GLSL ES 3.1
|
|||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_shader_image_load_store DONE (i965)
|
||||
GL_ARB_shader_image_size DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
|
@ -249,14 +249,14 @@ GLES3.2, GLSL ES 3.2
|
|||
GL_OES_primitive_bounding_box not started
|
||||
GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_shader_image_atomic not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
|
||||
GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store)
|
||||
GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
|
||||
GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
|
||||
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
|
||||
GL_OES_texture_border_clamp not started (based on GL_ARB_texture_border_clamp, which is done)
|
||||
GL_OES_texture_border_clamp DONE (all drivers)
|
||||
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
|
||||
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
|
||||
GL_OES_texture_stencil8 not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
|
||||
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
|
||||
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
|
||||
More info about these features and the work involved can be found at
|
||||
|
|
|
|||
|
|
@ -91,6 +91,14 @@ This is only valid for versions >= 3.0.
|
|||
<li> Mesa may not really implement all the features of the given version.
|
||||
(for developers only)
|
||||
</ul>
|
||||
<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
|
||||
glGetString(GL_VERSION) for OpenGL ES.
|
||||
<ul>
|
||||
<li> The format should be MAJOR.MINOR
|
||||
<li> Examples: 2.0, 3.0, 3.1
|
||||
<li> Mesa may not really implement all the features of the given version.
|
||||
(for developers only)
|
||||
</ul>
|
||||
<li>MESA_GLSL_VERSION_OVERRIDE - changes the value returned by
|
||||
glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
|
||||
"130". Mesa will not really implement all the features of the given language version
|
||||
|
|
@ -224,7 +232,7 @@ See src/mesa/state_tracker/st_debug.c for other options.
|
|||
<li>LP_PERF - a comma-separated list of options to selectively no-op various
|
||||
parts of the driver. See the source code for details.
|
||||
<li>LP_NUM_THREADS - an integer indicating how many threads to use for rendering.
|
||||
Zero turns of threading completely. The default value is the number of CPU
|
||||
Zero turns off threading completely. The default value is the number of CPU
|
||||
cores present.
|
||||
</ul>
|
||||
|
||||
|
|
@ -245,6 +253,25 @@ for details.
|
|||
</ul>
|
||||
|
||||
|
||||
<h3>VC4 driver environment variables</h3>
|
||||
<ul>
|
||||
<li>VC4_DEBUG - a comma-separated list of named flags, which do various things:
|
||||
<ul>
|
||||
<li>cl - dump command list during creation</li>
|
||||
<li>qpu - dump generated QPU instructions</li>
|
||||
<li>qir - dump QPU IR during program compile</li>
|
||||
<li>nir - dump NIR during program compile</li>
|
||||
<li>tgsi - dump TGSI during program compile</li>
|
||||
<li>shaderdb - dump program compile information for shader-db analysis</li>
|
||||
<li>perf - print during performance-related events</li>
|
||||
<li>norast - skip actual hardware execution of commands</li>
|
||||
<li>always_flush - flush after each draw call</li>
|
||||
<li>always_sync - wait for finish after each flush</li>
|
||||
<li>dump - write a GPU command stream trace file (VC4 simulator only)</li>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
|
||||
<p>
|
||||
Other Gallium drivers have their own environment variables. These may change
|
||||
frequently so the source code should be consulted for details.
|
||||
|
|
|
|||
|
|
@ -58,6 +58,9 @@ On Windows with MinGW, install flex and bison with:
|
|||
For MSVC on Windows, install
|
||||
<a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
|
||||
</li>
|
||||
<br>
|
||||
<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -74,6 +74,10 @@ TBD.
|
|||
|
||||
<h2>Changes</h2>
|
||||
|
||||
Microsoft Visual Studio 2013 or later is now required for building
|
||||
on Windows.
|
||||
Previously, Visual Studio 2008 and later were supported.
|
||||
|
||||
TBD.
|
||||
|
||||
</div>
|
||||
|
|
|
|||
61
docs/relnotes/11.3.0.html
Normal file
61
docs/relnotes/11.3.0.html
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.3.0 Release Notes / TBD</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.3.0 is a new development release.
|
||||
People who are concerned with stability and reliability should stick
|
||||
with a previous release or wait for Mesa 11.3.1.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.3.0 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
|
||||
<p>
|
||||
Note: some of the new features are only available with certain drivers.
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
|
||||
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
|
||||
</ul>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
TBD.
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
TBD.
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -112,6 +112,7 @@ CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
|
|||
CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
|
||||
CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
|
||||
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
|
||||
CHIPSET(0x190B, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
|
||||
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
|
||||
CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
|
||||
CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
|
||||
|
|
@ -122,16 +123,17 @@ CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
|
|||
CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
|
||||
CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
|
||||
CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
|
||||
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
|
||||
CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
|
||||
CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
|
||||
CHIPSET(0x1921, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
|
||||
CHIPSET(0x1923, skl_gt3, "Intel(R) Skylake GT3e")
|
||||
CHIPSET(0x1926, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
|
||||
CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
|
||||
CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
|
||||
CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics 555 (Skylake GT3e)")
|
||||
CHIPSET(0x192D, skl_gt3, "Intel(R) Iris Graphics P555 (Skylake GT3e)")
|
||||
CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
|
||||
CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
|
||||
CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
|
||||
CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
|
||||
CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
|
|||
DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
|
||||
DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
|
||||
|
||||
DECL_TYPE(sampler, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID)
|
||||
DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT)
|
||||
DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT)
|
||||
DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT)
|
||||
|
|
|
|||
|
|
@ -685,18 +685,6 @@ struct ast_type_qualifier {
|
|||
*/
|
||||
bool has_auxiliary_storage() const;
|
||||
|
||||
/**
|
||||
* \brief Return string representation of interpolation qualifier.
|
||||
*
|
||||
* If an interpolation qualifier is present, then return that qualifier's
|
||||
* string representation. Otherwise, return null. For example, if the
|
||||
* noperspective bit is set, then this returns "noperspective".
|
||||
*
|
||||
* If multiple interpolation qualifiers are somehow present, then the
|
||||
* returned string is undefined but not null.
|
||||
*/
|
||||
const char *interpolation_string() const;
|
||||
|
||||
bool merge_qualifier(YYLTYPE *loc,
|
||||
_mesa_glsl_parse_state *state,
|
||||
const ast_type_qualifier &q,
|
||||
|
|
|
|||
|
|
@ -1405,9 +1405,9 @@ emit_inline_matrix_constructor(const glsl_type *type,
|
|||
zero.d[i] = 0.0;
|
||||
|
||||
ir_instruction *inst =
|
||||
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
|
||||
new(ctx) ir_constant(rhs_var->type, &zero),
|
||||
NULL);
|
||||
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
|
||||
new(ctx) ir_constant(rhs_var->type, &zero),
|
||||
NULL);
|
||||
instructions->push_tail(inst);
|
||||
|
||||
ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
|
||||
|
|
@ -1422,36 +1422,36 @@ emit_inline_matrix_constructor(const glsl_type *type,
|
|||
* columns than rows).
|
||||
*/
|
||||
static const unsigned rhs_swiz[4][4] = {
|
||||
{ 0, 1, 1, 1 },
|
||||
{ 1, 0, 1, 1 },
|
||||
{ 1, 1, 0, 1 },
|
||||
{ 1, 1, 1, 0 }
|
||||
{ 0, 1, 1, 1 },
|
||||
{ 1, 0, 1, 1 },
|
||||
{ 1, 1, 0, 1 },
|
||||
{ 1, 1, 1, 0 }
|
||||
};
|
||||
|
||||
const unsigned cols_to_init = MIN2(type->matrix_columns,
|
||||
type->vector_elements);
|
||||
type->vector_elements);
|
||||
for (unsigned i = 0; i < cols_to_init; i++) {
|
||||
ir_constant *const col_idx = new(ctx) ir_constant(i);
|
||||
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
|
||||
ir_constant *const col_idx = new(ctx) ir_constant(i);
|
||||
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
|
||||
|
||||
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
|
||||
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
|
||||
type->vector_elements);
|
||||
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
|
||||
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
|
||||
type->vector_elements);
|
||||
|
||||
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
}
|
||||
|
||||
for (unsigned i = cols_to_init; i < type->matrix_columns; i++) {
|
||||
ir_constant *const col_idx = new(ctx) ir_constant(i);
|
||||
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
|
||||
ir_constant *const col_idx = new(ctx) ir_constant(i);
|
||||
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
|
||||
|
||||
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
|
||||
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
|
||||
type->vector_elements);
|
||||
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
|
||||
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
|
||||
type->vector_elements);
|
||||
|
||||
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
}
|
||||
} else if (first_param->type->is_matrix()) {
|
||||
/* From page 50 (56 of the PDF) of the GLSL 1.50 spec:
|
||||
|
|
@ -1469,36 +1469,43 @@ emit_inline_matrix_constructor(const glsl_type *type,
|
|||
/* If the source matrix is smaller, pre-initialize the relevant parts of
|
||||
* the destination matrix to the identity matrix.
|
||||
*/
|
||||
if ((src_matrix->type->matrix_columns < var->type->matrix_columns)
|
||||
|| (src_matrix->type->vector_elements < var->type->vector_elements)) {
|
||||
if ((src_matrix->type->matrix_columns < var->type->matrix_columns) ||
|
||||
(src_matrix->type->vector_elements < var->type->vector_elements)) {
|
||||
|
||||
/* If the source matrix has fewer rows, every column of the destination
|
||||
* must be initialized. Otherwise only the columns in the destination
|
||||
* that do not exist in the source must be initialized.
|
||||
*/
|
||||
unsigned col =
|
||||
(src_matrix->type->vector_elements < var->type->vector_elements)
|
||||
? 0 : src_matrix->type->matrix_columns;
|
||||
/* If the source matrix has fewer rows, every column of the destination
|
||||
* must be initialized. Otherwise only the columns in the destination
|
||||
* that do not exist in the source must be initialized.
|
||||
*/
|
||||
unsigned col =
|
||||
(src_matrix->type->vector_elements < var->type->vector_elements)
|
||||
? 0 : src_matrix->type->matrix_columns;
|
||||
|
||||
const glsl_type *const col_type = var->type->column_type();
|
||||
for (/* empty */; col < var->type->matrix_columns; col++) {
|
||||
ir_constant_data ident;
|
||||
const glsl_type *const col_type = var->type->column_type();
|
||||
for (/* empty */; col < var->type->matrix_columns; col++) {
|
||||
ir_constant_data ident;
|
||||
|
||||
ident.f[0] = 0.0;
|
||||
ident.f[1] = 0.0;
|
||||
ident.f[2] = 0.0;
|
||||
ident.f[3] = 0.0;
|
||||
if (!col_type->is_double()) {
|
||||
ident.f[0] = 0.0f;
|
||||
ident.f[1] = 0.0f;
|
||||
ident.f[2] = 0.0f;
|
||||
ident.f[3] = 0.0f;
|
||||
ident.f[col] = 1.0f;
|
||||
} else {
|
||||
ident.d[0] = 0.0;
|
||||
ident.d[1] = 0.0;
|
||||
ident.d[2] = 0.0;
|
||||
ident.d[3] = 0.0;
|
||||
ident.d[col] = 1.0;
|
||||
}
|
||||
|
||||
ident.f[col] = 1.0;
|
||||
ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
|
||||
|
||||
ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
|
||||
ir_rvalue *const lhs =
|
||||
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
|
||||
|
||||
ir_rvalue *const lhs =
|
||||
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
|
||||
|
||||
ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
}
|
||||
ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
|
||||
instructions->push_tail(inst);
|
||||
}
|
||||
}
|
||||
|
||||
/* Assign columns from the source matrix to the destination matrix.
|
||||
|
|
@ -1507,51 +1514,51 @@ emit_inline_matrix_constructor(const glsl_type *type,
|
|||
* generate a temporary and copy the parameter there.
|
||||
*/
|
||||
ir_variable *const rhs_var =
|
||||
new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
|
||||
ir_var_temporary);
|
||||
new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
|
||||
ir_var_temporary);
|
||||
instructions->push_tail(rhs_var);
|
||||
|
||||
ir_dereference *const rhs_var_ref =
|
||||
new(ctx) ir_dereference_variable(rhs_var);
|
||||
new(ctx) ir_dereference_variable(rhs_var);
|
||||
ir_instruction *const inst =
|
||||
new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
|
||||
new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
|
||||
instructions->push_tail(inst);
|
||||
|
||||
const unsigned last_row = MIN2(src_matrix->type->vector_elements,
|
||||
var->type->vector_elements);
|
||||
var->type->vector_elements);
|
||||
const unsigned last_col = MIN2(src_matrix->type->matrix_columns,
|
||||
var->type->matrix_columns);
|
||||
var->type->matrix_columns);
|
||||
|
||||
unsigned swiz[4] = { 0, 0, 0, 0 };
|
||||
for (unsigned i = 1; i < last_row; i++)
|
||||
swiz[i] = i;
|
||||
swiz[i] = i;
|
||||
|
||||
const unsigned write_mask = (1U << last_row) - 1;
|
||||
const unsigned write_mask = (1U << last_row) - 1;
|
||||
|
||||
for (unsigned i = 0; i < last_col; i++) {
|
||||
ir_dereference *const lhs =
|
||||
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
|
||||
ir_rvalue *const rhs_col =
|
||||
new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
|
||||
ir_dereference *const lhs =
|
||||
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
|
||||
ir_rvalue *const rhs_col =
|
||||
new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
|
||||
|
||||
/* If one matrix has columns that are smaller than the columns of the
|
||||
* other matrix, wrap the column access of the larger with a swizzle
|
||||
* so that the LHS and RHS of the assignment have the same size (and
|
||||
* therefore have the same type).
|
||||
*
|
||||
* It would be perfectly valid to unconditionally generate the
|
||||
* swizzles, but this will typically result in a more compact IR tree.
|
||||
*/
|
||||
ir_rvalue *rhs;
|
||||
if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
|
||||
rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
|
||||
} else {
|
||||
rhs = rhs_col;
|
||||
}
|
||||
/* If one matrix has columns that are smaller than the columns of the
|
||||
* other matrix, wrap the column access of the larger with a swizzle
|
||||
* so that the LHS and RHS of the assignment have the same size (and
|
||||
* therefore have the same type).
|
||||
*
|
||||
* It would be perfectly valid to unconditionally generate the
|
||||
* swizzles, but this will typically result in a more compact IR tree.
|
||||
*/
|
||||
ir_rvalue *rhs;
|
||||
if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
|
||||
rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
|
||||
} else {
|
||||
rhs = rhs_col;
|
||||
}
|
||||
|
||||
ir_instruction *inst =
|
||||
new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
|
||||
instructions->push_tail(inst);
|
||||
ir_instruction *inst =
|
||||
new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
|
||||
instructions->push_tail(inst);
|
||||
}
|
||||
} else {
|
||||
const unsigned cols = type->matrix_columns;
|
||||
|
|
|
|||
|
|
@ -1133,9 +1133,9 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
|
|||
case GLSL_TYPE_SAMPLER:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
case GLSL_TYPE_ATOMIC_UINT:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
/* I assume a comparison of a struct containing a sampler just
|
||||
* ignores the sampler present in the type.
|
||||
*/
|
||||
|
|
@ -2268,7 +2268,7 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
|
|||
type->sampler_array + 2 * type->sampler_shadow;
|
||||
const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
|
||||
assert(type_idx < 4);
|
||||
switch (type->sampler_type) {
|
||||
switch (type->sampled_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
switch (type->sampler_dimensionality) {
|
||||
case GLSL_SAMPLER_DIM_1D: {
|
||||
|
|
@ -2750,6 +2750,17 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
|
|||
"vertex shader inputs or fragment shader outputs",
|
||||
interpolation_string(interpolation));
|
||||
}
|
||||
} else if (state->es_shader &&
|
||||
((mode == ir_var_shader_in &&
|
||||
state->stage != MESA_SHADER_VERTEX) ||
|
||||
(mode == ir_var_shader_out &&
|
||||
state->stage != MESA_SHADER_FRAGMENT))) {
|
||||
/* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
|
||||
*
|
||||
* "When no interpolation qualifier is present, smooth interpolation
|
||||
* is used."
|
||||
*/
|
||||
interpolation = INTERP_QUALIFIER_SMOOTH;
|
||||
}
|
||||
|
||||
return interpolation;
|
||||
|
|
@ -2954,7 +2965,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
|||
"used on image function parameters");
|
||||
}
|
||||
|
||||
if (qual->image_base_type != base_type->sampler_type) {
|
||||
if (qual->image_base_type != base_type->sampled_type) {
|
||||
_mesa_glsl_error(loc, state, "format qualifier doesn't match the "
|
||||
"base data type of the image");
|
||||
}
|
||||
|
|
@ -4679,8 +4690,7 @@ ast_declarator_list::hir(exec_list *instructions,
|
|||
&& this->type->qualifier.has_interpolation()
|
||||
&& this->type->qualifier.flags.q.varying) {
|
||||
|
||||
const char *i = this->type->qualifier.interpolation_string();
|
||||
assert(i != NULL);
|
||||
const char *i = interpolation_string(var->data.interpolation);
|
||||
const char *s;
|
||||
if (this->type->qualifier.flags.q.centroid)
|
||||
s = "centroid varying";
|
||||
|
|
@ -4710,9 +4720,7 @@ ast_declarator_list::hir(exec_list *instructions,
|
|||
if (state->is_version(130, 300)
|
||||
&& this->type->qualifier.has_interpolation()) {
|
||||
|
||||
const char *i = this->type->qualifier.interpolation_string();
|
||||
assert(i != NULL);
|
||||
|
||||
const char *i = interpolation_string(var->data.interpolation);
|
||||
switch (state->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (this->type->qualifier.flags.q.in) {
|
||||
|
|
@ -6259,7 +6267,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
|
|||
decl_count);
|
||||
|
||||
bool first_member = true;
|
||||
bool first_member_has_explicit_location;
|
||||
bool first_member_has_explicit_location = false;
|
||||
|
||||
unsigned i = 0;
|
||||
foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
|
||||
|
|
|
|||
|
|
@ -102,19 +102,6 @@ ast_type_qualifier::has_auxiliary_storage() const
|
|||
|| this->flags.q.patch;
|
||||
}
|
||||
|
||||
const char*
|
||||
ast_type_qualifier::interpolation_string() const
|
||||
{
|
||||
if (this->flags.q.smooth)
|
||||
return "smooth";
|
||||
else if (this->flags.q.flat)
|
||||
return "flat";
|
||||
else if (this->flags.q.noperspective)
|
||||
return "noperspective";
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function merges both duplicate identifiers within a single layout and
|
||||
* multiple layout qualifiers on a single variable declaration. The
|
||||
|
|
|
|||
|
|
@ -448,8 +448,16 @@ shader_image_load_store(const _mesa_glsl_parse_state *state)
|
|||
static bool
|
||||
shader_image_atomic(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return (state->is_version(420, 0) ||
|
||||
state->ARB_shader_image_load_store_enable);
|
||||
return (state->is_version(420, 320) ||
|
||||
state->ARB_shader_image_load_store_enable ||
|
||||
state->OES_shader_image_atomic_enable);
|
||||
}
|
||||
|
||||
/**
 * Availability predicate for the float variant of image atomic exchange.
 *
 * \param state  parse state of the shader being compiled.
 *
 * \return true when state->is_version(450, 320) holds or the
 *         OES_shader_image_atomic extension is enabled in \p state.
 *
 * NOTE(review): the two is_version() arguments are presumably the minimum
 * desktop GLSL and GLSL ES versions - confirm against is_version().
 */
static bool
|
||||
shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return (state->is_version(450, 320) ||
|
||||
state->OES_shader_image_atomic_enable);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -577,17 +585,6 @@ private:
|
|||
unsigned num_arguments,
|
||||
unsigned flags);
|
||||
|
||||
enum image_function_flags {
|
||||
IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
|
||||
IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
|
||||
IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
|
||||
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
|
||||
IMAGE_FUNCTION_READ_ONLY = (1 << 4),
|
||||
IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
|
||||
IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
|
||||
IMAGE_FUNCTION_MS_ONLY = (1 << 7),
|
||||
};
|
||||
|
||||
/**
|
||||
* Create a new image built-in function for all known image types.
|
||||
* \p flags is a bitfield of \c image_function_flags flags.
|
||||
|
|
@ -836,6 +833,18 @@ private:
|
|||
/** @} */
|
||||
};
|
||||
|
||||
/**
 * Bit flags describing how an image built-in function is generated.
 *
 * Values are OR-ed together and passed as the \c flags argument of
 * add_image_function(), _image_prototype() and
 * get_image_available_predicate().  Only flags whose use is visible next to
 * this enum are annotated below; see the code that consumes the others for
 * their exact meaning.
 */
enum image_function_flags {
|
||||
IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
|
||||
/* Consulted by _image_prototype() when choosing the return type. */
IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
|
||||
/* _image_prototype() builds a 4-component data type when set, else 1. */
IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
|
||||
/* add_image_function() only accepts float-sampled image types when set. */
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
|
||||
IMAGE_FUNCTION_READ_ONLY = (1 << 4),
|
||||
IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
|
||||
/* Signature availability is gated by shader_image_atomic. */
IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
|
||||
/* add_image_function() only accepts GLSL_SAMPLER_DIM_MS images when set. */
IMAGE_FUNCTION_MS_ONLY = (1 << 7),
|
||||
/* Like AVAIL_ATOMIC, except float-sampled images are gated by
 * shader_image_atomic_exchange_float instead.
 */
IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE = (1 << 8)
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
/**
|
||||
|
|
@ -2921,7 +2930,7 @@ builtin_builder::add_image_function(const char *name,
|
|||
ir_function *f = new(mem_ctx) ir_function(name);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
|
||||
if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
|
||||
if ((types[i]->sampled_type != GLSL_TYPE_FLOAT ||
|
||||
(flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
|
||||
(types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
|
||||
!(flags & IMAGE_FUNCTION_MS_ONLY)))
|
||||
|
|
@ -2981,7 +2990,9 @@ builtin_builder::add_image_functions(bool glsl)
|
|||
add_image_function((glsl ? "imageAtomicExchange" :
|
||||
"__intrinsic_image_atomic_exchange"),
|
||||
"__intrinsic_image_atomic_exchange",
|
||||
&builtin_builder::_image_prototype, 1, atom_flags);
|
||||
&builtin_builder::_image_prototype, 1,
|
||||
(flags | IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
|
||||
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE));
|
||||
|
||||
add_image_function((glsl ? "imageAtomicCompSwap" :
|
||||
"__intrinsic_image_atomic_comp_swap"),
|
||||
|
|
@ -5232,13 +5243,28 @@ builtin_builder::_mid3(const glsl_type *type)
|
|||
return sig;
|
||||
}
|
||||
|
||||
/**
 * Pick the availability predicate for one image built-in signature.
 *
 * \param type   image type the signature is generated for; only its
 *               sampled_type is inspected here.
 * \param flags  bitfield of \c image_function_flags.
 *
 * \return shader_image_atomic_exchange_float for an
 *         IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE signature on a float-sampled
 *         image, shader_image_atomic for any other signature carrying
 *         IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE or IMAGE_FUNCTION_AVAIL_ATOMIC,
 *         and shader_image_load_store otherwise.
 */
static builtin_available_predicate
|
||||
get_image_available_predicate(const glsl_type *type, unsigned flags)
|
||||
{
|
||||
/* Atomic exchange on float images has its own predicate. */
if ((flags & IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE) &&
|
||||
type->sampled_type == GLSL_TYPE_FLOAT)
|
||||
return shader_image_atomic_exchange_float;
|
||||
|
||||
/* Every other atomic signature, including non-float exchange. */
else if (flags & (IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
|
||||
IMAGE_FUNCTION_AVAIL_ATOMIC))
|
||||
return shader_image_atomic;
|
||||
|
||||
/* No atomic flag set: fall back to the load/store predicate. */
else
|
||||
return shader_image_load_store;
|
||||
}
|
||||
|
||||
ir_function_signature *
|
||||
builtin_builder::_image_prototype(const glsl_type *image_type,
|
||||
unsigned num_arguments,
|
||||
unsigned flags)
|
||||
{
|
||||
const glsl_type *data_type = glsl_type::get_instance(
|
||||
image_type->sampler_type,
|
||||
image_type->sampled_type,
|
||||
(flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1),
|
||||
1);
|
||||
const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ?
|
||||
|
|
@ -5249,10 +5275,9 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
|
|||
ir_variable *coord = in_var(
|
||||
glsl_type::ivec(image_type->coordinate_components()), "coord");
|
||||
|
||||
const builtin_available_predicate avail =
|
||||
(flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic :
|
||||
shader_image_load_store);
|
||||
ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord);
|
||||
ir_function_signature *sig = new_sig(
|
||||
ret_type, get_image_available_predicate(image_type, flags),
|
||||
2, image, coord);
|
||||
|
||||
/* Sample index for multisample images. */
|
||||
if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS)
|
||||
|
|
|
|||
|
|
@ -770,11 +770,16 @@ builtin_variable_generator::generate_constants()
|
|||
}
|
||||
|
||||
if (state->is_version(430, 310) || state->ARB_compute_shader_enable) {
|
||||
add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS);
|
||||
add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS);
|
||||
add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS);
|
||||
add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS);
|
||||
add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS);
|
||||
add_const("gl_MaxComputeAtomicCounterBuffers",
|
||||
state->Const.MaxComputeAtomicCounterBuffers);
|
||||
add_const("gl_MaxComputeAtomicCounters",
|
||||
state->Const.MaxComputeAtomicCounters);
|
||||
add_const("gl_MaxComputeImageUniforms",
|
||||
state->Const.MaxComputeImageUniforms);
|
||||
add_const("gl_MaxComputeTextureImageUnits",
|
||||
state->Const.MaxComputeTextureImageUnits);
|
||||
add_const("gl_MaxComputeUniformComponents",
|
||||
state->Const.MaxComputeUniformComponents);
|
||||
|
||||
add_const_ivec3("gl_MaxComputeWorkGroupCount",
|
||||
state->Const.MaxComputeWorkGroupCount[0],
|
||||
|
|
|
|||
|
|
@ -2096,6 +2096,9 @@ _check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
|
|||
if (strncmp(identifier, "GL_", 3) == 0) {
|
||||
glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
|
||||
}
|
||||
if (strcmp(identifier, "defined") == 0) {
|
||||
glcpp_error (loc, parser, "\"defined\" cannot be used as a macro name");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -2388,6 +2391,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
|
|||
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
|
||||
|
||||
if (version >= 310) {
|
||||
if (extensions->ARB_shader_image_load_store)
|
||||
add_builtin_define(parser, "GL_OES_shader_image_atomic", 1);
|
||||
|
||||
if (extensions->OES_geometry_shader) {
|
||||
add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
|
||||
add_builtin_define(parser, "GL_OES_geometry_shader", 1);
|
||||
|
|
|
|||
|
|
@ -113,11 +113,7 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state,
|
|||
if (base == 16)
|
||||
digits += 2;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
unsigned __int64 value = _strtoui64(digits, NULL, base);
|
||||
#else
|
||||
unsigned long long value = strtoull(digits, NULL, base);
|
||||
#endif
|
||||
|
||||
lval->n = (int)value;
|
||||
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
|
|||
this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
|
||||
this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
|
||||
this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
|
||||
this->Const.MaxComputeAtomicCounters = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters;
|
||||
this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
|
||||
this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
|
||||
this->Const.MaxVertexAtomicCounterBuffers =
|
||||
|
|
@ -132,6 +133,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
|
|||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
|
||||
this->Const.MaxFragmentAtomicCounterBuffers =
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
|
||||
this->Const.MaxComputeAtomicCounterBuffers =
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers;
|
||||
this->Const.MaxCombinedAtomicCounterBuffers =
|
||||
ctx->Const.MaxCombinedAtomicBuffers;
|
||||
this->Const.MaxAtomicCounterBufferSize =
|
||||
|
|
@ -143,6 +146,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
|
|||
for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++)
|
||||
this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
|
||||
|
||||
this->Const.MaxComputeTextureImageUnits = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
|
||||
this->Const.MaxComputeUniformComponents = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents;
|
||||
|
||||
this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
|
||||
this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
|
||||
this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
|
||||
|
|
@ -151,6 +157,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
|
|||
this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
|
||||
this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
|
||||
this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
|
||||
this->Const.MaxComputeImageUniforms = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms;
|
||||
this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
|
||||
|
||||
/* ARB_viewport_array */
|
||||
|
|
@ -601,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
|
||||
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
|
||||
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
|
||||
EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store),
|
||||
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
|
||||
EXT(OES_texture_3D, false, true, dummy_true),
|
||||
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
|
||||
|
|
@ -946,27 +954,11 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
|
|||
"the interface block");
|
||||
}
|
||||
|
||||
/* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
|
||||
*
|
||||
* "GLSL ES 3.0 does not support interface blocks for shader inputs or
|
||||
* outputs."
|
||||
*
|
||||
* And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
|
||||
*
|
||||
* "Only variables output from a shader can be candidates for
|
||||
* invariance."
|
||||
*
|
||||
* From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
|
||||
*
|
||||
* "If optional qualifiers are used, they can include interpolation
|
||||
* qualifiers, auxiliary storage qualifiers, and storage qualifiers
|
||||
* and they must declare an input, output, or uniform member
|
||||
* consistent with the interface qualifier of the block"
|
||||
*/
|
||||
if (qualifier.flags.q.invariant)
|
||||
if (!(q.flags.q.in || q.flags.q.out) && qualifier.flags.q.invariant)
|
||||
_mesa_glsl_error(locp, state,
|
||||
"invariant qualifiers cannot be used "
|
||||
"with interface blocks members");
|
||||
"invariant qualifiers can be used only "
|
||||
"in interface block members for shader "
|
||||
"inputs or outputs");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -422,6 +422,11 @@ struct _mesa_glsl_parse_state {
|
|||
unsigned MaxAtomicCounterBufferSize;
|
||||
|
||||
/* ARB_compute_shader */
|
||||
unsigned MaxComputeAtomicCounterBuffers;
|
||||
unsigned MaxComputeAtomicCounters;
|
||||
unsigned MaxComputeImageUniforms;
|
||||
unsigned MaxComputeTextureImageUnits;
|
||||
unsigned MaxComputeUniformComponents;
|
||||
unsigned MaxComputeWorkGroupCount[3];
|
||||
unsigned MaxComputeWorkGroupSize[3];
|
||||
|
||||
|
|
@ -588,6 +593,8 @@ struct _mesa_glsl_parse_state {
|
|||
bool OES_geometry_point_size_warn;
|
||||
bool OES_geometry_shader_enable;
|
||||
bool OES_geometry_shader_warn;
|
||||
bool OES_shader_image_atomic_enable;
|
||||
bool OES_shader_image_atomic_warn;
|
||||
bool OES_standard_derivatives_enable;
|
||||
bool OES_standard_derivatives_warn;
|
||||
bool OES_texture_3D_enable;
|
||||
|
|
|
|||
|
|
@ -1442,7 +1442,7 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
|
|||
assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
|
||||
assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
|
||||
} else {
|
||||
assert(sampler->type->sampler_type == (int) type->base_type);
|
||||
assert(sampler->type->sampled_type == (int) type->base_type);
|
||||
if (sampler->type->sampler_shadow)
|
||||
assert(type->vector_elements == 4 || type->vector_elements == 1);
|
||||
else
|
||||
|
|
@ -1696,21 +1696,6 @@ interpolation_string(unsigned interpolation)
|
|||
return "";
|
||||
}
|
||||
|
||||
|
||||
glsl_interp_qualifier
|
||||
ir_variable::determine_interpolation_mode(bool flat_shade)
|
||||
{
|
||||
if (this->data.interpolation != INTERP_QUALIFIER_NONE)
|
||||
return (glsl_interp_qualifier) this->data.interpolation;
|
||||
int location = this->data.location;
|
||||
bool is_gl_Color =
|
||||
location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
|
||||
if (flat_shade && is_gl_Color)
|
||||
return INTERP_QUALIFIER_FLAT;
|
||||
else
|
||||
return INTERP_QUALIFIER_SMOOTH;
|
||||
}
|
||||
|
||||
const char *const ir_variable::warn_extension_table[] = {
|
||||
"",
|
||||
"GL_ARB_shader_stencil_export",
|
||||
|
|
|
|||
|
|
@ -431,17 +431,6 @@ public:
|
|||
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
|
||||
|
||||
|
||||
/**
|
||||
* Determine how this variable should be interpolated based on its
|
||||
* interpolation qualifier (if present), whether it is gl_Color or
|
||||
* gl_SecondaryColor, and whether flatshading is enabled in the current GL
|
||||
* state.
|
||||
*
|
||||
* The return value will always be either INTERP_QUALIFIER_SMOOTH,
|
||||
* INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT.
|
||||
*/
|
||||
glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
|
||||
|
||||
/**
|
||||
* Determine whether or not a variable is part of a uniform or
|
||||
* shader storage block.
|
||||
|
|
|
|||
|
|
@ -366,7 +366,6 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
|
|||
return c;
|
||||
}
|
||||
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
case GLSL_TYPE_ATOMIC_UINT:
|
||||
|
|
@ -374,6 +373,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
|
|||
case GLSL_TYPE_ERROR:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
assert(!"Should not get here.");
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,9 +88,9 @@ copy_constant_to_storage(union gl_constant_value *storage,
|
|||
case GLSL_TYPE_IMAGE:
|
||||
case GLSL_TYPE_ATOMIC_UINT:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
case GLSL_TYPE_VOID:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
case GLSL_TYPE_ERROR:
|
||||
/* All other types should have already been filtered by other
|
||||
* paths in the caller.
|
||||
|
|
|
|||
|
|
@ -649,15 +649,15 @@ private:
|
|||
current_var->data.image_write_only ? GL_WRITE_ONLY :
|
||||
GL_READ_WRITE);
|
||||
|
||||
for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
|
||||
prog->_LinkedShaders[shader_type]->
|
||||
ImageAccess[this->next_image + j] = access;
|
||||
const unsigned first = this->next_image;
|
||||
|
||||
/* Increment the image index by 1 for non-arrays and by the
|
||||
* number of array elements for arrays.
|
||||
*/
|
||||
this->next_image += MAX2(1, uniform->array_elements);
|
||||
|
||||
for (unsigned i = first; i < MIN2(next_image, MAX_IMAGE_UNIFORMS); i++)
|
||||
prog->_LinkedShaders[shader_type]->ImageAccess[i] = access;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1038,9 +1038,43 @@ assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
|
|||
uniform_size->map->put(hidden_uniform_start + hidden_id, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Search through the list of empty blocks to find one that fits the current
|
||||
* uniform.
|
||||
*/
|
||||
static int
|
||||
find_empty_block(struct gl_shader_program *prog,
|
||||
struct gl_uniform_storage *uniform)
|
||||
{
|
||||
const unsigned entries = MAX2(1, uniform->array_elements);
|
||||
|
||||
foreach_list_typed(struct empty_uniform_block, block, link,
|
||||
&prog->EmptyUniformLocations) {
|
||||
/* Found a block with enough slots to fit the uniform */
|
||||
if (block->slots == entries) {
|
||||
unsigned start = block->start;
|
||||
exec_node_remove(&block->link);
|
||||
ralloc_free(block);
|
||||
|
||||
return start;
|
||||
/* Found a block with more slots than needed. It can still be used. */
|
||||
} else if (block->slots > entries) {
|
||||
unsigned start = block->start;
|
||||
block->start += entries;
|
||||
block->slots -= entries;
|
||||
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
link_assign_uniform_locations(struct gl_shader_program *prog,
|
||||
unsigned int boolean_true)
|
||||
unsigned int boolean_true,
|
||||
unsigned int num_explicit_uniform_locs,
|
||||
unsigned int max_uniform_locs)
|
||||
{
|
||||
ralloc_free(prog->UniformStorage);
|
||||
prog->UniformStorage = NULL;
|
||||
|
|
@ -1131,6 +1165,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
|
||||
parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
|
||||
|
||||
unsigned total_entries = num_explicit_uniform_locs;
|
||||
unsigned empty_locs = prog->NumUniformRemapTable - num_explicit_uniform_locs;
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
continue;
|
||||
|
|
@ -1194,21 +1231,44 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
/* how many new entries for this uniform? */
|
||||
const unsigned entries = MAX2(1, uniforms[i].array_elements);
|
||||
|
||||
/* resize remap table to fit new entries */
|
||||
prog->UniformRemapTable =
|
||||
reralloc(prog,
|
||||
prog->UniformRemapTable,
|
||||
gl_uniform_storage *,
|
||||
prog->NumUniformRemapTable + entries);
|
||||
/* Find UniformRemapTable for empty blocks where we can fit this uniform. */
|
||||
int chosen_location = -1;
|
||||
|
||||
if (empty_locs)
|
||||
chosen_location = find_empty_block(prog, &uniforms[i]);
|
||||
|
||||
/* Add new entries to the total amount of entries. */
|
||||
total_entries += entries;
|
||||
|
||||
if (chosen_location != -1) {
|
||||
empty_locs -= entries;
|
||||
} else {
|
||||
chosen_location = prog->NumUniformRemapTable;
|
||||
|
||||
/* resize remap table to fit new entries */
|
||||
prog->UniformRemapTable =
|
||||
reralloc(prog,
|
||||
prog->UniformRemapTable,
|
||||
gl_uniform_storage *,
|
||||
prog->NumUniformRemapTable + entries);
|
||||
prog->NumUniformRemapTable += entries;
|
||||
}
|
||||
|
||||
/* set pointers for this uniform */
|
||||
for (unsigned j = 0; j < entries; j++)
|
||||
prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
|
||||
prog->UniformRemapTable[chosen_location + j] = &uniforms[i];
|
||||
|
||||
/* set the base location in remap table for the uniform */
|
||||
uniforms[i].remap_location = prog->NumUniformRemapTable;
|
||||
uniforms[i].remap_location = chosen_location;
|
||||
}
|
||||
|
||||
prog->NumUniformRemapTable += entries;
|
||||
/* Verify that total amount of entries for explicit and implicit locations
|
||||
* does not exceed MAX_UNIFORM_LOCATIONS.
|
||||
*/
|
||||
|
||||
if (total_entries > max_uniform_locs) {
|
||||
linker_error(prog, "count of uniform locations > MAX_UNIFORM_LOCATIONS"
|
||||
"(%u > %u)", total_entries, max_uniform_locs);
|
||||
}
|
||||
|
||||
/* Reserve all the explicit locations of the active subroutine uniforms. */
|
||||
|
|
|
|||
|
|
@ -1739,22 +1739,7 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
|
||||
if (var && var->data.mode == ir_var_shader_in &&
|
||||
var->data.is_unmatched_generic_inout) {
|
||||
if (prog->IsES) {
|
||||
/*
|
||||
* On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec:
|
||||
*
|
||||
* If the vertex shader declares but doesn't write to a
|
||||
* varying and the fragment shader declares and reads it,
|
||||
* is this an error?
|
||||
*
|
||||
* RESOLUTION: No.
|
||||
*/
|
||||
linker_warning(prog, "%s shader varying %s not written "
|
||||
"by %s shader\n.",
|
||||
_mesa_shader_stage_to_string(consumer->Stage),
|
||||
var->name,
|
||||
_mesa_shader_stage_to_string(producer->Stage));
|
||||
} else if (prog->Version <= 120) {
|
||||
if (!prog->IsES && prog->Version <= 120) {
|
||||
/* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
|
||||
*
|
||||
* Only those varying variables used (i.e. read) in
|
||||
|
|
@ -1772,6 +1757,12 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
_mesa_shader_stage_to_string(consumer->Stage),
|
||||
var->name,
|
||||
_mesa_shader_stage_to_string(producer->Stage));
|
||||
} else {
|
||||
linker_warning(prog, "%s shader varying %s not written "
|
||||
"by %s shader\n.",
|
||||
_mesa_shader_stage_to_string(consumer->Stage),
|
||||
var->name,
|
||||
_mesa_shader_stage_to_string(producer->Stage));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3008,12 +3008,13 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
* for a variable, checks for overlaps between other uniforms using explicit
|
||||
* locations.
|
||||
*/
|
||||
static bool
|
||||
static int
|
||||
reserve_explicit_locations(struct gl_shader_program *prog,
|
||||
string_to_uint_map *map, ir_variable *var)
|
||||
{
|
||||
unsigned slots = var->type->uniform_locations();
|
||||
unsigned max_loc = var->data.location + slots - 1;
|
||||
unsigned return_value = slots;
|
||||
|
||||
/* Resize remap table if locations do not fit in the current one. */
|
||||
if (max_loc + 1 > prog->NumUniformRemapTable) {
|
||||
|
|
@ -3024,7 +3025,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
|
|||
|
||||
if (!prog->UniformRemapTable) {
|
||||
linker_error(prog, "Out of memory during linking.\n");
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Initialize allocated space. */
|
||||
|
|
@ -3042,8 +3043,10 @@ reserve_explicit_locations(struct gl_shader_program *prog,
|
|||
|
||||
/* Possibly same uniform from a different stage, this is ok. */
|
||||
unsigned hash_loc;
|
||||
if (map->get(hash_loc, var->name) && hash_loc == loc - i)
|
||||
continue;
|
||||
if (map->get(hash_loc, var->name) && hash_loc == loc - i) {
|
||||
return_value = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* ARB_explicit_uniform_location specification states:
|
||||
*
|
||||
|
|
@ -3055,7 +3058,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
|
|||
"location qualifier for uniform %s overlaps "
|
||||
"previously used location\n",
|
||||
var->name);
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Initialize location as inactive before optimization
|
||||
|
|
@ -3067,7 +3070,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
|
|||
/* Note, base location used for arrays. */
|
||||
map->put(var->data.location, var->name);
|
||||
|
||||
return true;
|
||||
return return_value;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -3128,12 +3131,12 @@ reserve_subroutine_explicit_locations(struct gl_shader_program *prog,
|
|||
* any optimizations happen to handle also inactive uniforms and
|
||||
* inactive array elements that may get trimmed away.
|
||||
*/
|
||||
static void
|
||||
static int
|
||||
check_explicit_uniform_locations(struct gl_context *ctx,
|
||||
struct gl_shader_program *prog)
|
||||
{
|
||||
if (!ctx->Extensions.ARB_explicit_uniform_location)
|
||||
return;
|
||||
return -1;
|
||||
|
||||
/* This map is used to detect if overlapping explicit locations
|
||||
* occur with the same uniform (from different stage) or a different one.
|
||||
|
|
@ -3142,7 +3145,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
|
|||
|
||||
if (!uniform_map) {
|
||||
linker_error(prog, "Out of memory during linking.\n");
|
||||
return;
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned entries_total = 0;
|
||||
|
|
@ -3157,31 +3160,47 @@ check_explicit_uniform_locations(struct gl_context *ctx,
|
|||
if (!var || var->data.mode != ir_var_uniform)
|
||||
continue;
|
||||
|
||||
entries_total += var->type->uniform_locations();
|
||||
|
||||
if (var->data.explicit_location) {
|
||||
bool ret;
|
||||
bool ret = false;
|
||||
if (var->type->without_array()->is_subroutine())
|
||||
ret = reserve_subroutine_explicit_locations(prog, sh, var);
|
||||
else
|
||||
ret = reserve_explicit_locations(prog, uniform_map, var);
|
||||
else {
|
||||
int slots = reserve_explicit_locations(prog, uniform_map,
|
||||
var);
|
||||
if (slots != -1) {
|
||||
ret = true;
|
||||
entries_total += slots;
|
||||
}
|
||||
}
|
||||
if (!ret) {
|
||||
delete uniform_map;
|
||||
return;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Verify that total amount of entries for explicit and implicit locations
|
||||
* is less than MAX_UNIFORM_LOCATIONS.
|
||||
*/
|
||||
if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) {
|
||||
linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS"
|
||||
"(%u >= %u)", entries_total,
|
||||
ctx->Const.MaxUserAssignableUniformLocations);
|
||||
exec_list_make_empty(&prog->EmptyUniformLocations);
|
||||
struct empty_uniform_block *current_block = NULL;
|
||||
|
||||
for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) {
|
||||
/* We found empty space in UniformRemapTable. */
|
||||
if (prog->UniformRemapTable[i] == NULL) {
|
||||
/* We've found the beginning of a new continuous block of empty slots */
|
||||
if (!current_block || current_block->start + current_block->slots != i) {
|
||||
current_block = rzalloc(prog, struct empty_uniform_block);
|
||||
current_block->start = i;
|
||||
exec_list_push_tail(&prog->EmptyUniformLocations,
|
||||
&current_block->link);
|
||||
}
|
||||
|
||||
/* The current block continues, so we simply increment its slots */
|
||||
current_block->slots++;
|
||||
}
|
||||
}
|
||||
|
||||
delete uniform_map;
|
||||
return entries_total;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -4129,6 +4148,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
|
||||
tfeedback_decl *tfeedback_decls = NULL;
|
||||
unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
|
||||
unsigned int num_explicit_uniform_locs = 0;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL); // temporary linker context
|
||||
|
||||
|
|
@ -4310,7 +4330,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
last = i;
|
||||
}
|
||||
|
||||
check_explicit_uniform_locations(ctx, prog);
|
||||
num_explicit_uniform_locs = check_explicit_uniform_locations(ctx, prog);
|
||||
link_assign_subroutine_types(prog);
|
||||
|
||||
if (!prog->LinkStatus)
|
||||
|
|
@ -4541,7 +4561,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
goto done;
|
||||
|
||||
update_array_sizes(prog);
|
||||
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue);
|
||||
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
|
||||
num_explicit_uniform_locs,
|
||||
ctx->Const.MaxUserAssignableUniformLocations);
|
||||
link_assign_atomic_counter_resources(ctx, prog);
|
||||
store_fragdepth_layout(prog);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,9 @@ link_invalidate_variable_locations(exec_list *ir);
|
|||
|
||||
extern void
|
||||
link_assign_uniform_locations(struct gl_shader_program *prog,
|
||||
unsigned int boolean_true);
|
||||
unsigned int boolean_true,
|
||||
unsigned int num_explicit_uniform_locs,
|
||||
unsigned int max_uniform_locs);
|
||||
|
||||
extern void
|
||||
link_set_uniform_initializers(struct gl_shader_program *prog,
|
||||
|
|
@ -202,4 +204,17 @@ linker_error(gl_shader_program *prog, const char *fmt, ...);
|
|||
void
|
||||
linker_warning(gl_shader_program *prog, const char *fmt, ...);
|
||||
|
||||
/**
|
||||
* Sometimes there are empty slots left over in UniformRemapTable after we
|
||||
* allocate slots to explicit locations. This struct represents a single
|
||||
* continouous block of empty slots in UniformRemapTable.
|
||||
*/
|
||||
struct empty_uniform_block {
|
||||
struct exec_node link;
|
||||
/* The start location of the block */
|
||||
unsigned start;
|
||||
/* The number of slots in the block */
|
||||
unsigned slots;
|
||||
};
|
||||
|
||||
#endif /* GLSL_LINKER_H */
|
||||
|
|
|
|||
|
|
@ -62,8 +62,8 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
ir_visitor_status visit(ir_loop_jump *ir);
|
||||
ir_visitor_status visit_enter(ir_discard *ir);
|
||||
ir_visitor_status visit_enter(ir_loop_jump *ir);
|
||||
ir_visitor_status visit_enter(ir_loop *ir);
|
||||
ir_visitor_status visit_enter(ir_function_signature *ir);
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public:
|
|||
} /* anonymous namespace */
|
||||
|
||||
ir_visitor_status
|
||||
lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir)
|
||||
lower_discard_flow_visitor::visit(ir_loop_jump *ir)
|
||||
{
|
||||
if (ir->mode != ir_loop_jump::jump_continue)
|
||||
return visit_continue;
|
||||
|
|
|
|||
|
|
@ -58,10 +58,16 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
|
||||
ctx->Const.MaxComputeWorkGroupSize[2] = 64;
|
||||
ctx->Const.MaxComputeWorkGroupInvocations = 1024;
|
||||
ctx->Const.MaxComputeSharedMemorySize = 32768;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = 8;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = 8;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = 8;
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks = 12;
|
||||
|
||||
switch (ctx->Const.GLSLVersion) {
|
||||
case 100:
|
||||
|
|
@ -77,12 +83,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 128 * 4;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
|
||||
ctx->Const.MaxCombinedTextureImageUnits;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 16 * 4;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
|
||||
|
|
@ -103,12 +111,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 512;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
|
||||
ctx->Const.MaxCombinedTextureImageUnits;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 64;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
|
||||
|
|
@ -129,11 +139,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
|
||||
|
|
@ -153,17 +165,20 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
|
||||
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
|
||||
|
|
@ -191,11 +206,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 224;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ TEST(sampler_types, TYPE) \
|
|||
const glsl_type *type = glsl_type::TYPE##_type; \
|
||||
EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \
|
||||
EXPECT_EQ(DIM, type->sampler_dimensionality); \
|
||||
EXPECT_EQ(DATA_TYPE, type->sampler_type); \
|
||||
EXPECT_EQ(DATA_TYPE, type->sampled_type); \
|
||||
ARR; \
|
||||
SHAD; \
|
||||
EXPECT_EQ(COMPS, type->coordinate_components()); \
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
|
|||
case GLSL_TYPE_ERROR:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
ASSERT_TRUE(false);
|
||||
break;
|
||||
}
|
||||
|
|
@ -136,6 +137,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
|
|||
case GLSL_TYPE_ERROR:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
ASSERT_TRUE(false);
|
||||
break;
|
||||
}
|
||||
|
|
@ -241,6 +243,7 @@ verify_data(gl_constant_value *storage, unsigned storage_array_size,
|
|||
case GLSL_TYPE_ERROR:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
case GLSL_TYPE_FUNCTION:
|
||||
ASSERT_TRUE(false);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ glsl_type::glsl_type(GLenum gl_type,
|
|||
gl_type(gl_type),
|
||||
base_type(base_type),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
sampled_type(0), interface_packing(0),
|
||||
vector_elements(vector_elements), matrix_columns(matrix_columns),
|
||||
length(0)
|
||||
{
|
||||
|
|
@ -75,7 +75,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
|
|||
gl_type(gl_type),
|
||||
base_type(base_type),
|
||||
sampler_dimensionality(dim), sampler_shadow(shadow),
|
||||
sampler_array(array), sampler_type(type), interface_packing(0),
|
||||
sampler_array(array), sampled_type(type), interface_packing(0),
|
||||
length(0)
|
||||
{
|
||||
mtx_lock(&glsl_type::mutex);
|
||||
|
|
@ -101,7 +101,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
|
|||
gl_type(0),
|
||||
base_type(GLSL_TYPE_STRUCT),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
sampled_type(0), interface_packing(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(num_fields)
|
||||
{
|
||||
|
|
@ -141,7 +141,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
|
|||
gl_type(0),
|
||||
base_type(GLSL_TYPE_INTERFACE),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing((unsigned) packing),
|
||||
sampled_type(0), interface_packing((unsigned) packing),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(num_fields)
|
||||
{
|
||||
|
|
@ -180,7 +180,7 @@ glsl_type::glsl_type(const glsl_type *return_type,
|
|||
gl_type(0),
|
||||
base_type(GLSL_TYPE_FUNCTION),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
sampled_type(0), interface_packing(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(num_params)
|
||||
{
|
||||
|
|
@ -212,7 +212,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
|
|||
gl_type(0),
|
||||
base_type(GLSL_TYPE_SUBROUTINE),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
sampled_type(0), interface_packing(0),
|
||||
vector_elements(1), matrix_columns(1),
|
||||
length(0)
|
||||
{
|
||||
|
|
@ -428,7 +428,7 @@ _mesa_glsl_release_types(void)
|
|||
glsl_type::glsl_type(const glsl_type *array, unsigned length) :
|
||||
base_type(GLSL_TYPE_ARRAY),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
sampled_type(0), interface_packing(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(length), name(NULL)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -56,11 +56,11 @@ enum glsl_base_type {
|
|||
GLSL_TYPE_IMAGE,
|
||||
GLSL_TYPE_ATOMIC_UINT,
|
||||
GLSL_TYPE_STRUCT,
|
||||
GLSL_TYPE_FUNCTION,
|
||||
GLSL_TYPE_INTERFACE,
|
||||
GLSL_TYPE_ARRAY,
|
||||
GLSL_TYPE_VOID,
|
||||
GLSL_TYPE_SUBROUTINE,
|
||||
GLSL_TYPE_FUNCTION,
|
||||
GLSL_TYPE_ERROR
|
||||
};
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ struct glsl_type {
|
|||
unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
|
||||
unsigned sampler_shadow:1;
|
||||
unsigned sampler_array:1;
|
||||
unsigned sampler_type:2; /**< Type of data returned using this
|
||||
unsigned sampled_type:2; /**< Type of data returned using this
|
||||
* sampler or image. Only \c
|
||||
* GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
|
||||
* and \c GLSL_TYPE_UINT are valid.
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ glsl_base_type
|
|||
glsl_get_sampler_result_type(const struct glsl_type *type)
|
||||
{
|
||||
assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
|
||||
return (glsl_base_type)type->sampler_type;
|
||||
return (glsl_base_type)type->sampled_type;
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
@ -314,6 +314,12 @@ glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
|
|||
return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
|
||||
}
|
||||
|
||||
const struct glsl_type *
|
||||
glsl_bare_sampler_type()
|
||||
{
|
||||
return glsl_type::sampler_type;
|
||||
}
|
||||
|
||||
const struct glsl_type *
|
||||
glsl_image_type(enum glsl_sampler_dim dim, bool is_array,
|
||||
enum glsl_base_type base_type)
|
||||
|
|
@ -331,6 +337,7 @@ glsl_function_type(const glsl_type *return_type,
|
|||
/**
 * Return the type obtained by swapping the row and column counts of a
 * matrix type.
 *
 * \param type  must be a matrix type (checked by assert in debug builds).
 * \return the glsl_type instance with the same base type whose first
 *         dimension is \c type->matrix_columns and whose second dimension
 *         is \c type->vector_elements.
 */
const glsl_type *
glsl_transposed_type(const struct glsl_type *type)
{
   assert(glsl_type_is_matrix(type));

   /* The dimensions are passed to get_instance() in swapped order. */
   const unsigned transposed_rows = type->matrix_columns;
   const unsigned transposed_cols = type->vector_elements;

   return glsl_type::get_instance(type->base_type,
                                  transposed_rows, transposed_cols);
}
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@ const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
|
|||
const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
|
||||
bool is_shadow, bool is_array,
|
||||
enum glsl_base_type base_type);
|
||||
const struct glsl_type *glsl_bare_sampler_type();
|
||||
const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim,
|
||||
bool is_array,
|
||||
enum glsl_base_type base_type);
|
||||
|
|
|
|||
|
|
@ -44,9 +44,8 @@ LOCAL_CFLAGS := \
|
|||
-DHAVE_ANDROID_PLATFORM
|
||||
|
||||
ifeq ($(MESA_LOLLIPOP_BUILD),true)
|
||||
LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
|
||||
LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
|
||||
LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
|
||||
LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
|
||||
LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
|
||||
else
|
||||
LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -532,7 +532,12 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
|
|||
{ HAL_PIXEL_FORMAT_RGB_888, { 0xff, 0xff00, 0xff0000, 0x0 } },
|
||||
{ HAL_PIXEL_FORMAT_RGB_565, { 0xf800, 0x7e0, 0x1f, 0x0 } },
|
||||
{ HAL_PIXEL_FORMAT_BGRA_8888, { 0xff0000, 0xff00, 0xff, 0xff000000 } },
|
||||
{ 0, 0, { 0, 0, 0, 0 } }
|
||||
{ 0, { 0, 0, 0, 0 } }
|
||||
};
|
||||
EGLint config_attrs[] = {
|
||||
EGL_NATIVE_VISUAL_ID, 0,
|
||||
EGL_NATIVE_VISUAL_TYPE, 0,
|
||||
EGL_NONE
|
||||
};
|
||||
int count, i, j;
|
||||
|
||||
|
|
@ -540,6 +545,9 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
|
|||
for (i = 0; visuals[i].format; i++) {
|
||||
int format_count = 0;
|
||||
|
||||
config_attrs[1] = visuals[i].format;
|
||||
config_attrs[3] = visuals[i].format;
|
||||
|
||||
for (j = 0; dri2_dpy->driver_configs[j]; j++) {
|
||||
const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
|
||||
struct dri2_egl_config *dri2_conf;
|
||||
|
|
@ -553,10 +561,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
|
|||
continue;
|
||||
|
||||
dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j],
|
||||
count + 1, surface_type, NULL, visuals[i].rgba_masks);
|
||||
count + 1, surface_type, config_attrs, visuals[i].rgba_masks);
|
||||
if (dri2_conf) {
|
||||
dri2_conf->base.NativeVisualID = visuals[i].format;
|
||||
dri2_conf->base.NativeVisualType = visuals[i].format;
|
||||
count++;
|
||||
format_count++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -472,6 +472,8 @@ dri2_x11_get_buffers(__DRIdrawable * driDrawable,
|
|||
dri2_surf->drawable,
|
||||
count, count, attachments);
|
||||
reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn, cookie, NULL);
|
||||
if (reply == NULL)
|
||||
return NULL;
|
||||
buffers = xcb_dri2_get_buffers_buffers (reply);
|
||||
if (buffers == NULL)
|
||||
return NULL;
|
||||
|
|
@ -870,7 +872,12 @@ dri2_x11_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
|
|||
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
|
||||
|
||||
if (dri2_dpy->dri2) {
|
||||
return dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1;
|
||||
if (dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1) {
|
||||
return EGL_TRUE;
|
||||
}
|
||||
/* Swap failed with a window drawable. */
|
||||
_eglError(EGL_BAD_NATIVE_WINDOW, __FUNCTION__);
|
||||
return EGL_FALSE;
|
||||
} else {
|
||||
assert(dri2_dpy->swrast);
|
||||
|
||||
|
|
|
|||
|
|
@ -1555,8 +1555,14 @@ eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *valu
|
|||
static EGLBoolean EGLAPIENTRY
|
||||
eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
|
||||
{
|
||||
EGLAttrib attrib = *value;
|
||||
EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
|
||||
EGLAttrib attrib;
|
||||
EGLBoolean result;
|
||||
|
||||
if (!value)
|
||||
RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE);
|
||||
|
||||
attrib = *value;
|
||||
result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
|
||||
|
||||
/* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR:
|
||||
*
|
||||
|
|
|
|||
|
|
@ -144,9 +144,6 @@ EGLBoolean
|
|||
_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
||||
EGLint attribute, EGLAttrib *value)
|
||||
{
|
||||
if (!value)
|
||||
return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");
|
||||
|
||||
switch (attribute) {
|
||||
case EGL_SYNC_TYPE_KHR:
|
||||
*value = sync->Type;
|
||||
|
|
|
|||
|
|
@ -69,8 +69,11 @@ struct cso_context {
|
|||
|
||||
boolean has_geometry_shader;
|
||||
boolean has_tessellation;
|
||||
boolean has_compute_shader;
|
||||
boolean has_streamout;
|
||||
|
||||
unsigned saved_state; /**< bitmask of CSO_BIT_x flags */
|
||||
|
||||
struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
|
||||
unsigned nr_fragment_views;
|
||||
|
||||
|
|
@ -106,6 +109,7 @@ struct cso_context {
|
|||
void *geometry_shader, *geometry_shader_saved;
|
||||
void *tessctrl_shader, *tessctrl_shader_saved;
|
||||
void *tesseval_shader, *tesseval_shader_saved;
|
||||
void *compute_shader;
|
||||
void *velements, *velements_saved;
|
||||
struct pipe_query *render_condition, *render_condition_saved;
|
||||
uint render_condition_mode, render_condition_mode_saved;
|
||||
|
|
@ -272,6 +276,15 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
|
|||
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
|
||||
ctx->has_tessellation = TRUE;
|
||||
}
|
||||
if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
|
||||
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
|
||||
int supported_irs =
|
||||
pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
|
||||
PIPE_SHADER_CAP_SUPPORTED_IRS);
|
||||
if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
|
||||
ctx->has_compute_shader = TRUE;
|
||||
}
|
||||
}
|
||||
if (pipe->screen->get_param(pipe->screen,
|
||||
PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
|
||||
ctx->has_streamout = TRUE;
|
||||
|
|
@ -333,6 +346,10 @@ void cso_destroy_context( struct cso_context *ctx )
|
|||
ctx->pipe->bind_tes_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
|
||||
}
|
||||
if (ctx->has_compute_shader) {
|
||||
ctx->pipe->bind_compute_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
|
||||
}
|
||||
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
|
||||
|
||||
if (ctx->has_streamout)
|
||||
|
|
@ -425,13 +442,15 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
|
|||
return PIPE_OK;
|
||||
}
|
||||
|
||||
void cso_save_blend(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_blend(struct cso_context *ctx)
|
||||
{
|
||||
assert(!ctx->blend_saved);
|
||||
ctx->blend_saved = ctx->blend;
|
||||
}
|
||||
|
||||
void cso_restore_blend(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_blend(struct cso_context *ctx)
|
||||
{
|
||||
if (ctx->blend != ctx->blend_saved) {
|
||||
ctx->blend = ctx->blend_saved;
|
||||
|
|
@ -488,13 +507,15 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx,
|
|||
return PIPE_OK;
|
||||
}
|
||||
|
||||
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_depth_stencil_alpha(struct cso_context *ctx)
|
||||
{
|
||||
assert(!ctx->depth_stencil_saved);
|
||||
ctx->depth_stencil_saved = ctx->depth_stencil;
|
||||
}
|
||||
|
||||
void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_depth_stencil_alpha(struct cso_context *ctx)
|
||||
{
|
||||
if (ctx->depth_stencil != ctx->depth_stencil_saved) {
|
||||
ctx->depth_stencil = ctx->depth_stencil_saved;
|
||||
|
|
@ -547,13 +568,15 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
|
|||
return PIPE_OK;
|
||||
}
|
||||
|
||||
void cso_save_rasterizer(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_rasterizer(struct cso_context *ctx)
|
||||
{
|
||||
assert(!ctx->rasterizer_saved);
|
||||
ctx->rasterizer_saved = ctx->rasterizer;
|
||||
}
|
||||
|
||||
void cso_restore_rasterizer(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_rasterizer(struct cso_context *ctx)
|
||||
{
|
||||
if (ctx->rasterizer != ctx->rasterizer_saved) {
|
||||
ctx->rasterizer = ctx->rasterizer_saved;
|
||||
|
|
@ -581,13 +604,15 @@ void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
|
|||
ctx->pipe->delete_fs_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
void cso_save_fragment_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_fragment_shader(struct cso_context *ctx)
|
||||
{
|
||||
assert(!ctx->fragment_shader_saved);
|
||||
ctx->fragment_shader_saved = ctx->fragment_shader;
|
||||
}
|
||||
|
||||
void cso_restore_fragment_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_fragment_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
|
||||
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
|
||||
|
|
@ -615,13 +640,15 @@ void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
|
|||
ctx->pipe->delete_vs_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
void cso_save_vertex_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_vertex_shader(struct cso_context *ctx)
|
||||
{
|
||||
assert(!ctx->vertex_shader_saved);
|
||||
ctx->vertex_shader_saved = ctx->vertex_shader;
|
||||
}
|
||||
|
||||
void cso_restore_vertex_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_vertex_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
|
||||
ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
|
||||
|
|
@ -640,12 +667,14 @@ void cso_set_framebuffer(struct cso_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_framebuffer(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_framebuffer(struct cso_context *ctx)
|
||||
{
|
||||
util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
|
||||
}
|
||||
|
||||
void cso_restore_framebuffer(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_framebuffer(struct cso_context *ctx)
|
||||
{
|
||||
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
|
||||
util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
|
||||
|
|
@ -664,13 +693,33 @@ void cso_set_viewport(struct cso_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_viewport(struct cso_context *ctx)
|
||||
/**
|
||||
* Setup viewport state for given width and height (position is always (0,0)).
|
||||
* Invert the Y axis if 'invert' is true.
|
||||
*/
|
||||
void
|
||||
cso_set_viewport_dims(struct cso_context *ctx,
|
||||
float width, float height, boolean invert)
|
||||
{
|
||||
struct pipe_viewport_state vp;
|
||||
vp.scale[0] = width * 0.5f;
|
||||
vp.scale[1] = height * (invert ? -0.5f : 0.5f);
|
||||
vp.scale[2] = 0.5f;
|
||||
vp.translate[0] = 0.5f * width;
|
||||
vp.translate[1] = 0.5f * height;
|
||||
vp.translate[2] = 0.5f;
|
||||
cso_set_viewport(ctx, &vp);
|
||||
}
|
||||
|
||||
static void
|
||||
cso_save_viewport(struct cso_context *ctx)
|
||||
{
|
||||
ctx->vp_saved = ctx->vp;
|
||||
}
|
||||
|
||||
|
||||
void cso_restore_viewport(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_viewport(struct cso_context *ctx)
|
||||
{
|
||||
if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) {
|
||||
ctx->vp = ctx->vp_saved;
|
||||
|
|
@ -696,12 +745,14 @@ void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask)
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_sample_mask(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_sample_mask(struct cso_context *ctx)
|
||||
{
|
||||
ctx->sample_mask_saved = ctx->sample_mask;
|
||||
}
|
||||
|
||||
void cso_restore_sample_mask(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_sample_mask(struct cso_context *ctx)
|
||||
{
|
||||
cso_set_sample_mask(ctx, ctx->sample_mask_saved);
|
||||
}
|
||||
|
|
@ -714,12 +765,14 @@ void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples)
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_min_samples(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_min_samples(struct cso_context *ctx)
|
||||
{
|
||||
ctx->min_samples_saved = ctx->min_samples;
|
||||
}
|
||||
|
||||
void cso_restore_min_samples(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_min_samples(struct cso_context *ctx)
|
||||
{
|
||||
cso_set_min_samples(ctx, ctx->min_samples_saved);
|
||||
}
|
||||
|
|
@ -733,13 +786,15 @@ void cso_set_stencil_ref(struct cso_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_stencil_ref(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_stencil_ref(struct cso_context *ctx)
|
||||
{
|
||||
ctx->stencil_ref_saved = ctx->stencil_ref;
|
||||
}
|
||||
|
||||
|
||||
void cso_restore_stencil_ref(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_stencil_ref(struct cso_context *ctx)
|
||||
{
|
||||
if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved,
|
||||
sizeof(ctx->stencil_ref))) {
|
||||
|
|
@ -764,14 +819,16 @@ void cso_set_render_condition(struct cso_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
void cso_save_render_condition(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_render_condition(struct cso_context *ctx)
|
||||
{
|
||||
ctx->render_condition_saved = ctx->render_condition;
|
||||
ctx->render_condition_cond_saved = ctx->render_condition_cond;
|
||||
ctx->render_condition_mode_saved = ctx->render_condition_mode;
|
||||
}
|
||||
|
||||
void cso_restore_render_condition(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_render_condition(struct cso_context *ctx)
|
||||
{
|
||||
cso_set_render_condition(ctx, ctx->render_condition_saved,
|
||||
ctx->render_condition_cond_saved,
|
||||
|
|
@ -798,7 +855,8 @@ void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
|
|||
ctx->pipe->delete_gs_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
void cso_save_geometry_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_geometry_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_geometry_shader) {
|
||||
return;
|
||||
|
|
@ -808,7 +866,8 @@ void cso_save_geometry_shader(struct cso_context *ctx)
|
|||
ctx->geometry_shader_saved = ctx->geometry_shader;
|
||||
}
|
||||
|
||||
void cso_restore_geometry_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_geometry_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_geometry_shader) {
|
||||
return;
|
||||
|
|
@ -841,7 +900,8 @@ void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle)
|
|||
ctx->pipe->delete_tcs_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
void cso_save_tessctrl_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_tessctrl_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_tessellation) {
|
||||
return;
|
||||
|
|
@ -851,7 +911,8 @@ void cso_save_tessctrl_shader(struct cso_context *ctx)
|
|||
ctx->tessctrl_shader_saved = ctx->tessctrl_shader;
|
||||
}
|
||||
|
||||
void cso_restore_tessctrl_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_tessctrl_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_tessellation) {
|
||||
return;
|
||||
|
|
@ -884,7 +945,8 @@ void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle)
|
|||
ctx->pipe->delete_tes_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
void cso_save_tesseval_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_tesseval_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_tessellation) {
|
||||
return;
|
||||
|
|
@ -894,7 +956,8 @@ void cso_save_tesseval_shader(struct cso_context *ctx)
|
|||
ctx->tesseval_shader_saved = ctx->tesseval_shader;
|
||||
}
|
||||
|
||||
void cso_restore_tesseval_shader(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_tesseval_shader(struct cso_context *ctx)
|
||||
{
|
||||
if (!ctx->has_tessellation) {
|
||||
return;
|
||||
|
|
@ -907,6 +970,26 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
|
|||
ctx->tesseval_shader_saved = NULL;
|
||||
}
|
||||
|
||||
void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle)
|
||||
{
|
||||
assert(ctx->has_compute_shader || !handle);
|
||||
|
||||
if (ctx->has_compute_shader && ctx->compute_shader != handle) {
|
||||
ctx->compute_shader = handle;
|
||||
ctx->pipe->bind_compute_state(ctx->pipe, handle);
|
||||
}
|
||||
}
|
||||
|
||||
void cso_delete_compute_shader(struct cso_context *ctx, void *handle)
|
||||
{
|
||||
if (handle == ctx->compute_shader) {
|
||||
/* unbind before deleting */
|
||||
ctx->pipe->bind_compute_state(ctx->pipe, NULL);
|
||||
ctx->compute_shader = NULL;
|
||||
}
|
||||
ctx->pipe->delete_compute_state(ctx->pipe, handle);
|
||||
}
|
||||
|
||||
enum pipe_error
|
||||
cso_set_vertex_elements(struct cso_context *ctx,
|
||||
unsigned count,
|
||||
|
|
@ -967,7 +1050,8 @@ cso_set_vertex_elements(struct cso_context *ctx,
|
|||
return PIPE_OK;
|
||||
}
|
||||
|
||||
void cso_save_vertex_elements(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_vertex_elements(struct cso_context *ctx)
|
||||
{
|
||||
struct u_vbuf *vbuf = ctx->vbuf;
|
||||
|
||||
|
|
@ -980,7 +1064,8 @@ void cso_save_vertex_elements(struct cso_context *ctx)
|
|||
ctx->velements_saved = ctx->velements;
|
||||
}
|
||||
|
||||
void cso_restore_vertex_elements(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_vertex_elements(struct cso_context *ctx)
|
||||
{
|
||||
struct u_vbuf *vbuf = ctx->vbuf;
|
||||
|
||||
|
|
@ -1032,7 +1117,8 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
|
|||
ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
|
||||
}
|
||||
|
||||
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
|
||||
static void
|
||||
cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
|
||||
{
|
||||
struct u_vbuf *vbuf = ctx->vbuf;
|
||||
|
||||
|
|
@ -1047,7 +1133,8 @@ void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
|
|||
sizeof(struct pipe_vertex_buffer));
|
||||
}
|
||||
|
||||
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
|
||||
static void
|
||||
cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
|
||||
{
|
||||
struct u_vbuf *vbuf = ctx->vbuf;
|
||||
|
||||
|
|
@ -1165,7 +1252,7 @@ cso_set_samplers(struct cso_context *ctx,
|
|||
return error;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
cso_save_fragment_samplers(struct cso_context *ctx)
|
||||
{
|
||||
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||
|
|
@ -1176,7 +1263,7 @@ cso_save_fragment_samplers(struct cso_context *ctx)
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
static void
|
||||
cso_restore_fragment_samplers(struct cso_context *ctx)
|
||||
{
|
||||
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||
|
|
@ -1223,7 +1310,7 @@ cso_set_sampler_views(struct cso_context *ctx,
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
static void
|
||||
cso_save_fragment_sampler_views(struct cso_context *ctx)
|
||||
{
|
||||
unsigned i;
|
||||
|
|
@ -1238,7 +1325,7 @@ cso_save_fragment_sampler_views(struct cso_context *ctx)
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
static void
|
||||
cso_restore_fragment_sampler_views(struct cso_context *ctx)
|
||||
{
|
||||
unsigned i, nr_saved = ctx->nr_fragment_views_saved;
|
||||
|
|
@ -1298,7 +1385,7 @@ cso_set_stream_outputs(struct cso_context *ctx,
|
|||
ctx->nr_so_targets = num_targets;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
cso_save_stream_outputs(struct cso_context *ctx)
|
||||
{
|
||||
uint i;
|
||||
|
|
@ -1315,7 +1402,7 @@ cso_save_stream_outputs(struct cso_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
cso_restore_stream_outputs(struct cso_context *ctx)
|
||||
{
|
||||
struct pipe_context *pipe = ctx->pipe;
|
||||
|
|
@ -1402,6 +1489,113 @@ cso_restore_constant_buffer_slot0(struct cso_context *cso,
|
|||
NULL);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Save all the CSO state items specified by the state_mask bitmask
|
||||
* of CSO_BIT_x flags.
|
||||
*/
|
||||
void
|
||||
cso_save_state(struct cso_context *cso, unsigned state_mask)
|
||||
{
|
||||
assert(cso->saved_state == 0);
|
||||
|
||||
cso->saved_state = state_mask;
|
||||
|
||||
if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
|
||||
cso_save_aux_vertex_buffer_slot(cso);
|
||||
if (state_mask & CSO_BIT_BLEND)
|
||||
cso_save_blend(cso);
|
||||
if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
|
||||
cso_save_depth_stencil_alpha(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
|
||||
cso_save_fragment_samplers(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
|
||||
cso_save_fragment_sampler_views(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SHADER)
|
||||
cso_save_fragment_shader(cso);
|
||||
if (state_mask & CSO_BIT_FRAMEBUFFER)
|
||||
cso_save_framebuffer(cso);
|
||||
if (state_mask & CSO_BIT_GEOMETRY_SHADER)
|
||||
cso_save_geometry_shader(cso);
|
||||
if (state_mask & CSO_BIT_MIN_SAMPLES)
|
||||
cso_save_min_samples(cso);
|
||||
if (state_mask & CSO_BIT_RASTERIZER)
|
||||
cso_save_rasterizer(cso);
|
||||
if (state_mask & CSO_BIT_RENDER_CONDITION)
|
||||
cso_save_render_condition(cso);
|
||||
if (state_mask & CSO_BIT_SAMPLE_MASK)
|
||||
cso_save_sample_mask(cso);
|
||||
if (state_mask & CSO_BIT_STENCIL_REF)
|
||||
cso_save_stencil_ref(cso);
|
||||
if (state_mask & CSO_BIT_STREAM_OUTPUTS)
|
||||
cso_save_stream_outputs(cso);
|
||||
if (state_mask & CSO_BIT_TESSCTRL_SHADER)
|
||||
cso_save_tessctrl_shader(cso);
|
||||
if (state_mask & CSO_BIT_TESSEVAL_SHADER)
|
||||
cso_save_tesseval_shader(cso);
|
||||
if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
|
||||
cso_save_vertex_elements(cso);
|
||||
if (state_mask & CSO_BIT_VERTEX_SHADER)
|
||||
cso_save_vertex_shader(cso);
|
||||
if (state_mask & CSO_BIT_VIEWPORT)
|
||||
cso_save_viewport(cso);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Restore the state which was saved by cso_save_state().
|
||||
*/
|
||||
void
|
||||
cso_restore_state(struct cso_context *cso)
|
||||
{
|
||||
unsigned state_mask = cso->saved_state;
|
||||
|
||||
assert(state_mask);
|
||||
|
||||
if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
|
||||
cso_restore_aux_vertex_buffer_slot(cso);
|
||||
if (state_mask & CSO_BIT_BLEND)
|
||||
cso_restore_blend(cso);
|
||||
if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
|
||||
cso_restore_depth_stencil_alpha(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
|
||||
cso_restore_fragment_samplers(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
|
||||
cso_restore_fragment_sampler_views(cso);
|
||||
if (state_mask & CSO_BIT_FRAGMENT_SHADER)
|
||||
cso_restore_fragment_shader(cso);
|
||||
if (state_mask & CSO_BIT_FRAMEBUFFER)
|
||||
cso_restore_framebuffer(cso);
|
||||
if (state_mask & CSO_BIT_GEOMETRY_SHADER)
|
||||
cso_restore_geometry_shader(cso);
|
||||
if (state_mask & CSO_BIT_MIN_SAMPLES)
|
||||
cso_restore_min_samples(cso);
|
||||
if (state_mask & CSO_BIT_RASTERIZER)
|
||||
cso_restore_rasterizer(cso);
|
||||
if (state_mask & CSO_BIT_RENDER_CONDITION)
|
||||
cso_restore_render_condition(cso);
|
||||
if (state_mask & CSO_BIT_SAMPLE_MASK)
|
||||
cso_restore_sample_mask(cso);
|
||||
if (state_mask & CSO_BIT_STENCIL_REF)
|
||||
cso_restore_stencil_ref(cso);
|
||||
if (state_mask & CSO_BIT_STREAM_OUTPUTS)
|
||||
cso_restore_stream_outputs(cso);
|
||||
if (state_mask & CSO_BIT_TESSCTRL_SHADER)
|
||||
cso_restore_tessctrl_shader(cso);
|
||||
if (state_mask & CSO_BIT_TESSEVAL_SHADER)
|
||||
cso_restore_tesseval_shader(cso);
|
||||
if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
|
||||
cso_restore_vertex_elements(cso);
|
||||
if (state_mask & CSO_BIT_VERTEX_SHADER)
|
||||
cso_restore_vertex_shader(cso);
|
||||
if (state_mask & CSO_BIT_VIEWPORT)
|
||||
cso_restore_viewport(cso);
|
||||
|
||||
cso->saved_state = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* drawing */
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -47,22 +47,15 @@ void cso_destroy_context( struct cso_context *cso );
|
|||
|
||||
enum pipe_error cso_set_blend( struct cso_context *cso,
|
||||
const struct pipe_blend_state *blend );
|
||||
void cso_save_blend(struct cso_context *cso);
|
||||
void cso_restore_blend(struct cso_context *cso);
|
||||
|
||||
|
||||
|
||||
enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
|
||||
const struct pipe_depth_stencil_alpha_state *dsa );
|
||||
void cso_save_depth_stencil_alpha(struct cso_context *cso);
|
||||
void cso_restore_depth_stencil_alpha(struct cso_context *cso);
|
||||
|
||||
|
||||
|
||||
enum pipe_error cso_set_rasterizer( struct cso_context *cso,
|
||||
const struct pipe_rasterizer_state *rasterizer );
|
||||
void cso_save_rasterizer(struct cso_context *cso);
|
||||
void cso_restore_rasterizer(struct cso_context *cso);
|
||||
|
||||
|
||||
enum pipe_error
|
||||
|
|
@ -71,11 +64,6 @@ cso_set_samplers(struct cso_context *cso,
|
|||
unsigned count,
|
||||
const struct pipe_sampler_state **states);
|
||||
|
||||
void
|
||||
cso_save_fragment_samplers(struct cso_context *cso);
|
||||
|
||||
void
|
||||
cso_restore_fragment_samplers(struct cso_context *cso);
|
||||
|
||||
/* Alternate interface to support state trackers that like to modify
|
||||
* samplers one at a time:
|
||||
|
|
@ -91,9 +79,6 @@ cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
|
|||
enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
|
||||
unsigned count,
|
||||
const struct pipe_vertex_element *states);
|
||||
void cso_save_vertex_elements(struct cso_context *ctx);
|
||||
void cso_restore_vertex_elements(struct cso_context *ctx);
|
||||
|
||||
|
||||
void cso_set_vertex_buffers(struct cso_context *ctx,
|
||||
unsigned start_slot, unsigned count,
|
||||
|
|
@ -101,8 +86,6 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
|
|||
|
||||
/* One vertex buffer slot is provided with the save/restore functionality.
|
||||
* cso_context chooses the slot, it can be non-zero. */
|
||||
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
|
||||
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
|
||||
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);
|
||||
|
||||
|
||||
|
|
@ -110,8 +93,6 @@ void cso_set_stream_outputs(struct cso_context *ctx,
|
|||
unsigned num_targets,
|
||||
struct pipe_stream_output_target **targets,
|
||||
const unsigned *offsets);
|
||||
void cso_save_stream_outputs(struct cso_context *ctx);
|
||||
void cso_restore_stream_outputs(struct cso_context *ctx);
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -123,67 +104,81 @@ void cso_restore_stream_outputs(struct cso_context *ctx);
|
|||
|
||||
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
|
||||
void cso_save_fragment_shader(struct cso_context *cso);
|
||||
void cso_restore_fragment_shader(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
|
||||
void cso_save_vertex_shader(struct cso_context *cso);
|
||||
void cso_restore_vertex_shader(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
|
||||
void cso_save_geometry_shader(struct cso_context *cso);
|
||||
void cso_restore_geometry_shader(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle);
|
||||
void cso_save_tessctrl_shader(struct cso_context *cso);
|
||||
void cso_restore_tessctrl_shader(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle);
|
||||
void cso_save_tesseval_shader(struct cso_context *cso);
|
||||
void cso_restore_tesseval_shader(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle);
|
||||
void cso_delete_compute_shader(struct cso_context *ctx, void *handle);
|
||||
|
||||
|
||||
void cso_set_framebuffer(struct cso_context *cso,
|
||||
const struct pipe_framebuffer_state *fb);
|
||||
void cso_save_framebuffer(struct cso_context *cso);
|
||||
void cso_restore_framebuffer(struct cso_context *cso);
|
||||
|
||||
|
||||
void cso_set_viewport(struct cso_context *cso,
|
||||
const struct pipe_viewport_state *vp);
|
||||
void cso_save_viewport(struct cso_context *cso);
|
||||
void cso_restore_viewport(struct cso_context *cso);
|
||||
void cso_set_viewport_dims(struct cso_context *ctx,
|
||||
float width, float height, boolean invert);
|
||||
|
||||
|
||||
void cso_set_blend_color(struct cso_context *cso,
|
||||
const struct pipe_blend_color *bc);
|
||||
|
||||
void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask);
|
||||
void cso_save_sample_mask(struct cso_context *ctx);
|
||||
void cso_restore_sample_mask(struct cso_context *ctx);
|
||||
|
||||
void cso_set_min_samples(struct cso_context *cso, unsigned min_samples);
|
||||
void cso_save_min_samples(struct cso_context *ctx);
|
||||
void cso_restore_min_samples(struct cso_context *ctx);
|
||||
|
||||
void cso_set_stencil_ref(struct cso_context *cso,
|
||||
const struct pipe_stencil_ref *sr);
|
||||
void cso_save_stencil_ref(struct cso_context *cso);
|
||||
void cso_restore_stencil_ref(struct cso_context *cso);
|
||||
|
||||
void cso_set_render_condition(struct cso_context *cso,
|
||||
struct pipe_query *query,
|
||||
boolean condition, uint mode);
|
||||
void cso_save_render_condition(struct cso_context *cso);
|
||||
void cso_restore_render_condition(struct cso_context *cso);
|
||||
|
||||
|
||||
/*
 * State-selection flags.  OR them together to build the state_mask
 * argument of cso_save_state(); each flag occupies its own bit.
 */
#define CSO_BIT_AUX_VERTEX_BUFFER_SLOT  (1 << 0)
#define CSO_BIT_BLEND                   (1 << 1)
#define CSO_BIT_DEPTH_STENCIL_ALPHA     (1 << 2)
#define CSO_BIT_FRAGMENT_SAMPLERS       (1 << 3)
#define CSO_BIT_FRAGMENT_SAMPLER_VIEWS  (1 << 4)
#define CSO_BIT_FRAGMENT_SHADER         (1 << 5)
#define CSO_BIT_FRAMEBUFFER             (1 << 6)
#define CSO_BIT_GEOMETRY_SHADER         (1 << 7)
#define CSO_BIT_MIN_SAMPLES             (1 << 8)
#define CSO_BIT_RASTERIZER              (1 << 9)
#define CSO_BIT_RENDER_CONDITION        (1 << 10)
#define CSO_BIT_SAMPLE_MASK             (1 << 11)
#define CSO_BIT_STENCIL_REF             (1 << 12)
#define CSO_BIT_STREAM_OUTPUTS          (1 << 13)
#define CSO_BIT_TESSCTRL_SHADER         (1 << 14)
#define CSO_BIT_TESSEVAL_SHADER         (1 << 15)
#define CSO_BIT_VERTEX_ELEMENTS         (1 << 16)
#define CSO_BIT_VERTEX_SHADER           (1 << 17)
#define CSO_BIT_VIEWPORT                (1 << 18)

/* Convenience mask covering the five shader-stage flags defined above. */
#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
                              CSO_BIT_FRAGMENT_SHADER | \
                              CSO_BIT_GEOMETRY_SHADER | \
                              CSO_BIT_TESSCTRL_SHADER | \
                              CSO_BIT_TESSEVAL_SHADER)
|
||||
|
||||
void cso_save_state(struct cso_context *cso, unsigned state_mask);
|
||||
void cso_restore_state(struct cso_context *cso);
|
||||
|
||||
|
||||
/* sampler view state */
|
||||
|
|
@ -194,12 +189,6 @@ cso_set_sampler_views(struct cso_context *cso,
|
|||
unsigned count,
|
||||
struct pipe_sampler_view **views);
|
||||
|
||||
void
|
||||
cso_save_fragment_sampler_views(struct cso_context *ctx);
|
||||
|
||||
void
|
||||
cso_restore_fragment_sampler_views(struct cso_context *ctx);
|
||||
|
||||
|
||||
/* constant buffers */
|
||||
|
||||
|
|
@ -230,7 +219,6 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode,
|
|||
uint start, uint count,
|
||||
uint start_instance, uint instance_count);
|
||||
|
||||
/* helper drawing function */
|
||||
void
|
||||
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count);
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@
|
|||
**************************************************************************/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm-c/Disassembler.h>
|
||||
|
|
@ -125,7 +128,7 @@ lp_debug_dump_value(LLVMValueRef value)
|
|||
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
|
||||
*/
|
||||
static size_t
|
||||
disassemble(const void* func)
|
||||
disassemble(const void* func, std::stringstream &buffer)
|
||||
{
|
||||
const uint8_t *bytes = (const uint8_t *)func;
|
||||
|
||||
|
|
@ -143,8 +146,8 @@ disassemble(const void* func)
|
|||
char outline[1024];
|
||||
|
||||
if (!D) {
|
||||
_debug_printf("error: couldn't create disassembler for triple %s\n",
|
||||
Triple.c_str());
|
||||
buffer << "error: could not create disassembler for triple "
|
||||
<< Triple.c_str() << '\n';
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -158,13 +161,13 @@ disassemble(const void* func)
|
|||
* so that between runs.
|
||||
*/
|
||||
|
||||
_debug_printf("%6lu:\t", (unsigned long)pc);
|
||||
buffer << std::setw(6) << (unsigned long)pc << ":\t";
|
||||
|
||||
Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
|
||||
sizeof outline);
|
||||
|
||||
if (!Size) {
|
||||
_debug_printf("invalid\n");
|
||||
buffer << "invalid\n";
|
||||
pc += 1;
|
||||
break;
|
||||
}
|
||||
|
|
@ -176,10 +179,11 @@ disassemble(const void* func)
|
|||
if (0) {
|
||||
unsigned i;
|
||||
for (i = 0; i < Size; ++i) {
|
||||
_debug_printf("%02x ", bytes[pc + i]);
|
||||
buffer << std::hex << std::setfill('0') << std::setw(2)
|
||||
<< static_cast<int> (bytes[pc + i]);
|
||||
}
|
||||
for (; i < 16; ++i) {
|
||||
_debug_printf(" ");
|
||||
buffer << std::dec << " ";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -187,9 +191,7 @@ disassemble(const void* func)
|
|||
* Print the instruction.
|
||||
*/
|
||||
|
||||
_debug_printf("%*s", Size, outline);
|
||||
|
||||
_debug_printf("\n");
|
||||
buffer << std::setw(Size) << outline << '\n';
|
||||
|
||||
/*
|
||||
* Stop disassembling on return statements, if there is no record of a
|
||||
|
|
@ -198,9 +200,11 @@ disassemble(const void* func)
|
|||
* XXX: This currently assumes x86
|
||||
*/
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if (Size == 1 && bytes[pc] == 0xc3) {
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Advance.
|
||||
|
|
@ -209,12 +213,12 @@ disassemble(const void* func)
|
|||
pc += Size;
|
||||
|
||||
if (pc >= extent) {
|
||||
_debug_printf("disassembly larger than %ull bytes, aborting\n", extent);
|
||||
buffer << "disassembly larger than " << extent << " bytes, aborting\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_debug_printf("\n");
|
||||
buffer << '\n';
|
||||
|
||||
LLVMDisasmDispose(D);
|
||||
|
||||
|
|
@ -222,7 +226,8 @@ disassemble(const void* func)
|
|||
* Print GDB command, useful to verify output.
|
||||
*/
|
||||
if (0) {
|
||||
_debug_printf("disassemble %p %p\n", bytes, bytes + pc);
|
||||
buffer << "disassemble " << static_cast<const void*>(bytes) << ' '
|
||||
<< static_cast<const void*>(bytes + pc) << '\n';
|
||||
}
|
||||
|
||||
return pc;
|
||||
|
|
@ -231,8 +236,14 @@ disassemble(const void* func)
|
|||
|
||||
extern "C" void
|
||||
lp_disassemble(LLVMValueRef func, const void *code) {
|
||||
_debug_printf("%s:\n", LLVMGetValueName(func));
|
||||
disassemble(code);
|
||||
std::stringstream buffer;
|
||||
std::string s;
|
||||
|
||||
buffer << LLVMGetValueName(func) << ":\n";
|
||||
disassemble(code, buffer);
|
||||
s = buffer.str();
|
||||
_debug_printf("%s", s.c_str());
|
||||
_debug_printf("\n");
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -248,9 +259,10 @@ extern "C" void
|
|||
lp_profile(LLVMValueRef func, const void *code)
|
||||
{
|
||||
#if defined(__linux__) && defined(PROFILE)
|
||||
std::stringstream buffer;
|
||||
static std::ofstream perf_asm_file;
|
||||
static boolean first_time = TRUE;
|
||||
static FILE *perf_map_file = NULL;
|
||||
static int perf_asm_fd = -1;
|
||||
if (first_time) {
|
||||
/*
|
||||
* We rely on the disassembler for determining a function's size, but
|
||||
|
|
@ -264,17 +276,16 @@ lp_profile(LLVMValueRef func, const void *code)
|
|||
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
|
||||
perf_map_file = fopen(filename, "wt");
|
||||
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
|
||||
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
|
||||
perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
|
||||
perf_asm_file.open(filename);
|
||||
}
|
||||
first_time = FALSE;
|
||||
}
|
||||
if (perf_map_file) {
|
||||
const char *symbol = LLVMGetValueName(func);
|
||||
unsigned long addr = (uintptr_t)code;
|
||||
llvm::raw_fd_ostream Out(perf_asm_fd, false);
|
||||
Out << symbol << ":\n";
|
||||
unsigned long size = disassemble(code);
|
||||
buffer << symbol << ":\n";
|
||||
unsigned long size = disassemble(code, buffer);
|
||||
perf_asm_file << buffer.rdbuf() << std::flush;
|
||||
fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
|
||||
fflush(perf_map_file);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,8 +118,10 @@ create_pass_manager(struct gallivm_state *gallivm)
|
|||
* simple, or constant propagation into them, etc.
|
||||
*/
|
||||
|
||||
#if HAVE_LLVM < 0x0309
|
||||
// Old versions of LLVM get the DataLayout from the pass manager.
|
||||
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
|
||||
#endif
|
||||
|
||||
/* Setting the module's DataLayout to an empty string will cause the
|
||||
* ExecutionEngine to copy to the DataLayout string from its target
|
||||
|
|
|
|||
|
|
@ -128,6 +128,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
|||
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
return 1 << PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 1;
|
||||
|
|
@ -137,6 +139,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
|
|
|||
|
|
@ -61,6 +61,11 @@
|
|||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#include <llvm/ADT/Triple.h>
|
||||
#if HAVE_LLVM >= 0x0307
|
||||
#include <llvm/Analysis/TargetLibraryInfo.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetLibraryInfo.h>
|
||||
#endif
|
||||
#if HAVE_LLVM < 0x0306
|
||||
#include <llvm/ExecutionEngine/JITMemoryManager.h>
|
||||
#else
|
||||
|
|
@ -147,6 +152,31 @@ lp_set_target_options(void)
|
|||
gallivm_init_llvm_targets();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
LLVMTargetLibraryInfoRef
|
||||
gallivm_create_target_library_info(const char *triple)
|
||||
{
|
||||
return reinterpret_cast<LLVMTargetLibraryInfoRef>(
|
||||
#if HAVE_LLVM < 0x0307
|
||||
new llvm::TargetLibraryInfo(
|
||||
#else
|
||||
new llvm::TargetLibraryInfoImpl(
|
||||
#endif
|
||||
llvm::Triple(triple)));
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void
|
||||
gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
||||
{
|
||||
delete reinterpret_cast<
|
||||
#if HAVE_LLVM < 0x0307
|
||||
llvm::TargetLibraryInfo
|
||||
#else
|
||||
llvm::TargetLibraryInfoImpl
|
||||
#endif
|
||||
*>(library_info);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
LLVMValueRef
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#include "lp_bld.h"
|
||||
#include <llvm-c/ExecutionEngine.h>
|
||||
#include <llvm-c/Target.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -44,6 +45,12 @@ struct lp_generated_code;
|
|||
extern void
|
||||
gallivm_init_llvm_targets(void);
|
||||
|
||||
extern LLVMTargetLibraryInfoRef
|
||||
gallivm_create_target_library_info(const char *triple);
|
||||
|
||||
extern void
|
||||
gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
|
||||
|
||||
extern void
|
||||
lp_set_target_options(void);
|
||||
|
||||
|
|
|
|||
|
|
@ -2592,7 +2592,10 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
|
|||
explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
|
||||
lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
|
||||
}
|
||||
/* XXX: for real msaa support, the w component would be the sample index. */
|
||||
/*
|
||||
* XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
|
||||
* would be the sample index.
|
||||
*/
|
||||
|
||||
for (i = 0; i < dims; i++) {
|
||||
coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
|
||||
|
|
@ -2742,6 +2745,7 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
|
|||
opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
|
||||
opcode == TGSI_OPCODE_SAMPLE_D ||
|
||||
opcode == TGSI_OPCODE_SAMPLE_I ||
|
||||
opcode == TGSI_OPCODE_SAMPLE_I_MS ||
|
||||
opcode == TGSI_OPCODE_SAMPLE_L ||
|
||||
opcode == TGSI_OPCODE_SVIEWINFO ||
|
||||
opcode == TGSI_OPCODE_CAL ||
|
||||
|
|
@ -3989,6 +3993,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
|
|||
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
|
||||
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
|
||||
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
|
||||
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
|
||||
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
|
||||
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
|
||||
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ static const GLubyte Fixed8x13_Character_123[] = { 8, 0, 0, 0, 14, 16, 16,
|
|||
static const GLubyte Fixed8x13_Character_124[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_125[] = { 8, 0, 0, 0,112, 8, 8, 16, 12, 16, 8, 8,112, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_126[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 84, 36, 0, 0};
|
||||
#if 0 /* currently unused */
|
||||
static const GLubyte Fixed8x13_Character_127[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_128[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_129[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
|
|
@ -232,6 +233,7 @@ static const GLubyte Fixed8x13_Character_156[] = { 9, 0, 0, 0, 0, 0, 0,17
|
|||
static const GLubyte Fixed8x13_Character_157[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_158[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_159[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
|
||||
#endif
|
||||
static const GLubyte Fixed8x13_Character_160[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_161[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 0, 16, 0, 0};
|
||||
static const GLubyte Fixed8x13_Character_162[] = { 8, 0, 0, 0, 0, 16, 56, 84, 80, 80, 84, 56, 16, 0, 0};
|
||||
|
|
|
|||
|
|
@ -460,25 +460,25 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
|
|||
hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
|
||||
hud->constants.two_div_fb_height = 2.0f / hud->fb_height;
|
||||
|
||||
cso_save_framebuffer(cso);
|
||||
cso_save_sample_mask(cso);
|
||||
cso_save_min_samples(cso);
|
||||
cso_save_blend(cso);
|
||||
cso_save_depth_stencil_alpha(cso);
|
||||
cso_save_fragment_shader(cso);
|
||||
cso_save_fragment_sampler_views(cso);
|
||||
cso_save_fragment_samplers(cso);
|
||||
cso_save_rasterizer(cso);
|
||||
cso_save_viewport(cso);
|
||||
cso_save_stream_outputs(cso);
|
||||
cso_save_geometry_shader(cso);
|
||||
cso_save_tessctrl_shader(cso);
|
||||
cso_save_tesseval_shader(cso);
|
||||
cso_save_vertex_shader(cso);
|
||||
cso_save_vertex_elements(cso);
|
||||
cso_save_aux_vertex_buffer_slot(cso);
|
||||
cso_save_state(cso, (CSO_BIT_FRAMEBUFFER |
|
||||
CSO_BIT_SAMPLE_MASK |
|
||||
CSO_BIT_MIN_SAMPLES |
|
||||
CSO_BIT_BLEND |
|
||||
CSO_BIT_DEPTH_STENCIL_ALPHA |
|
||||
CSO_BIT_FRAGMENT_SHADER |
|
||||
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
|
||||
CSO_BIT_FRAGMENT_SAMPLERS |
|
||||
CSO_BIT_RASTERIZER |
|
||||
CSO_BIT_VIEWPORT |
|
||||
CSO_BIT_STREAM_OUTPUTS |
|
||||
CSO_BIT_GEOMETRY_SHADER |
|
||||
CSO_BIT_TESSCTRL_SHADER |
|
||||
CSO_BIT_TESSEVAL_SHADER |
|
||||
CSO_BIT_VERTEX_SHADER |
|
||||
CSO_BIT_VERTEX_ELEMENTS |
|
||||
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
|
||||
CSO_BIT_RENDER_CONDITION));
|
||||
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
|
||||
cso_save_render_condition(cso);
|
||||
|
||||
/* set states */
|
||||
memset(&surf_templ, 0, sizeof(surf_templ));
|
||||
|
|
@ -591,26 +591,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
|
|||
hud_pane_draw_colored_objects(hud, pane);
|
||||
}
|
||||
|
||||
/* restore states */
|
||||
cso_restore_framebuffer(cso);
|
||||
cso_restore_sample_mask(cso);
|
||||
cso_restore_min_samples(cso);
|
||||
cso_restore_blend(cso);
|
||||
cso_restore_depth_stencil_alpha(cso);
|
||||
cso_restore_fragment_shader(cso);
|
||||
cso_restore_fragment_sampler_views(cso);
|
||||
cso_restore_fragment_samplers(cso);
|
||||
cso_restore_rasterizer(cso);
|
||||
cso_restore_viewport(cso);
|
||||
cso_restore_stream_outputs(cso);
|
||||
cso_restore_tessctrl_shader(cso);
|
||||
cso_restore_tesseval_shader(cso);
|
||||
cso_restore_geometry_shader(cso);
|
||||
cso_restore_vertex_shader(cso);
|
||||
cso_restore_vertex_elements(cso);
|
||||
cso_restore_aux_vertex_buffer_slot(cso);
|
||||
cso_restore_state(cso);
|
||||
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
|
||||
cso_restore_render_condition(cso);
|
||||
|
||||
pipe_surface_reference(&surf, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -283,8 +283,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
|
|||
return SUPER(mm);
|
||||
|
||||
failure:
|
||||
if(mm->heap)
|
||||
u_mmDestroy(mm->heap);
|
||||
if(mm->heap)
|
||||
u_mmDestroy(mm->heap);
|
||||
if(mm->map)
|
||||
pb_unmap(mm->buffer);
|
||||
FREE(mm);
|
||||
|
|
|
|||
|
|
@ -115,27 +115,27 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
|
|||
}
|
||||
|
||||
/* save state (restored below) */
|
||||
cso_save_blend(cso);
|
||||
cso_save_depth_stencil_alpha(cso);
|
||||
cso_save_fragment_shader(cso);
|
||||
cso_save_framebuffer(cso);
|
||||
cso_save_tessctrl_shader(cso);
|
||||
cso_save_tesseval_shader(cso);
|
||||
cso_save_geometry_shader(cso);
|
||||
cso_save_rasterizer(cso);
|
||||
cso_save_sample_mask(cso);
|
||||
cso_save_min_samples(cso);
|
||||
cso_save_fragment_samplers(cso);
|
||||
cso_save_fragment_sampler_views(cso);
|
||||
cso_save_stencil_ref(cso);
|
||||
cso_save_stream_outputs(cso);
|
||||
cso_save_vertex_elements(cso);
|
||||
cso_save_vertex_shader(cso);
|
||||
cso_save_viewport(cso);
|
||||
cso_save_aux_vertex_buffer_slot(cso);
|
||||
cso_save_state(cso, (CSO_BIT_BLEND |
|
||||
CSO_BIT_DEPTH_STENCIL_ALPHA |
|
||||
CSO_BIT_FRAGMENT_SHADER |
|
||||
CSO_BIT_FRAMEBUFFER |
|
||||
CSO_BIT_TESSCTRL_SHADER |
|
||||
CSO_BIT_TESSEVAL_SHADER |
|
||||
CSO_BIT_GEOMETRY_SHADER |
|
||||
CSO_BIT_RASTERIZER |
|
||||
CSO_BIT_SAMPLE_MASK |
|
||||
CSO_BIT_MIN_SAMPLES |
|
||||
CSO_BIT_FRAGMENT_SAMPLERS |
|
||||
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
|
||||
CSO_BIT_STENCIL_REF |
|
||||
CSO_BIT_STREAM_OUTPUTS |
|
||||
CSO_BIT_VERTEX_ELEMENTS |
|
||||
CSO_BIT_VERTEX_SHADER |
|
||||
CSO_BIT_VIEWPORT |
|
||||
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
|
||||
CSO_BIT_RENDER_CONDITION));
|
||||
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
|
||||
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
|
||||
cso_save_render_condition(cso);
|
||||
|
||||
/* set default state */
|
||||
cso_set_sample_mask(cso, ~0);
|
||||
|
|
@ -186,27 +186,9 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
|
|||
}
|
||||
|
||||
/* restore state we changed */
|
||||
cso_restore_blend(cso);
|
||||
cso_restore_depth_stencil_alpha(cso);
|
||||
cso_restore_fragment_shader(cso);
|
||||
cso_restore_framebuffer(cso);
|
||||
cso_restore_tessctrl_shader(cso);
|
||||
cso_restore_tesseval_shader(cso);
|
||||
cso_restore_geometry_shader(cso);
|
||||
cso_restore_rasterizer(cso);
|
||||
cso_restore_sample_mask(cso);
|
||||
cso_restore_min_samples(cso);
|
||||
cso_restore_fragment_samplers(cso);
|
||||
cso_restore_fragment_sampler_views(cso);
|
||||
cso_restore_stencil_ref(cso);
|
||||
cso_restore_stream_outputs(cso);
|
||||
cso_restore_vertex_elements(cso);
|
||||
cso_restore_vertex_shader(cso);
|
||||
cso_restore_viewport(cso);
|
||||
cso_restore_aux_vertex_buffer_slot(cso);
|
||||
cso_restore_state(cso);
|
||||
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
|
||||
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
|
||||
cso_restore_render_condition(cso);
|
||||
|
||||
pipe_resource_reference(&ppq->depth, NULL);
|
||||
pipe_resource_reference(&refin, NULL);
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ tgsi_default_declaration( void )
|
|||
declaration.Local = 0;
|
||||
declaration.Array = 0;
|
||||
declaration.Atomic = 0;
|
||||
declaration.Shared = 0;
|
||||
declaration.Padding = 0;
|
||||
|
||||
return declaration;
|
||||
|
|
|
|||
|
|
@ -352,7 +352,7 @@ iter_declaration(
|
|||
TXT(", ");
|
||||
ENM(decl->Image.Resource, tgsi_texture_names);
|
||||
TXT(", ");
|
||||
UID(decl->Image.Format);
|
||||
TXT(util_format_name(decl->Image.Format));
|
||||
if (decl->Image.Writable)
|
||||
TXT(", WR");
|
||||
if (decl->Image.Raw)
|
||||
|
|
@ -364,6 +364,11 @@ iter_declaration(
|
|||
TXT(", ATOMIC");
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_MEMORY) {
|
||||
if (decl->Declaration.Shared)
|
||||
TXT(", SHARED");
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
TXT(", ");
|
||||
ENM(decl->SamplerView.Resource, tgsi_texture_names);
|
||||
|
|
|
|||
|
|
@ -2300,7 +2300,8 @@ exec_txf(struct tgsi_exec_machine *mach,
|
|||
|
||||
IFETCH(&r[3], 0, TGSI_CHAN_W);
|
||||
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
|
||||
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
|
||||
target = mach->SamplerViews[unit].Resource;
|
||||
}
|
||||
else {
|
||||
|
|
@ -2342,7 +2343,8 @@ exec_txf(struct tgsi_exec_machine *mach,
|
|||
r[3].f[j] = rgba[3][j];
|
||||
}
|
||||
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
|
||||
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
|
||||
unsigned char swizzles[4];
|
||||
swizzles[0] = inst->Src[1].Register.SwizzleX;
|
||||
swizzles[1] = inst->Src[1].Register.SwizzleY;
|
||||
|
|
@ -4967,7 +4969,7 @@ exec_instruction(
|
|||
break;
|
||||
|
||||
case TGSI_OPCODE_SAMPLE_I_MS:
|
||||
assert(0);
|
||||
exec_txf(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SAMPLE:
|
||||
|
|
|
|||
|
|
@ -465,6 +465,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
|
|||
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
return 1 << PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_DOUBLES:
|
||||
|
|
@ -474,6 +476,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
|
|||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
|
|
|||
|
|
@ -247,7 +247,14 @@ scan_declaration(struct tgsi_shader_info *info,
|
|||
info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
|
||||
info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
|
||||
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
|
||||
info->num_inputs++;
|
||||
|
||||
/* Vertex shaders can have inputs with holes between them. */
|
||||
if (info->processor == TGSI_PROCESSOR_VERTEX)
|
||||
info->num_inputs = MAX2(info->num_inputs, reg + 1);
|
||||
else {
|
||||
info->num_inputs++;
|
||||
assert(reg < info->num_inputs);
|
||||
}
|
||||
|
||||
/* Only interpolated varyings. Don't include POSITION.
|
||||
* Don't include integer varyings, because they are not
|
||||
|
|
@ -341,6 +348,7 @@ scan_declaration(struct tgsi_shader_info *info,
|
|||
info->output_semantic_name[reg] = (ubyte) semName;
|
||||
info->output_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->num_outputs++;
|
||||
assert(reg < info->num_outputs);
|
||||
|
||||
if (semName == TGSI_SEMANTIC_COLOR)
|
||||
info->colors_written |= 1 << semIndex;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ static const char *tgsi_file_names[] =
|
|||
"IMAGE",
|
||||
"SVIEW",
|
||||
"BUFFER",
|
||||
"MEMORY",
|
||||
};
|
||||
|
||||
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
||||
|
|
|
|||
|
|
@ -1290,8 +1290,6 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/* XXX format */
|
||||
|
||||
cur2 = cur;
|
||||
eat_opt_white(&cur2);
|
||||
while (*cur2 == ',') {
|
||||
|
|
@ -1304,7 +1302,16 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
decl.Image.Writable = 1;
|
||||
|
||||
} else {
|
||||
break;
|
||||
for (i = 0; i < PIPE_FORMAT_COUNT; i++) {
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(i);
|
||||
if (desc && str_match_nocase_whole(&cur2, desc->name)) {
|
||||
decl.Image.Format = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == PIPE_FORMAT_COUNT)
|
||||
break;
|
||||
}
|
||||
cur = cur2;
|
||||
eat_opt_white(&cur2);
|
||||
|
|
@ -1381,6 +1388,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
if (str_match_nocase_whole(&cur, "ATOMIC")) {
|
||||
decl.Declaration.Atomic = 1;
|
||||
ctx->cur = cur;
|
||||
} else if (str_match_nocase_whole(&cur, "SHARED")) {
|
||||
decl.Declaration.Shared = 1;
|
||||
ctx->cur = cur;
|
||||
}
|
||||
} else {
|
||||
if (str_match_nocase_whole(&cur, "LOCAL")) {
|
||||
|
|
|
|||
|
|
@ -189,6 +189,8 @@ struct ureg_program
|
|||
unsigned nr_instructions;
|
||||
|
||||
struct ureg_tokens domain[2];
|
||||
|
||||
bool use_shared_memory;
|
||||
};
|
||||
|
||||
static union tgsi_any_token error_tokens[32];
|
||||
|
|
@ -727,6 +729,16 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
|
|||
return reg;
|
||||
}
|
||||
|
||||
/* Allocate a shared memory area.
|
||||
*/
|
||||
struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg)
|
||||
{
|
||||
struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0);
|
||||
|
||||
ureg->use_shared_memory = true;
|
||||
return reg;
|
||||
}
|
||||
|
||||
static int
|
||||
match_or_expand_immediate64( const unsigned *v,
|
||||
int type,
|
||||
|
|
@ -1653,6 +1665,23 @@ emit_decl_buffer(struct ureg_program *ureg,
|
|||
out[1].decl_range.Last = index;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_shared_memory(struct ureg_program *ureg)
|
||||
{
|
||||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
|
||||
|
||||
out[0].value = 0;
|
||||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 2;
|
||||
out[0].decl.File = TGSI_FILE_MEMORY;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
out[0].decl.Shared = true;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = 0;
|
||||
out[1].decl_range.Last = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_immediate( struct ureg_program *ureg,
|
||||
const unsigned *v,
|
||||
|
|
@ -1825,6 +1854,9 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
|
||||
}
|
||||
|
||||
if (ureg->use_shared_memory)
|
||||
emit_decl_shared_memory(ureg);
|
||||
|
||||
if (ureg->const_decls.nr_constant_ranges) {
|
||||
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
|
||||
emit_decl_range(ureg,
|
||||
|
|
|
|||
|
|
@ -337,6 +337,9 @@ ureg_DECL_image(struct ureg_program *ureg,
|
|||
struct ureg_src
|
||||
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_shared_memory(struct ureg_program *ureg);
|
||||
|
||||
static inline struct ureg_src
|
||||
ureg_imm4f( struct ureg_program *ureg,
|
||||
float a, float b,
|
||||
|
|
|
|||
|
|
@ -541,23 +541,23 @@ util_blit_pixels_tex(struct blit_state *ctx,
|
|||
PIPE_BIND_RENDER_TARGET));
|
||||
|
||||
/* save state (restored below) */
|
||||
cso_save_blend(ctx->cso);
|
||||
cso_save_depth_stencil_alpha(ctx->cso);
|
||||
cso_save_rasterizer(ctx->cso);
|
||||
cso_save_sample_mask(ctx->cso);
|
||||
cso_save_min_samples(ctx->cso);
|
||||
cso_save_fragment_samplers(ctx->cso);
|
||||
cso_save_fragment_sampler_views(ctx->cso);
|
||||
cso_save_stream_outputs(ctx->cso);
|
||||
cso_save_viewport(ctx->cso);
|
||||
cso_save_framebuffer(ctx->cso);
|
||||
cso_save_fragment_shader(ctx->cso);
|
||||
cso_save_vertex_shader(ctx->cso);
|
||||
cso_save_tessctrl_shader(ctx->cso);
|
||||
cso_save_tesseval_shader(ctx->cso);
|
||||
cso_save_geometry_shader(ctx->cso);
|
||||
cso_save_vertex_elements(ctx->cso);
|
||||
cso_save_aux_vertex_buffer_slot(ctx->cso);
|
||||
cso_save_state(ctx->cso, (CSO_BIT_BLEND |
|
||||
CSO_BIT_DEPTH_STENCIL_ALPHA |
|
||||
CSO_BIT_RASTERIZER |
|
||||
CSO_BIT_SAMPLE_MASK |
|
||||
CSO_BIT_MIN_SAMPLES |
|
||||
CSO_BIT_FRAGMENT_SAMPLERS |
|
||||
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
|
||||
CSO_BIT_STREAM_OUTPUTS |
|
||||
CSO_BIT_VIEWPORT |
|
||||
CSO_BIT_FRAMEBUFFER |
|
||||
CSO_BIT_FRAGMENT_SHADER |
|
||||
CSO_BIT_VERTEX_SHADER |
|
||||
CSO_BIT_TESSCTRL_SHADER |
|
||||
CSO_BIT_TESSEVAL_SHADER |
|
||||
CSO_BIT_GEOMETRY_SHADER |
|
||||
CSO_BIT_VERTEX_ELEMENTS |
|
||||
CSO_BIT_AUX_VERTEX_BUFFER_SLOT));
|
||||
|
||||
/* set misc state we care about */
|
||||
cso_set_blend(ctx->cso, &ctx->blend_write_color);
|
||||
|
|
@ -625,21 +625,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
|
|||
2); /* attribs/vert */
|
||||
|
||||
/* restore state we changed */
|
||||
cso_restore_blend(ctx->cso);
|
||||
cso_restore_depth_stencil_alpha(ctx->cso);
|
||||
cso_restore_rasterizer(ctx->cso);
|
||||
cso_restore_sample_mask(ctx->cso);
|
||||
cso_restore_min_samples(ctx->cso);
|
||||
cso_restore_fragment_samplers(ctx->cso);
|
||||
cso_restore_fragment_sampler_views(ctx->cso);
|
||||
cso_restore_viewport(ctx->cso);
|
||||
cso_restore_framebuffer(ctx->cso);
|
||||
cso_restore_fragment_shader(ctx->cso);
|
||||
cso_restore_vertex_shader(ctx->cso);
|
||||
cso_restore_tessctrl_shader(ctx->cso);
|
||||
cso_restore_tesseval_shader(ctx->cso);
|
||||
cso_restore_geometry_shader(ctx->cso);
|
||||
cso_restore_vertex_elements(ctx->cso);
|
||||
cso_restore_aux_vertex_buffer_slot(ctx->cso);
|
||||
cso_restore_stream_outputs(ctx->cso);
|
||||
cso_restore_state(ctx->cso);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,17 +173,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
|
|||
*ptr = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
|
||||
{
|
||||
struct pipe_image_view *old_view = *ptr;
|
||||
|
||||
if (pipe_reference_described(&(*ptr)->reference, &view->reference,
|
||||
(debug_reference_descriptor)debug_describe_image_view))
|
||||
old_view->context->image_view_destroy(old_view->context, old_view);
|
||||
*ptr = view;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pipe_so_target_reference(struct pipe_stream_output_target **ptr,
|
||||
struct pipe_stream_output_target *target)
|
||||
|
|
|
|||
|
|
@ -415,6 +415,9 @@ to be 0.
|
|||
(also used to implement atomic counters). Having this be non-0 also
|
||||
implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI
|
||||
opcodes.
|
||||
* ``PIPE_SHADER_CAP_SUPPORTED_IRS``: Supported representations of the
|
||||
program. It should be a mask of ``pipe_shader_ir`` bits.
|
||||
* ``PIPE_SHADER_CAP_MAX_SHADER_IMAGES``: Maximum number of image units.
|
||||
|
||||
|
||||
.. _pipe_compute_cap:
|
||||
|
|
|
|||
|
|
@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
|
|||
pipe->sampler_view_destroy(pipe, view);
|
||||
}
|
||||
|
||||
static struct pipe_image_view *
|
||||
dd_context_create_image_view(struct pipe_context *_pipe,
|
||||
struct pipe_resource *resource,
|
||||
const struct pipe_image_view *templ)
|
||||
{
|
||||
struct pipe_context *pipe = dd_context(_pipe)->pipe;
|
||||
struct pipe_image_view *view =
|
||||
pipe->create_image_view(pipe, resource, templ);
|
||||
|
||||
if (!view)
|
||||
return NULL;
|
||||
view->context = _pipe;
|
||||
return view;
|
||||
}
|
||||
|
||||
static void
|
||||
dd_context_image_view_destroy(struct pipe_context *_pipe,
|
||||
struct pipe_image_view *view)
|
||||
{
|
||||
struct pipe_context *pipe = dd_context(_pipe)->pipe;
|
||||
|
||||
pipe->image_view_destroy(pipe, view);
|
||||
}
|
||||
|
||||
static struct pipe_stream_output_target *
|
||||
dd_context_create_stream_output_target(struct pipe_context *_pipe,
|
||||
struct pipe_resource *res,
|
||||
|
|
@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
|
|||
static void
|
||||
dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
|
||||
unsigned start, unsigned num,
|
||||
struct pipe_image_view **views)
|
||||
struct pipe_image_view *views)
|
||||
{
|
||||
struct dd_context *dctx = dd_context(_pipe);
|
||||
struct pipe_context *pipe = dctx->pipe;
|
||||
|
|
@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
|
|||
CTX_INIT(sampler_view_destroy);
|
||||
CTX_INIT(create_surface);
|
||||
CTX_INIT(surface_destroy);
|
||||
CTX_INIT(create_image_view);
|
||||
CTX_INIT(image_view_destroy);
|
||||
CTX_INIT(transfer_map);
|
||||
CTX_INIT(transfer_flush_region);
|
||||
CTX_INIT(transfer_unmap);
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ struct dd_context
|
|||
struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
|
||||
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
|
||||
struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
|
||||
struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
|
||||
struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
|
||||
struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
|
||||
|
||||
struct dd_state *velems;
|
||||
|
|
|
|||
|
|
@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
|
|||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
|
|
|||
|
|
@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
|
|||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
|
@ -255,11 +256,273 @@ enum a3xx_color_fmt {
|
|||
RB_R32G32B32A32_UINT = 59,
|
||||
};
|
||||
|
||||
/* a3xx CP perfcounter select values.  Every value is explicit; the
 * numbering is not contiguous (gaps are marked below).
 */
enum a3xx_cp_perfcounter_select {
	CP_ALWAYS_COUNT = 0,
	/* 1..2 not listed */
	CP_AHB_PFPTRANS_WAIT = 3,
	/* 4..5 not listed */
	CP_AHB_NRTTRANS_WAIT = 6,
	/* 7 not listed */
	CP_CSF_NRT_READ_WAIT = 8,
	CP_CSF_I1_FIFO_FULL = 9,
	CP_CSF_I2_FIFO_FULL = 10,
	CP_CSF_ST_FIFO_FULL = 11,
	CP_RESERVED_12 = 12,
	CP_CSF_RING_ROQ_FULL = 13,
	CP_CSF_I1_ROQ_FULL = 14,
	CP_CSF_I2_ROQ_FULL = 15,
	CP_CSF_ST_ROQ_FULL = 16,
	CP_RESERVED_17 = 17,
	CP_MIU_TAG_MEM_FULL = 18,
	/* 19..21 not listed */
	CP_MIU_NRT_WRITE_STALLED = 22,
	CP_MIU_NRT_READ_STALLED = 23,
	/* 24..25 not listed */
	CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
	CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
	CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
	CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
	CP_ME_MICRO_RB_STARVED = 30,
	/* 31..39 not listed */
	CP_AHB_RBBM_DWORD_SENT = 40,
	CP_ME_BUSY_CLOCKS = 41,
	CP_ME_WAIT_CONTEXT_AVAIL = 42,
	CP_PFP_TYPE0_PACKET = 43,
	CP_PFP_TYPE3_PACKET = 44,
	CP_CSF_RB_WPTR_NEQ_RPTR = 45,
	CP_CSF_I1_SIZE_NEQ_ZERO = 46,
	CP_CSF_I2_SIZE_NEQ_ZERO = 47,
	CP_CSF_RBI1I2_FETCHING = 48,
};
|
||||
|
||||
/* a3xx GRAS TSE perfcounter select values (explicit, 0..14). */
enum a3xx_gras_tse_perfcounter_select {
	GRAS_TSEPERF_INPUT_PRIM = 0,
	GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
	GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
	GRAS_TSEPERF_CLIPPED_PRIM = 3,
	GRAS_TSEPERF_NEW_PRIM = 4,
	GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
	GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
	GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
	GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
	GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
	GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
	GRAS_TSEPERF_POST_CLIP_PRIM = 11,
	GRAS_TSEPERF_WORKING_CYCLES = 12,
	GRAS_TSEPERF_PC_STARVE = 13,
	GRAS_TSERASPERF_STALL = 14,
};
|
||||
|
||||
/* a3xx GRAS RAS perfcounter select values (explicit, 0..6). */
enum a3xx_gras_ras_perfcounter_select {
	GRAS_RASPERF_16X16_TILES = 0,
	GRAS_RASPERF_8X8_TILES = 1,
	GRAS_RASPERF_4X4_TILES = 2,
	GRAS_RASPERF_WORKING_CYCLES = 3,
	GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
	GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
	GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
};
|
||||
|
||||
/* a3xx HLSQ perfcounter select values (explicit, 0..28). */
enum a3xx_hlsq_perfcounter_select {
	HLSQ_PERF_SP_VS_CONSTANT = 0,
	HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
	HLSQ_PERF_SP_FS_CONSTANT = 2,
	HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
	HLSQ_PERF_TP_STATE = 4,
	HLSQ_PERF_QUADS = 5,
	HLSQ_PERF_PIXELS = 6,
	HLSQ_PERF_VERTICES = 7,
	HLSQ_PERF_FS8_THREADS = 8,
	HLSQ_PERF_FS16_THREADS = 9,
	HLSQ_PERF_FS32_THREADS = 10,
	HLSQ_PERF_VS8_THREADS = 11,
	HLSQ_PERF_VS16_THREADS = 12,
	HLSQ_PERF_SP_VS_DATA_BYTES = 13,
	HLSQ_PERF_SP_FS_DATA_BYTES = 14,
	HLSQ_PERF_ACTIVE_CYCLES = 15,
	HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
	HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
	HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
	HLSQ_PERF_STALL_CYCLES_UCHE = 19,
	HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
	HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
	HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
	HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
	HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
	HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
	HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
	HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
	HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
};
|
||||
|
||||
/* a3xx PC perfcounter select values (explicit, 0..12). */
enum a3xx_pc_perfcounter_select {
	PC_PCPERF_VISIBILITY_STREAMS = 0,
	PC_PCPERF_TOTAL_INSTANCES = 1,
	PC_PCPERF_PRIMITIVES_PC_VPC = 2,
	PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
	PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
	PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
	PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
	PC_PCPERF_VERTICES_TO_VFD = 7,
	PC_PCPERF_REUSED_VERTICES = 8,
	PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
	PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
	PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
	PC_PCPERF_CYCLES_IS_WORKING = 12,
};
|
||||
|
||||
/* a3xx RB perfcounter select values (explicit, 0..16). */
enum a3xx_rb_perfcounter_select {
	RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
	RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
	RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
	RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
	RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
	RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
	RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
	RB_RBPERF_RB_MARB_DATA = 7,
	RB_RBPERF_SP_RB_QUAD = 8,
	RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
	RB_RBPERF_GMEM_CH0_READ = 10,
	RB_RBPERF_GMEM_CH1_READ = 11,
	RB_RBPERF_GMEM_CH0_WRITE = 12,
	RB_RBPERF_GMEM_CH1_WRITE = 13,
	RB_RBPERF_CP_CONTEXT_DONE = 14,
	RB_RBPERF_CP_CACHE_FLUSH = 15,
	RB_RBPERF_CP_ZPASS_DONE = 16,
};
|
||||
|
||||
/* a3xx RBBM perfcounter select values (explicit, 0..22).
 * Identifier spellings are reproduced exactly as they appear in the table.
 */
enum a3xx_rbbm_perfcounter_select {
	RBBM_ALAWYS_ON = 0,
	RBBM_VBIF_BUSY = 1,
	RBBM_TSE_BUSY = 2,
	RBBM_RAS_BUSY = 3,
	RBBM_PC_DCALL_BUSY = 4,
	RBBM_PC_VSD_BUSY = 5,
	RBBM_VFD_BUSY = 6,
	RBBM_VPC_BUSY = 7,
	RBBM_UCHE_BUSY = 8,
	RBBM_VSC_BUSY = 9,
	RBBM_HLSQ_BUSY = 10,
	RBBM_ANY_RB_BUSY = 11,
	RBBM_ANY_TEX_BUSY = 12,
	RBBM_ANY_USP_BUSY = 13,
	RBBM_ANY_MARB_BUSY = 14,
	RBBM_ANY_ARB_BUSY = 15,
	RBBM_AHB_STATUS_BUSY = 16,
	RBBM_AHB_STATUS_STALLED = 17,
	RBBM_AHB_STATUS_TXFR = 18,
	RBBM_AHB_STATUS_TXFR_SPLIT = 19,
	RBBM_AHB_STATUS_TXFR_ERROR = 20,
	RBBM_AHB_STATUS_LONG_STALL = 21,
	RBBM_RBBM_STATUS_MASKED = 22,
};
|
||||
|
||||
/* a3xx SP perfcounter select values.  Every value is explicit.
 * SP0_ICL1_MISSES and SP_ICL1_MISSES both carry the value 26.
 */
enum a3xx_sp_perfcounter_select {
	SP_LM_LOAD_INSTRUCTIONS = 0,
	SP_LM_STORE_INSTRUCTIONS = 1,
	SP_LM_ATOMICS = 2,
	SP_UCHE_LOAD_INSTRUCTIONS = 3,
	SP_UCHE_STORE_INSTRUCTIONS = 4,
	SP_UCHE_ATOMICS = 5,
	SP_VS_TEX_INSTRUCTIONS = 6,
	SP_VS_CFLOW_INSTRUCTIONS = 7,
	SP_VS_EFU_INSTRUCTIONS = 8,
	SP_VS_FULL_ALU_INSTRUCTIONS = 9,
	SP_VS_HALF_ALU_INSTRUCTIONS = 10,
	SP_FS_TEX_INSTRUCTIONS = 11,
	SP_FS_CFLOW_INSTRUCTIONS = 12,
	SP_FS_EFU_INSTRUCTIONS = 13,
	SP_FS_FULL_ALU_INSTRUCTIONS = 14,
	SP0_ICL1_MISSES = 26,	/* out of numeric sequence in the table */
	SP_FS_HALF_ALU_INSTRUCTIONS = 15,
	SP_FS_BARY_INSTRUCTIONS = 16,
	SP_VS_INSTRUCTIONS = 17,
	SP_FS_INSTRUCTIONS = 18,
	SP_ADDR_LOCK_COUNT = 19,
	SP_UCHE_READ_TRANS = 20,
	SP_UCHE_WRITE_TRANS = 21,
	SP_EXPORT_VPC_TRANS = 22,
	SP_EXPORT_RB_TRANS = 23,
	SP_PIXELS_KILLED = 24,
	SP_ICL1_REQUESTS = 25,
	SP_ICL1_MISSES = 26,
	SP_ICL0_REQUESTS = 27,
	SP_ICL0_MISSES = 28,
	SP_ALU_ACTIVE_CYCLES = 29,
	SP_EFU_ACTIVE_CYCLES = 30,
	SP_STALL_CYCLES_BY_VPC = 31,
	SP_STALL_CYCLES_BY_TP = 32,
	SP_STALL_CYCLES_BY_UCHE = 33,
	SP_STALL_CYCLES_BY_RB = 34,
	SP_ACTIVE_CYCLES_ANY = 35,
	SP_ACTIVE_CYCLES_ALL = 36,
};
|
||||
|
||||
/* a3xx TP perfcounter select values (explicit, 0..27). */
enum a3xx_tp_perfcounter_select {
	TPL1_TPPERF_L1_REQUESTS = 0,
	TPL1_TPPERF_TP0_L1_REQUESTS = 1,
	TPL1_TPPERF_TP0_L1_MISSES = 2,
	TPL1_TPPERF_TP1_L1_REQUESTS = 3,
	TPL1_TPPERF_TP1_L1_MISSES = 4,
	TPL1_TPPERF_TP2_L1_REQUESTS = 5,
	TPL1_TPPERF_TP2_L1_MISSES = 6,
	TPL1_TPPERF_TP3_L1_REQUESTS = 7,
	TPL1_TPPERF_TP3_L1_MISSES = 8,
	TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
	TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
	TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
	TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
	TPL1_TPPERF_BILINEAR_OPS = 13,
	TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
	TPL1_TPPERF_QUADQUADS_SHADOW = 15,
	TPL1_TPPERF_QUADS_ARRAY = 16,
	TPL1_TPPERF_QUADS_PROJECTION = 17,
	TPL1_TPPERF_QUADS_GRADIENT = 18,
	TPL1_TPPERF_QUADS_1D2D = 19,
	TPL1_TPPERF_QUADS_3DCUBE = 20,
	TPL1_TPPERF_ZERO_LOD = 21,
	TPL1_TPPERF_OUTPUT_TEXELS = 22,
	TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
	TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
	TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
	TPL1_TPPERF_LATENCY = 26,
	TPL1_TPPERF_LATENCY_TRANS = 27,
};
|
||||
|
||||
/* a3xx VFD perfcounter select values (explicit, 0..9). */
enum a3xx_vfd_perfcounter_select {
	VFD_PERF_UCHE_BYTE_FETCHED = 0,
	VFD_PERF_UCHE_TRANS = 1,
	VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
	VFD_PERF_FETCH_INSTRUCTIONS = 3,
	VFD_PERF_DECODE_INSTRUCTIONS = 4,
	VFD_PERF_ACTIVE_CYCLES = 5,
	VFD_PERF_STALL_CYCLES_UCHE = 6,
	VFD_PERF_STALL_CYCLES_HLSQ = 7,
	VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
	VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
};
|
||||
|
||||
/* a3xx VPC perfcounter select values (explicit, 0..5). */
enum a3xx_vpc_perfcounter_select {
	VPC_PERF_SP_LM_PRIMITIVES = 0,
	VPC_PERF_COMPONENTS_FROM_SP = 1,
	VPC_PERF_SP_LM_COMPONENTS = 2,
	VPC_PERF_ACTIVE_CYCLES = 3,
	VPC_PERF_STALL_CYCLES_LM = 4,
	VPC_PERF_STALL_CYCLES_RAS = 5,
};
|
||||
|
||||
/* a3xx UCHE perfcounter select values.  Every value is explicit; values
 * 5..7 do not appear in the table.
 */
enum a3xx_uche_perfcounter_select {
	UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
	UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
	UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
	UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
	UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
	/* 5..7 not listed */
	UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
	UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
	UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
	UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
	UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
	UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
	UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
	UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
	UCHE_UCHEPERF_EVICTS = 16,
	UCHE_UCHEPERF_FLUSHES = 17,
	UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
	UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
	UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
};
|
||||
|
||||
enum a3xx_rb_blend_opcode {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "util/u_format.h"
|
||||
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
|
||||
#include "fd3_emit.h"
|
||||
#include "fd3_blend.h"
|
||||
|
|
@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx)
|
|||
|
||||
fd_wfi(ctx, ring);
|
||||
|
||||
fd_hw_query_enable(ctx, ring);
|
||||
|
||||
ctx->needs_rb_fbd = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -49,6 +49,8 @@ struct fd4_context {
|
|||
|
||||
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
|
||||
* could combine it with another allocation.
|
||||
*
|
||||
* (the upper area is used as a scratch bo; see fd4_query)
|
||||
*/
|
||||
struct fd_bo *vsc_size_mem;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "util/u_format.h"
|
||||
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
|
||||
#include "fd4_emit.h"
|
||||
#include "fd4_blend.h"
|
||||
|
|
@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx)
|
|||
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
|
||||
OUT_RING(ring, 0x0);
|
||||
|
||||
fd_hw_query_enable(ctx, ring);
|
||||
|
||||
ctx->needs_rb_fbd = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "freedreno_util.h"
|
||||
|
||||
#include "fd4_query.h"
|
||||
#include "fd4_context.h"
|
||||
#include "fd4_draw.h"
|
||||
#include "fd4_format.h"
|
||||
|
||||
|
|
@ -81,7 +82,12 @@ static uint64_t
|
|||
count_samples(const struct fd_rb_samp_ctrs *start,
|
||||
const struct fd_rb_samp_ctrs *end)
|
||||
{
|
||||
return end->ctr[0] - start->ctr[0];
|
||||
uint64_t n = 0;
|
||||
|
||||
for (unsigned i = 0; i < 16; i += 4)
|
||||
n += end->ctr[i] - start->ctr[i];
|
||||
|
||||
return n / 2;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
|
|||
result->b |= (n > 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Time Elapsed Query:
|
||||
*
|
||||
* Note: we could in theory support timestamp queries, but they
|
||||
* won't give sensible results for tilers.
|
||||
*/
|
||||
|
||||
static void
|
||||
time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
/* Right now, the assignment of countable to counter register is
|
||||
* just hard coded. If we start exposing more countables than we
|
||||
* have counters, we will need to be more clever.
|
||||
*/
|
||||
fd_wfi(ctx, ring);
|
||||
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
|
||||
OUT_RING(ring, CP_ALWAYS_COUNT);
|
||||
}
|
||||
|
||||
static struct fd_hw_sample *
|
||||
time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
|
||||
|
||||
/* use unused part of vsc_size_mem as scratch space, to avoid
|
||||
* extra allocation:
|
||||
*/
|
||||
struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
|
||||
const int sample_off = 128;
|
||||
const int addr_off = sample_off + 8;
|
||||
|
||||
debug_assert(ctx->screen->max_freq > 0);
|
||||
|
||||
/* Basic issue is that we need to read counter value to a relative
|
||||
* destination (with per-tile offset) rather than absolute dest
|
||||
* addr. But there is no pm4 packet that can do that. This is
|
||||
* where it would be *really* nice if we could write our own fw
|
||||
* since afaict implementing the sort of packet we need would be
|
||||
* trivial.
|
||||
*
|
||||
* Instead, we:
|
||||
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
|
||||
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
|
||||
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
|
||||
* address to the per-sample offset in the scratch buffer
|
||||
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
|
||||
* to CP_ME_NRT_ADDR
|
||||
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch
|
||||
* buffer to CP_ME_NRT_DATA to trigger the write out to query
|
||||
* result buffer
|
||||
*
|
||||
* Straightforward, right?
|
||||
*
|
||||
* Maybe could swap the order of things in the scratch buffer to
|
||||
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
|
||||
* shot, but that's really just polishing a turd..
|
||||
*/
|
||||
|
||||
fd_wfi(ctx, ring);
|
||||
|
||||
/* copy sample counter _LO and _HI to scratch: */
|
||||
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
|
||||
CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
|
||||
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
|
||||
|
||||
/* ok... here we really *would* like to use the CP_SET_CONSTANT
|
||||
* mode which can add a constant to value in reg2 and write to
|
||||
* reg1... *but* that only works for banked/context registers,
|
||||
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some
|
||||
* CP math to the scratch buffer instead:
|
||||
*
|
||||
* (note first 8 bytes are counter value, use offset 0x8 for
|
||||
* address calculation)
|
||||
*/
|
||||
|
||||
/* per-sample offset to scratch bo: */
|
||||
OUT_PKT3(ring, CP_MEM_WRITE, 2);
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
OUT_RING(ring, samp->offset);
|
||||
|
||||
/* now add to that the per-tile base: */
|
||||
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
|
||||
CP_REG_TO_MEM_0_ACCUMULATE |
|
||||
CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
|
||||
/* now copy that back to CP_ME_NRT_ADDR: */
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
|
||||
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
|
||||
|
||||
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
|
||||
* to trigger the write to result buffer
|
||||
*/
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
|
||||
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
|
||||
|
||||
/* and again to get the value of the _HI reg from scratch: */
|
||||
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
|
||||
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
|
||||
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
|
||||
|
||||
/* Sigh.. */
|
||||
|
||||
return samp;
|
||||
}
|
||||
|
||||
static void
|
||||
time_elapsed_accumulate_result(struct fd_context *ctx,
|
||||
const void *start, const void *end,
|
||||
union pipe_query_result *result)
|
||||
{
|
||||
uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
|
||||
/* max_freq is in Hz, convert cycle count to ns: */
|
||||
result->u64 += n * 1000000000 / ctx->screen->max_freq;
|
||||
}
|
||||
|
||||
static const struct fd_hw_sample_provider occlusion_counter = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
|
||||
.active = FD_STAGE_DRAW,
|
||||
|
|
@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
|
|||
.accumulate_result = occlusion_predicate_accumulate_result,
|
||||
};
|
||||
|
||||
static const struct fd_hw_sample_provider time_elapsed = {
|
||||
.query_type = PIPE_QUERY_TIME_ELAPSED,
|
||||
.active = FD_STAGE_DRAW,
|
||||
.enable = time_elapsed_enable,
|
||||
.get_sample = time_elapsed_get_sample,
|
||||
.accumulate_result = time_elapsed_accumulate_result,
|
||||
};
|
||||
|
||||
void fd4_query_context_init(struct pipe_context *pctx)
|
||||
{
|
||||
fd_hw_query_register_provider(pctx, &occlusion_counter);
|
||||
fd_hw_query_register_provider(pctx, &occlusion_predicate);
|
||||
fd_hw_query_register_provider(pctx, &time_elapsed);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
|
|||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
|
|
|||
|
|
@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
|
|||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
|
@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
|
|||
CP_UNKNOWN_1A = 26,
|
||||
CP_UNKNOWN_4E = 78,
|
||||
CP_WIDE_REG_WRITE = 116,
|
||||
CP_SCRATCH_TO_REG = 77,
|
||||
CP_REG_TO_SCRATCH = 74,
|
||||
CP_WAIT_MEM_WRITES = 18,
|
||||
CP_COND_REG_EXEC = 71,
|
||||
CP_MEM_TO_REG = 66,
|
||||
IN_IB_PREFETCH_END = 23,
|
||||
IN_SUBBLK_PREFETCH = 31,
|
||||
IN_INSTR_PREFETCH = 32,
|
||||
|
|
@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
|
|||
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
|
||||
}
|
||||
|
||||
/* CP_REG_TO_MEM, dword 0: REG field (mask 0x0000ffff, shift 0) */
#define REG_CP_REG_TO_MEM_0					0x00000000
#define CP_REG_TO_MEM_0_REG__MASK				0x0000ffff
#define CP_REG_TO_MEM_0_REG__SHIFT				0
/* Shift val into the REG field and drop any bits outside the field mask. */
static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
{
	const uint32_t positioned = val << CP_REG_TO_MEM_0_REG__SHIFT;

	return positioned & CP_REG_TO_MEM_0_REG__MASK;
}
|
||||
/* CP_REG_TO_MEM, dword 0: CNT field (mask 0x3ff80000, shift 19) */
#define CP_REG_TO_MEM_0_CNT__MASK				0x3ff80000
#define CP_REG_TO_MEM_0_CNT__SHIFT				19
/* Shift val into the CNT field and drop any bits outside the field mask. */
static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
{
	const uint32_t positioned = val << CP_REG_TO_MEM_0_CNT__SHIFT;

	return positioned & CP_REG_TO_MEM_0_CNT__MASK;
}
/* CP_REG_TO_MEM, dword 0: single-bit flags */
#define CP_REG_TO_MEM_0_64B					0x40000000
#define CP_REG_TO_MEM_0_ACCUMULATE				0x80000000
|
||||
|
||||
/* CP_REG_TO_MEM, dword 1: DEST field (full 32 bits, shift 0) */
#define REG_CP_REG_TO_MEM_1					0x00000001
#define CP_REG_TO_MEM_1_DEST__MASK				0xffffffff
#define CP_REG_TO_MEM_1_DEST__SHIFT				0
/* Shift val into the DEST field and apply the field mask. */
static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
{
	const uint32_t positioned = val << CP_REG_TO_MEM_1_DEST__SHIFT;

	return positioned & CP_REG_TO_MEM_1_DEST__MASK;
}
|
||||
|
||||
|
||||
#endif /* ADRENO_PM4_XML */
|
||||
|
|
|
|||
|
|
@ -164,6 +164,9 @@ struct fd_context {
|
|||
*/
|
||||
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
|
||||
|
||||
/* which sample providers were active in the current batch: */
|
||||
uint32_t active_providers;
|
||||
|
||||
/* tracking for current stage, to know when to start/stop
|
||||
* any active queries:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq)
|
|||
void fd_query_screen_init(struct pipe_screen *pscreen);
|
||||
void fd_query_context_init(struct pipe_context *pctx);
|
||||
|
||||
static inline bool
|
||||
skip_begin_query(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* FREEDRENO_QUERY_H_ */
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@ static int pidx(unsigned query_type)
|
|||
return 0;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
return 1;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
return 2;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -89,7 +91,9 @@ static void
|
|||
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
int idx = pidx(hq->provider->query_type);
|
||||
assert(!hq->period);
|
||||
ctx->active_providers |= (1 << idx);
|
||||
hq->period = util_slab_alloc(&ctx->sample_period_pool);
|
||||
list_inithead(&hq->period->list);
|
||||
hq->period->start = get_sample(ctx, ring, hq->base.type);
|
||||
|
|
@ -101,7 +105,9 @@ static void
|
|||
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
int idx = pidx(hq->provider->query_type);
|
||||
assert(hq->period && !hq->period->end);
|
||||
assert(ctx->active_providers & (1 << idx));
|
||||
hq->period->end = get_sample(ctx, ring, hq->base.type);
|
||||
list_addtail(&hq->period->list, &hq->current_periods);
|
||||
hq->period = NULL;
|
||||
|
|
@ -156,6 +162,12 @@ static void
|
|||
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_hw_query *hq = fd_hw_query(q);
|
||||
/* there are a couple special cases, which don't have
|
||||
* a matching ->begin_query():
|
||||
*/
|
||||
if (skip_begin_query(q->type) && !q->active) {
|
||||
fd_hw_begin_query(ctx, q);
|
||||
}
|
||||
if (!q->active)
|
||||
return;
|
||||
if (is_active(hq, ctx->stage))
|
||||
|
|
@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
|
|||
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
|
||||
pipe_reference_init(&samp->reference, 1);
|
||||
samp->size = size;
|
||||
debug_assert(util_is_power_of_two(size));
|
||||
ctx->next_sample_offset = align(ctx->next_sample_offset, size);
|
||||
samp->offset = ctx->next_sample_offset;
|
||||
/* NOTE: util_slab_alloc() does not zero out the buffer: */
|
||||
samp->bo = NULL;
|
||||
|
|
@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
|
|||
assert(samp->tile_stride == tile_stride);
|
||||
return;
|
||||
}
|
||||
samp->bo = bo;
|
||||
samp->bo = fd_bo_ref(bo);
|
||||
samp->num_tiles = num_tiles;
|
||||
samp->tile_stride = tile_stride;
|
||||
}
|
||||
|
|
@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
ctx->stage = stage;
|
||||
}
|
||||
|
||||
/* call the provider->enable() for all the hw queries that were active
|
||||
* in the current batch. This sets up perfctr selector regs statically
|
||||
* for the duration of the batch.
|
||||
*/
|
||||
void
|
||||
fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
|
||||
if (ctx->active_providers & (1 << idx)) {
|
||||
assert(ctx->sample_providers[idx]);
|
||||
if (ctx->sample_providers[idx]->enable)
|
||||
ctx->sample_providers[idx]->enable(ctx, ring);
|
||||
}
|
||||
}
|
||||
ctx->active_providers = 0; /* clear it for next frame */
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_register_provider(struct pipe_context *pctx,
|
||||
const struct fd_hw_sample_provider *provider)
|
||||
|
|
|
|||
|
|
@ -76,6 +76,11 @@ struct fd_hw_sample_provider {
|
|||
/* stages applicable to the query type: */
|
||||
enum fd_render_stage active;
|
||||
|
||||
/* Optional hook for enabling a counter. Guaranteed to happen
|
||||
* at least once before the first ->get_sample() in a batch.
|
||||
*/
|
||||
void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring);
|
||||
|
||||
/* when a new sample is required, emit appropriate cmdstream
|
||||
* and return a sample object:
|
||||
*/
|
||||
|
|
@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
|
|||
struct fd_ringbuffer *ring);
|
||||
void fd_hw_query_set_stage(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring, enum fd_render_stage stage);
|
||||
void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
|
||||
void fd_hw_query_register_provider(struct pipe_context *pctx,
|
||||
const struct fd_hw_sample_provider *provider);
|
||||
void fd_hw_query_init(struct pipe_context *pctx);
|
||||
|
|
|
|||
|
|
@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return is_a3xx(screen) ? 1 : 0;
|
||||
|
||||
/* Queries. */
|
||||
case PIPE_CAP_QUERY_TIME_ELAPSED:
|
||||
case PIPE_CAP_QUERY_TIMESTAMP:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
return 0;
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
return is_a3xx(screen) || is_a4xx(screen);
|
||||
case PIPE_CAP_QUERY_TIME_ELAPSED:
|
||||
/* only a4xx, requires new enough kernel so we know max_freq: */
|
||||
return (screen->max_freq > 0) && is_a4xx(screen);
|
||||
|
||||
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
|
||||
case PIPE_CAP_MIN_TEXEL_OFFSET:
|
||||
|
|
@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
|
|
@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev)
|
|||
}
|
||||
screen->device_id = val;
|
||||
|
||||
if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
|
||||
DBG("could not get gpu freq");
|
||||
/* this limits what performance related queries are
|
||||
* supported but is not fatal
|
||||
*/
|
||||
screen->max_freq = 0;
|
||||
} else {
|
||||
screen->max_freq = val;
|
||||
}
|
||||
|
||||
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
|
||||
DBG("could not get gpu-id");
|
||||
goto fail;
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ struct fd_screen {
|
|||
uint32_t device_id;
|
||||
uint32_t gpu_id; /* 220, 305, etc */
|
||||
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
|
||||
uint32_t max_freq;
|
||||
uint32_t max_rts; /* max # of render targets */
|
||||
|
||||
void *compiler; /* currently unused for a2xx */
|
||||
|
|
|
|||
|
|
@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
|
|||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
|
||||
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
|
||||
struct ir3_instruction *const_off[4];
|
||||
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
|
||||
unsigned i, coords, flags;
|
||||
unsigned nsrc0 = 0, nsrc1 = 0;
|
||||
|
|
|
|||
|
|
@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
|
|||
}
|
||||
|
||||
static void
|
||||
ilo_launch_grid(struct pipe_context *pipe,
|
||||
const uint *block_layout, const uint *grid_layout,
|
||||
uint32_t pc, const void *input)
|
||||
ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
||||
{
|
||||
struct ilo_context *ilo = ilo_context(pipe);
|
||||
struct ilo_shader_state *cs = ilo->state_vector.cs;
|
||||
|
|
@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
|
|||
input_buf.buffer_size =
|
||||
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
|
||||
if (input_buf.buffer_size) {
|
||||
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
|
||||
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
|
||||
&input_buf.buffer_offset, &input_buf.buffer);
|
||||
}
|
||||
|
||||
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
|
||||
|
||||
launch_grid(ilo, block_layout, grid_layout, &input_buf, pc);
|
||||
launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
|
||||
|
||||
ilo_render_invalidate_hw(ilo->render);
|
||||
|
||||
|
|
|
|||
|
|
@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
|||
return ILO_MAX_SAMPLER_VIEWS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
|
|
|
|||
|
|
@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
|
|||
static void
|
||||
ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
|
||||
unsigned start, unsigned count,
|
||||
struct pipe_image_view **views)
|
||||
struct pipe_image_view *views)
|
||||
{
|
||||
#if 0
|
||||
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
|
||||
|
|
|
|||
|
|
@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads )
|
|||
create_rast_threads(rast);
|
||||
|
||||
/* for synchronizing rasterization threads */
|
||||
pipe_barrier_init( &rast->barrier, rast->num_threads );
|
||||
if (rast->num_threads > 0) {
|
||||
pipe_barrier_init( &rast->barrier, rast->num_threads );
|
||||
}
|
||||
|
||||
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
|
||||
|
||||
|
|
@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
|
|||
}
|
||||
|
||||
/* for synchronizing rasterization threads */
|
||||
pipe_barrier_destroy( &rast->barrier );
|
||||
if (rast->num_threads > 0) {
|
||||
pipe_barrier_destroy( &rast->barrier );
|
||||
}
|
||||
|
||||
lp_scene_queue_destroy(rast->full_scenes);
|
||||
|
||||
|
|
|
|||
|
|
@ -169,8 +169,8 @@ struct lp_setup_context
|
|||
};
|
||||
|
||||
static inline void
|
||||
scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
|
||||
struct u_rect *scissor)
|
||||
scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
|
||||
const struct u_rect *scissor)
|
||||
{
|
||||
/* left */
|
||||
scis_planes[0] = (bbox->x0 < scissor->x0);
|
||||
|
|
|
|||
|
|
@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup,
|
|||
*/
|
||||
if (nr_planes > 4) {
|
||||
/* why not just use draw_regions */
|
||||
struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
const struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
struct lp_rast_plane *plane_s = &plane[4];
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, scissor);
|
||||
|
|
|
|||
|
|
@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
*/
|
||||
if (nr_planes > 3) {
|
||||
/* why not just use draw_regions */
|
||||
struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
const struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
struct lp_rast_plane *plane_s = &plane[3];
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, scissor);
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@ NV30_C_SOURCES := \
|
|||
nv30/nvfx_vertprog.c
|
||||
|
||||
NV50_C_SOURCES := \
|
||||
nv50/g80_defs.xml.h \
|
||||
nv50/g80_texture.xml.h \
|
||||
nv50/nv50_2d.xml.h \
|
||||
nv50/nv50_3ddefs.xml.h \
|
||||
nv50/nv50_3d.xml.h \
|
||||
|
|
@ -68,7 +70,6 @@ NV50_C_SOURCES := \
|
|||
nv50/nv50_compute.xml.h \
|
||||
nv50/nv50_context.c \
|
||||
nv50/nv50_context.h \
|
||||
nv50/nv50_defs.xml.h \
|
||||
nv50/nv50_formats.c \
|
||||
nv50/nv50_miptree.c \
|
||||
nv50/nv50_program.c \
|
||||
|
|
@ -93,7 +94,6 @@ NV50_C_SOURCES := \
|
|||
nv50/nv50_state_validate.c \
|
||||
nv50/nv50_surface.c \
|
||||
nv50/nv50_tex.c \
|
||||
nv50/nv50_texture.xml.h \
|
||||
nv50/nv50_transfer.c \
|
||||
nv50/nv50_transfer.h \
|
||||
nv50/nv50_vbo.c \
|
||||
|
|
@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \
|
|||
codegen/nv50_ir_target_nvc0.h
|
||||
|
||||
NVC0_C_SOURCES := \
|
||||
nvc0/gm107_texture.xml.h \
|
||||
nvc0/nvc0_3d.xml.h \
|
||||
nvc0/nvc0_compute.c \
|
||||
nvc0/nvc0_compute.h \
|
||||
|
|
|
|||
|
|
@ -232,6 +232,8 @@ enum operation
|
|||
#define NV50_IR_SUBOP_SHFL_UP 1
|
||||
#define NV50_IR_SUBOP_SHFL_DOWN 2
|
||||
#define NV50_IR_SUBOP_SHFL_BFLY 3
|
||||
#define NV50_IR_SUBOP_LOAD_LOCKED 1
|
||||
#define NV50_IR_SUBOP_STORE_UNLOCKED 2
|
||||
#define NV50_IR_SUBOP_MADSP_SD 0xffff
|
||||
// Yes, we could represent those with DataType.
|
||||
// Or put the type into operation and have a couple 1000 values in that enum.
|
||||
|
|
|
|||
|
|
@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
|
|||
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
|
||||
break;
|
||||
default:
|
||||
if (i->op == OP_SELP) {
|
||||
assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
|
||||
srcId(i->src(s), 42);
|
||||
}
|
||||
// ignore here, can be predicate or flags, but must not be address
|
||||
break;
|
||||
}
|
||||
|
|
@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
|
|||
{
|
||||
emitForm_21(i, 0x250, 0x050);
|
||||
|
||||
if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
|
||||
if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
|
||||
code[1] |= 1 << 13;
|
||||
}
|
||||
|
||||
|
|
@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask
|
|||
|
||||
defId(i->def(0), 2);
|
||||
srcId(i->src(0), 10);
|
||||
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
|
||||
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
|
||||
|
||||
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
|
||||
code[1] |= 1 << 9; // dall
|
||||
|
|
|
|||
|
|
@ -193,6 +193,8 @@ private:
|
|||
void emitNOP();
|
||||
void emitKIL();
|
||||
void emitOUT();
|
||||
|
||||
void emitMEMBAR();
|
||||
};
|
||||
|
||||
/*******************************************************************************
|
||||
|
|
@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
|
|||
case SV_INVOCATION_ID : id = 0x11; break;
|
||||
case SV_THREAD_KILL : id = 0x13; break;
|
||||
case SV_INVOCATION_INFO: id = 0x1d; break;
|
||||
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
|
||||
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
|
||||
default:
|
||||
assert(!"invalid system value");
|
||||
id = 0;
|
||||
|
|
@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD()
|
|||
emitRND (0x27);
|
||||
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
|
||||
emitField(0x1c, 8, insn->subOp);
|
||||
emitGPR (0x14, insn->src(1));
|
||||
if (insn->predSrc != 1)
|
||||
emitGPR (0x14, insn->src(1));
|
||||
else
|
||||
emitGPR (0x14);
|
||||
emitGPR (0x08, insn->src(0));
|
||||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
|
@ -2327,22 +2334,34 @@ void
|
|||
CodeEmitterGM107::emitATOM()
|
||||
{
|
||||
unsigned dType, subOp;
|
||||
switch (insn->dType) {
|
||||
case TYPE_U32: dType = 0; break;
|
||||
case TYPE_S32: dType = 1; break;
|
||||
case TYPE_U64: dType = 2; break;
|
||||
case TYPE_F32: dType = 3; break;
|
||||
case TYPE_B128: dType = 4; break;
|
||||
case TYPE_S64: dType = 5; break;
|
||||
default: assert(!"unexpected dType"); dType = 0; break;
|
||||
}
|
||||
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
|
||||
subOp = 8;
|
||||
else
|
||||
subOp = insn->subOp;
|
||||
assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */
|
||||
|
||||
emitInsn (0xed000000);
|
||||
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
|
||||
switch (insn->dType) {
|
||||
case TYPE_U32: dType = 0; break;
|
||||
case TYPE_U64: dType = 1; break;
|
||||
default: assert(!"unexpected dType"); dType = 0; break;
|
||||
}
|
||||
subOp = 15;
|
||||
|
||||
emitInsn (0xee000000);
|
||||
} else {
|
||||
switch (insn->dType) {
|
||||
case TYPE_U32: dType = 0; break;
|
||||
case TYPE_S32: dType = 1; break;
|
||||
case TYPE_U64: dType = 2; break;
|
||||
case TYPE_F32: dType = 3; break;
|
||||
case TYPE_B128: dType = 4; break;
|
||||
case TYPE_S64: dType = 5; break;
|
||||
default: assert(!"unexpected dType"); dType = 0; break;
|
||||
}
|
||||
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
|
||||
subOp = 8;
|
||||
else
|
||||
subOp = insn->subOp;
|
||||
|
||||
emitInsn (0xed000000);
|
||||
}
|
||||
|
||||
emitField(0x34, 4, subOp);
|
||||
emitField(0x31, 3, dType);
|
||||
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
|
||||
|
|
@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT()
|
|||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterGM107::emitMEMBAR()
|
||||
{
|
||||
emitInsn (0xef980000);
|
||||
emitField(0x08, 2, insn->subOp >> 2);
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* assembler front-end
|
||||
******************************************************************************/
|
||||
|
|
@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||
case OP_RESTART:
|
||||
emitOUT();
|
||||
break;
|
||||
case OP_MEMBAR:
|
||||
emitMEMBAR();
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid opcode");
|
||||
emitNOP();
|
||||
|
|
|
|||
|
|
@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
|
|||
|
||||
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
|
||||
setSrc(i, 0, 0);
|
||||
setSrc(i, 1, 2);
|
||||
if (i->predSrc != 1)
|
||||
setSrc(i, 1, 2);
|
||||
|
||||
if (i->getIndirect(0, 0)) {
|
||||
assert(!i->getIndirect(1, 0));
|
||||
|
|
@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
|
|||
|
||||
emitForm_ADD(i);
|
||||
|
||||
if (!i->srcExists(1))
|
||||
if (!i->srcExists(1) || i->predSrc == 1)
|
||||
srcId(i->src(0), 32 + 14);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
|
|||
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
|
||||
break;
|
||||
default:
|
||||
if (i->op == OP_SELP) {
|
||||
// OP_SELP is used to implement shared+atomics on Fermi.
|
||||
assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
|
||||
srcId(i->src(s), 49);
|
||||
}
|
||||
// ignore here, can be predicate or flags, but must not be address
|
||||
break;
|
||||
}
|
||||
|
|
@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
|
|||
{
|
||||
emitForm_A(i, HEX64(20000000, 00000004));
|
||||
|
||||
if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
|
||||
if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
|
||||
code[1] |= 1 << 20;
|
||||
}
|
||||
|
||||
|
|
@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
|
|||
|
||||
defId(i->def(0), 14);
|
||||
srcId(i->src(0), 20);
|
||||
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
|
||||
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
|
||||
|
||||
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
|
||||
code[0] |= 1 << 9; // dall
|
||||
|
|
@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
|
|||
switch (i->src(0).getFile()) {
|
||||
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
|
||||
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
|
||||
case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
|
||||
case FILE_MEMORY_SHARED:
|
||||
if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
|
||||
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
|
||||
opc = 0xb8000000;
|
||||
else
|
||||
opc = 0xcc000000;
|
||||
} else {
|
||||
opc = 0xc9000000;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid memory file");
|
||||
opc = 0;
|
||||
|
|
@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
|
|||
code[0] = 0x00000005;
|
||||
code[1] = opc;
|
||||
|
||||
if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
|
||||
// Unlocked store on shared memory can fail.
|
||||
if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
|
||||
i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
|
||||
assert(i->defExists(0));
|
||||
defId(i->def(0), 8);
|
||||
}
|
||||
}
|
||||
|
||||
setAddressByFile(i->src(0));
|
||||
srcId(i->src(1), 14);
|
||||
srcId(i->src(0).getIndirect(0), 20);
|
||||
|
|
@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
|
|||
switch (i->src(0).getFile()) {
|
||||
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
|
||||
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
|
||||
case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
|
||||
case FILE_MEMORY_SHARED:
|
||||
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
|
||||
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
|
||||
opc = 0xa8000000;
|
||||
else
|
||||
opc = 0xc4000000;
|
||||
} else {
|
||||
opc = 0xc1000000;
|
||||
}
|
||||
break;
|
||||
case FILE_MEMORY_CONST:
|
||||
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
|
||||
emitMOV(i); // not sure if this is any better
|
||||
|
|
@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
|
|||
}
|
||||
code[1] = opc;
|
||||
|
||||
if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
|
||||
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
|
||||
assert(i->defExists(1));
|
||||
defId(i->def(1), 32 + 18);
|
||||
}
|
||||
}
|
||||
|
||||
defId(i->def(0), 14);
|
||||
|
||||
setAddressByFile(i->src(0));
|
||||
|
|
|
|||
|
|
@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file)
|
|||
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
|
||||
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
|
||||
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_NULL:
|
||||
default:
|
||||
|
|
@ -858,6 +859,11 @@ public:
|
|||
};
|
||||
std::vector<Resource> resources;
|
||||
|
||||
struct MemoryFile {
|
||||
bool shared;
|
||||
};
|
||||
std::vector<MemoryFile> memoryFiles;
|
||||
|
||||
private:
|
||||
int inferSysValDirection(unsigned sn) const;
|
||||
bool scanDeclaration(const struct tgsi_full_declaration *);
|
||||
|
|
@ -904,6 +910,7 @@ bool Source::scanSource()
|
|||
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
|
||||
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
||||
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
|
||||
memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
|
||||
|
||||
info->immd.bufSize = 0;
|
||||
|
||||
|
|
@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
for (i = first; i <= last; ++i)
|
||||
textureViews[i].target = decl->SamplerView.Resource;
|
||||
break;
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (i = first; i <= last; ++i)
|
||||
memoryFiles[i].shared = decl->Declaration.Shared;
|
||||
break;
|
||||
case TGSI_FILE_NULL:
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
for (i = first; i <= last; ++i)
|
||||
tempArrayId[i] = arrayId;
|
||||
|
|
@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
|
||||
first, last - first + 1)));
|
||||
break;
|
||||
case TGSI_FILE_NULL:
|
||||
case TGSI_FILE_ADDRESS:
|
||||
case TGSI_FILE_CONSTANT:
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
|
|
@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
|
|||
|
||||
sym->reg.fileIndex = fileIdx;
|
||||
|
||||
if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
|
||||
sym->setFile(FILE_MEMORY_SHARED);
|
||||
|
||||
if (idx >= 0) {
|
||||
if (sym->reg.file == FILE_SHADER_INPUT)
|
||||
sym->setOffset(info->in[idx].slot[c] * 4);
|
||||
|
|
@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c)
|
|||
int idx = dst.getIndex(0);
|
||||
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
||||
|
||||
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
|
||||
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY)
|
||||
return NULL;
|
||||
|
||||
if (dst.isIndirect(0) ||
|
||||
|
|
@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
|
|
@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
Symbol *sym;
|
||||
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
|
||||
off = NULL;
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
|
||||
tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
|
||||
} else {
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c);
|
||||
}
|
||||
|
||||
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
|
||||
|
|
@ -2337,7 +2353,8 @@ Converter::handleSTORE()
|
|||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
|
||||
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(tgsi.getDst(0).getMask() & (1 << c)))
|
||||
continue;
|
||||
|
|
@ -2346,11 +2363,11 @@ Converter::handleSTORE()
|
|||
Value *off;
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
|
||||
off = NULL;
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
|
||||
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
|
||||
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
|
||||
} else {
|
||||
off = fetchSrc(0, 0);
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
|
||||
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
|
||||
}
|
||||
|
||||
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
|
||||
|
|
@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
|||
std::vector<Value *> defv;
|
||||
LValue *dst = getScratch();
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
|
|
@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
|||
Value *off = fetchSrc(1, c), *off2 = NULL;
|
||||
Value *sym;
|
||||
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
|
||||
tgsi.getSrc(1).getValueU32(c, info));
|
||||
else
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
|
|
|
|||
|
|
@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
|
|||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::handleSharedATOM(Instruction *atom)
|
||||
{
|
||||
assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
|
||||
|
||||
BasicBlock *currBB = atom->bb;
|
||||
BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
|
||||
BasicBlock *joinBB = atom->bb->splitAfter(atom);
|
||||
|
||||
bld.setPosition(currBB, true);
|
||||
assert(!currBB->joinAt);
|
||||
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
|
||||
|
||||
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
|
||||
currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
|
||||
|
||||
bld.setPosition(tryLockAndSetBB, true);
|
||||
|
||||
Instruction *ld =
|
||||
bld.mkLoad(TYPE_U32, atom->getDef(0),
|
||||
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
|
||||
ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
|
||||
ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
|
||||
|
||||
Value *stVal;
|
||||
if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
|
||||
// Read the old value, and write the new one.
|
||||
stVal = atom->getSrc(1);
|
||||
} else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
|
||||
CmpInstruction *set =
|
||||
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||
TYPE_U32, ld->getDef(0), atom->getSrc(1));
|
||||
set->setPredicate(CC_P, ld->getDef(1));
|
||||
|
||||
Instruction *selp =
|
||||
bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0),
|
||||
atom->getSrc(2), set->getDef(0));
|
||||
selp->src(2).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
selp->setPredicate(CC_P, ld->getDef(1));
|
||||
|
||||
stVal = selp->getDef(0);
|
||||
} else {
|
||||
operation op;
|
||||
|
||||
switch (atom->subOp) {
|
||||
case NV50_IR_SUBOP_ATOM_ADD:
|
||||
op = OP_ADD;
|
||||
break;
|
||||
case NV50_IR_SUBOP_ATOM_AND:
|
||||
op = OP_AND;
|
||||
break;
|
||||
case NV50_IR_SUBOP_ATOM_OR:
|
||||
op = OP_OR;
|
||||
break;
|
||||
case NV50_IR_SUBOP_ATOM_XOR:
|
||||
op = OP_XOR;
|
||||
break;
|
||||
case NV50_IR_SUBOP_ATOM_MIN:
|
||||
op = OP_MIN;
|
||||
break;
|
||||
case NV50_IR_SUBOP_ATOM_MAX:
|
||||
op = OP_MAX;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Instruction *i =
|
||||
bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
|
||||
atom->getSrc(1));
|
||||
i->setPredicate(CC_P, ld->getDef(1));
|
||||
|
||||
stVal = i->getDef(0);
|
||||
}
|
||||
|
||||
Instruction *st =
|
||||
bld.mkStore(OP_STORE, TYPE_U32,
|
||||
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
|
||||
NULL, stVal);
|
||||
st->setPredicate(CC_P, ld->getDef(1));
|
||||
st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
|
||||
|
||||
// Loop until the lock is acquired.
|
||||
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
|
||||
tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
|
||||
tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
|
||||
bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
|
||||
|
||||
bld.remove(atom);
|
||||
|
||||
bld.setPosition(joinBB, false);
|
||||
bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LoweringPass::handleATOM(Instruction *atom)
|
||||
{
|
||||
|
|
@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
|
|||
sv = SV_LBASE;
|
||||
break;
|
||||
case FILE_MEMORY_SHARED:
|
||||
sv = SV_SBASE;
|
||||
break;
|
||||
handleSharedATOM(atom);
|
||||
return true;
|
||||
default:
|
||||
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
|
||||
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
|
||||
|
|
@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
|
|||
bool
|
||||
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
|
||||
{
|
||||
if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
|
||||
// ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
|
||||
return false;
|
||||
}
|
||||
|
||||
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
|
||||
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ protected:
|
|||
bool handleATOM(Instruction *);
|
||||
bool handleCasExch(Instruction *, bool needCctl);
|
||||
void handleSurfaceOpNVE4(TexInstruction *);
|
||||
void handleSharedATOM(Instruction *);
|
||||
|
||||
void checkPredicate(Instruction *);
|
||||
|
||||
|
|
|
|||
|
|
@ -1539,6 +1539,7 @@ private:
|
|||
void handleCVT_CVT(Instruction *);
|
||||
void handleCVT_EXTBF(Instruction *);
|
||||
void handleSUCLAMP(Instruction *);
|
||||
void handleNEG(Instruction *);
|
||||
|
||||
BuildUtil bld;
|
||||
};
|
||||
|
|
@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
|
|||
if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
|
||||
return false;
|
||||
|
||||
if (src->getInsn()->saturate)
|
||||
return false;
|
||||
|
||||
if (src->getInsn()->postFactor)
|
||||
return false;
|
||||
if (toOp == OP_SAD) {
|
||||
|
|
@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn)
|
|||
insn->setSrc(0, add->getSrc(s));
|
||||
}
|
||||
|
||||
// NEG(AND(SET, 1)) -> SET
|
||||
void
|
||||
AlgebraicOpt::handleNEG(Instruction *i) {
|
||||
Instruction *src = i->getSrc(0)->getInsn();
|
||||
ImmediateValue imm;
|
||||
int b;
|
||||
|
||||
if (isFloatType(i->sType) || !src || src->op != OP_AND)
|
||||
return;
|
||||
|
||||
if (src->src(0).getImmediate(imm))
|
||||
b = 1;
|
||||
else if (src->src(1).getImmediate(imm))
|
||||
b = 0;
|
||||
else
|
||||
return;
|
||||
|
||||
if (!imm.isInteger(1))
|
||||
return;
|
||||
|
||||
Instruction *set = src->getSrc(b)->getInsn();
|
||||
if ((set->op == OP_SET || set->op == OP_SET_AND ||
|
||||
set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
|
||||
!isFloatType(set->dType)) {
|
||||
i->def(0).replace(set->getDef(0), false);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
AlgebraicOpt::visit(BasicBlock *bb)
|
||||
{
|
||||
|
|
@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
|
|||
case OP_SUCLAMP:
|
||||
handleSUCLAMP(i);
|
||||
break;
|
||||
case OP_NEG:
|
||||
handleNEG(i);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue