Merge remote-tracking branch 'origin/master' into vulkan

This commit is contained in:
Kristian Høgsberg Kristensen 2016-02-24 12:50:27 -08:00
commit 59f5728995
358 changed files with 12431 additions and 4750 deletions

View file

@ -21,13 +21,8 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# use c99 compiler by default
ifeq ($(LOCAL_CC),)
ifeq ($(LOCAL_IS_HOST_MODULE),true)
LOCAL_CC := $(HOST_CC) -std=c99 -D_GNU_SOURCE
else
LOCAL_CC := $(TARGET_CC) -std=c99
endif
LOCAL_CFLAGS += -D_GNU_SOURCE
endif
LOCAL_C_INCLUDES += \
@ -37,6 +32,7 @@ LOCAL_C_INCLUDES += \
MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
# define ANDROID_VERSION (e.g., 4.0.x => 0x0400)
LOCAL_CFLAGS += \
-Wno-unused-parameter \
-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
@ -60,6 +56,10 @@ LOCAL_CFLAGS += \
-fvisibility=hidden \
-Wno-sign-compare
# mesa requires at least c99 compiler
LOCAL_CONLYFLAGS += \
-std=c99
ifeq ($(strip $(MESA_ENABLE_ASM)),true)
ifeq ($(TARGET_ARCH),x86)
LOCAL_CFLAGS += \

View file

@ -1 +1 @@
11.2.0-devel
11.3.0-devel

View file

@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
LIBDRM_INTEL_REQUIRED=2.4.61
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
LIBDRM_FREEDRENO_REQUIRED=2.4.65
LIBDRM_FREEDRENO_REQUIRED=2.4.67
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
PRESENTPROTO_REQUIRED=1.0
@ -2297,6 +2297,9 @@ dnl in LLVM_LIBS.
if test "x$MESA_LLVM" != x0; then
if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
fi
LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
dnl llvm-config may not give the right answer when llvm is a built as a

View file

@ -215,10 +215,10 @@ GLES3.1, GLSL ES 3.1
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_shader_atomic_counters DONE (i965)
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_shader_image_load_store DONE (i965)
GL_ARB_shader_image_size DONE (i965)
GL_ARB_shader_storage_buffer_object DONE (i965)
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@ -249,14 +249,14 @@ GLES3.2, GLSL ES 3.2
GL_OES_primitive_bounding_box                        not started
GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
GL_OES_shader_image_atomic not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store)
GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
GL_OES_texture_border_clamp not started (based on GL_ARB_texture_border_clamp, which is done)
GL_OES_texture_border_clamp DONE (all drivers)
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
GL_OES_texture_stencil8 not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
More info about these features and the work involved can be found at

View file

@ -91,6 +91,14 @@ This is only valid for versions >= 3.0.
<li> Mesa may not really implement all the features of the given version.
(for developers only)
</ul>
<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
glGetString(GL_VERSION) for OpenGL ES.
<ul>
<li> The format should be MAJOR.MINOR
<li> Examples: 2.0, 3.0, 3.1
<li> Mesa may not really implement all the features of the given version.
(for developers only)
</ul>
<li>MESA_GLSL_VERSION_OVERRIDE - changes the value returned by
glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
"130". Mesa will not really implement all the features of the given language version
@ -224,7 +232,7 @@ See src/mesa/state_tracker/st_debug.c for other options.
<li>LP_PERF - a comma-separated list of options to selectively no-op various
parts of the driver. See the source code for details.
<li>LP_NUM_THREADS - an integer indicating how many threads to use for rendering.
Zero turns of threading completely. The default value is the number of CPU
Zero turns off threading completely. The default value is the number of CPU
cores present.
</ul>
@ -245,6 +253,25 @@ for details.
</ul>
<h3>VC4 driver environment variables</h3>
<ul>
<li>VC4_DEBUG - a comma-separated list of named flags, which do various things:
<ul>
<li>cl - dump command list during creation</li>
<li>qpu - dump generated QPU instructions</li>
<li>qir - dump QPU IR during program compile</li>
<li>nir - dump NIR during program compile</li>
<li>tgsi - dump TGSI during program compile</li>
<li>shaderdb - dump program compile information for shader-db analysis</li>
<li>perf - print during performance-related events</li>
<li>norast - skip actual hardware execution of commands</li>
<li>always_flush - flush after each draw call</li>
<li>always_sync - wait for finish after each flush</li>
<li>dump - write a GPU command stream trace file (VC4 simulator only)</li>
</ul>
</ul>
<p>
Other Gallium drivers have their own environment variables. These may change
frequently so the source code should be consulted for details.

View file

@ -58,6 +58,9 @@ On Windows with MinGW, install flex and bison with:
For MSVC on Windows, install
<a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
</li>
<br>
<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
</li>
</ul>

View file

@ -74,6 +74,10 @@ TBD.
<h2>Changes</h2>
Microsoft Visual Studio 2013 or later is now required for building
on Windows.
Previously, Visual Studio 2008 and later were supported.
TBD.
</div>

61
docs/relnotes/11.3.0.html Normal file
View file

@ -0,0 +1,61 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 11.3.0 Release Notes / TBD</h1>
<p>
Mesa 11.3.0 is a new development release.
People who are concerned with stability and reliability should stick
with a previous release or wait for Mesa 11.3.1.
</p>
<p>
Mesa 11.3.0 implements the OpenGL 4.1 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
4.1 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD.
</pre>
<h2>New features</h2>
<p>
Note: some of the new features are only available with certain drivers.
</p>
<ul>
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
</ul>
<h2>Bug fixes</h2>
TBD.
<h2>Changes</h2>
TBD.
</div>
</body>
</html>

View file

@ -112,6 +112,7 @@ CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
CHIPSET(0x190B, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
@ -122,16 +123,17 @@ CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
CHIPSET(0x1921, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
CHIPSET(0x1923, skl_gt3, "Intel(R) Skylake GT3e")
CHIPSET(0x1926, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics 555 (Skylake GT3e)")
CHIPSET(0x192D, skl_gt3, "Intel(R) Iris Graphics P555 (Skylake GT3e)")
CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")

View file

@ -78,6 +78,7 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
DECL_TYPE(sampler, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID)
DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT)
DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT)
DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT)

View file

@ -685,18 +685,6 @@ struct ast_type_qualifier {
*/
bool has_auxiliary_storage() const;
/**
* \brief Return string representation of interpolation qualifier.
*
* If an interpolation qualifier is present, then return that qualifier's
* string representation. Otherwise, return null. For example, if the
* noperspective bit is set, then this returns "noperspective".
*
* If multiple interpolation qualifiers are somehow present, then the
* returned string is undefined but not null.
*/
const char *interpolation_string() const;
bool merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,

View file

@ -1405,9 +1405,9 @@ emit_inline_matrix_constructor(const glsl_type *type,
zero.d[i] = 0.0;
ir_instruction *inst =
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
new(ctx) ir_constant(rhs_var->type, &zero),
NULL);
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
new(ctx) ir_constant(rhs_var->type, &zero),
NULL);
instructions->push_tail(inst);
ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
@ -1422,36 +1422,36 @@ emit_inline_matrix_constructor(const glsl_type *type,
* columns than rows).
*/
static const unsigned rhs_swiz[4][4] = {
{ 0, 1, 1, 1 },
{ 1, 0, 1, 1 },
{ 1, 1, 0, 1 },
{ 1, 1, 1, 0 }
{ 0, 1, 1, 1 },
{ 1, 0, 1, 1 },
{ 1, 1, 0, 1 },
{ 1, 1, 1, 0 }
};
const unsigned cols_to_init = MIN2(type->matrix_columns,
type->vector_elements);
type->vector_elements);
for (unsigned i = 0; i < cols_to_init; i++) {
ir_constant *const col_idx = new(ctx) ir_constant(i);
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
ir_constant *const col_idx = new(ctx) ir_constant(i);
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
type->vector_elements);
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
type->vector_elements);
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
instructions->push_tail(inst);
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
instructions->push_tail(inst);
}
for (unsigned i = cols_to_init; i < type->matrix_columns; i++) {
ir_constant *const col_idx = new(ctx) ir_constant(i);
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
ir_constant *const col_idx = new(ctx) ir_constant(i);
ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
type->vector_elements);
ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
type->vector_elements);
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
instructions->push_tail(inst);
inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
instructions->push_tail(inst);
}
} else if (first_param->type->is_matrix()) {
/* From page 50 (56 of the PDF) of the GLSL 1.50 spec:
@ -1469,36 +1469,43 @@ emit_inline_matrix_constructor(const glsl_type *type,
/* If the source matrix is smaller, pre-initialize the relevant parts of
* the destination matrix to the identity matrix.
*/
if ((src_matrix->type->matrix_columns < var->type->matrix_columns)
|| (src_matrix->type->vector_elements < var->type->vector_elements)) {
if ((src_matrix->type->matrix_columns < var->type->matrix_columns) ||
(src_matrix->type->vector_elements < var->type->vector_elements)) {
/* If the source matrix has fewer rows, every column of the destination
* must be initialized. Otherwise only the columns in the destination
* that do not exist in the source must be initialized.
*/
unsigned col =
(src_matrix->type->vector_elements < var->type->vector_elements)
? 0 : src_matrix->type->matrix_columns;
/* If the source matrix has fewer rows, every column of the destination
* must be initialized. Otherwise only the columns in the destination
* that do not exist in the source must be initialized.
*/
unsigned col =
(src_matrix->type->vector_elements < var->type->vector_elements)
? 0 : src_matrix->type->matrix_columns;
const glsl_type *const col_type = var->type->column_type();
for (/* empty */; col < var->type->matrix_columns; col++) {
ir_constant_data ident;
const glsl_type *const col_type = var->type->column_type();
for (/* empty */; col < var->type->matrix_columns; col++) {
ir_constant_data ident;
ident.f[0] = 0.0;
ident.f[1] = 0.0;
ident.f[2] = 0.0;
ident.f[3] = 0.0;
if (!col_type->is_double()) {
ident.f[0] = 0.0f;
ident.f[1] = 0.0f;
ident.f[2] = 0.0f;
ident.f[3] = 0.0f;
ident.f[col] = 1.0f;
} else {
ident.d[0] = 0.0;
ident.d[1] = 0.0;
ident.d[2] = 0.0;
ident.d[3] = 0.0;
ident.d[col] = 1.0;
}
ident.f[col] = 1.0;
ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
ir_rvalue *const lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
ir_rvalue *const lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
instructions->push_tail(inst);
}
ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
instructions->push_tail(inst);
}
}
/* Assign columns from the source matrix to the destination matrix.
@ -1507,51 +1514,51 @@ emit_inline_matrix_constructor(const glsl_type *type,
* generate a temporary and copy the parameter there.
*/
ir_variable *const rhs_var =
new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
ir_var_temporary);
new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
ir_var_temporary);
instructions->push_tail(rhs_var);
ir_dereference *const rhs_var_ref =
new(ctx) ir_dereference_variable(rhs_var);
new(ctx) ir_dereference_variable(rhs_var);
ir_instruction *const inst =
new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
instructions->push_tail(inst);
const unsigned last_row = MIN2(src_matrix->type->vector_elements,
var->type->vector_elements);
var->type->vector_elements);
const unsigned last_col = MIN2(src_matrix->type->matrix_columns,
var->type->matrix_columns);
var->type->matrix_columns);
unsigned swiz[4] = { 0, 0, 0, 0 };
for (unsigned i = 1; i < last_row; i++)
swiz[i] = i;
swiz[i] = i;
const unsigned write_mask = (1U << last_row) - 1;
const unsigned write_mask = (1U << last_row) - 1;
for (unsigned i = 0; i < last_col; i++) {
ir_dereference *const lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
ir_rvalue *const rhs_col =
new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
ir_dereference *const lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
ir_rvalue *const rhs_col =
new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
/* If one matrix has columns that are smaller than the columns of the
* other matrix, wrap the column access of the larger with a swizzle
* so that the LHS and RHS of the assignment have the same size (and
* therefore have the same type).
*
* It would be perfectly valid to unconditionally generate the
* swizzles, but this will typically result in a more compact IR tree.
*/
ir_rvalue *rhs;
if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
} else {
rhs = rhs_col;
}
/* If one matrix has columns that are smaller than the columns of the
* other matrix, wrap the column access of the larger with a swizzle
* so that the LHS and RHS of the assignment have the same size (and
* therefore have the same type).
*
* It would be perfectly valid to unconditionally generate the
* swizzles, but this will typically result in a more compact IR tree.
*/
ir_rvalue *rhs;
if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
} else {
rhs = rhs_col;
}
ir_instruction *inst =
new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
instructions->push_tail(inst);
ir_instruction *inst =
new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
instructions->push_tail(inst);
}
} else {
const unsigned cols = type->matrix_columns;

View file

@ -1133,9 +1133,9 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_FUNCTION:
/* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
@ -2268,7 +2268,7 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
type->sampler_array + 2 * type->sampler_shadow;
const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
assert(type_idx < 4);
switch (type->sampler_type) {
switch (type->sampled_type) {
case GLSL_TYPE_FLOAT:
switch (type->sampler_dimensionality) {
case GLSL_SAMPLER_DIM_1D: {
@ -2750,6 +2750,17 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
"vertex shader inputs or fragment shader outputs",
interpolation_string(interpolation));
}
} else if (state->es_shader &&
((mode == ir_var_shader_in &&
state->stage != MESA_SHADER_VERTEX) ||
(mode == ir_var_shader_out &&
state->stage != MESA_SHADER_FRAGMENT))) {
/* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
*
* "When no interpolation qualifier is present, smooth interpolation
* is used."
*/
interpolation = INTERP_QUALIFIER_SMOOTH;
}
return interpolation;
@ -2954,7 +2965,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
"used on image function parameters");
}
if (qual->image_base_type != base_type->sampler_type) {
if (qual->image_base_type != base_type->sampled_type) {
_mesa_glsl_error(loc, state, "format qualifier doesn't match the "
"base data type of the image");
}
@ -4679,8 +4690,7 @@ ast_declarator_list::hir(exec_list *instructions,
&& this->type->qualifier.has_interpolation()
&& this->type->qualifier.flags.q.varying) {
const char *i = this->type->qualifier.interpolation_string();
assert(i != NULL);
const char *i = interpolation_string(var->data.interpolation);
const char *s;
if (this->type->qualifier.flags.q.centroid)
s = "centroid varying";
@ -4710,9 +4720,7 @@ ast_declarator_list::hir(exec_list *instructions,
if (state->is_version(130, 300)
&& this->type->qualifier.has_interpolation()) {
const char *i = this->type->qualifier.interpolation_string();
assert(i != NULL);
const char *i = interpolation_string(var->data.interpolation);
switch (state->stage) {
case MESA_SHADER_VERTEX:
if (this->type->qualifier.flags.q.in) {
@ -6259,7 +6267,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
decl_count);
bool first_member = true;
bool first_member_has_explicit_location;
bool first_member_has_explicit_location = false;
unsigned i = 0;
foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {

View file

@ -102,19 +102,6 @@ ast_type_qualifier::has_auxiliary_storage() const
|| this->flags.q.patch;
}
const char*
ast_type_qualifier::interpolation_string() const
{
if (this->flags.q.smooth)
return "smooth";
else if (this->flags.q.flat)
return "flat";
else if (this->flags.q.noperspective)
return "noperspective";
else
return NULL;
}
/**
* This function merges both duplicate identifiers within a single layout and
* multiple layout qualifiers on a single variable declaration. The

View file

@ -448,8 +448,16 @@ shader_image_load_store(const _mesa_glsl_parse_state *state)
/**
 * Availability predicate for the image atomic built-ins.
 *
 * True when the shading language version passes is_version(420, 320)
 * or when either ARB_shader_image_load_store or OES_shader_image_atomic
 * is enabled.
 */
static bool
shader_image_atomic(const _mesa_glsl_parse_state *state)
{
   /* The stale pre-merge return (which only checked is_version(420, 0)
    * and ARB_shader_image_load_store) was left above the updated one by
    * the merge, making the updated condition unreachable.  Keep only the
    * updated condition.
    */
   return (state->is_version(420, 320) ||
           state->ARB_shader_image_load_store_enable ||
           state->OES_shader_image_atomic_enable);
}
/**
 * Availability predicate for float imageAtomicExchange: true when the
 * version check is_version(450, 320) passes or OES_shader_image_atomic
 * is enabled.
 */
static bool
shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state)
{
   if (state->is_version(450, 320))
      return true;

   return state->OES_shader_image_atomic_enable;
}
static bool
@ -577,17 +585,6 @@ private:
unsigned num_arguments,
unsigned flags);
enum image_function_flags {
IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
IMAGE_FUNCTION_READ_ONLY = (1 << 4),
IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
IMAGE_FUNCTION_MS_ONLY = (1 << 7),
};
/**
* Create a new image built-in function for all known image types.
* \p flags is a bitfield of \c image_function_flags flags.
@ -836,6 +833,18 @@ private:
/** @} */
};
/* Bitfield flags describing properties of the image built-in functions
 * being generated.  NOTE(review): individual flag meanings are inferred
 * from their names and local uses — confirm against the builder code.
 */
enum image_function_flags {
IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
IMAGE_FUNCTION_READ_ONLY = (1 << 4),
IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
IMAGE_FUNCTION_MS_ONLY = (1 << 7),
/* Atomic-exchange variants select a distinct availability predicate
 * (see get_image_available_predicate). */
IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE = (1 << 8)
};
} /* anonymous namespace */
/**
@ -2921,7 +2930,7 @@ builtin_builder::add_image_function(const char *name,
ir_function *f = new(mem_ctx) ir_function(name);
for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
if ((types[i]->sampled_type != GLSL_TYPE_FLOAT ||
(flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
(types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
!(flags & IMAGE_FUNCTION_MS_ONLY)))
@ -2981,7 +2990,9 @@ builtin_builder::add_image_functions(bool glsl)
add_image_function((glsl ? "imageAtomicExchange" :
"__intrinsic_image_atomic_exchange"),
"__intrinsic_image_atomic_exchange",
&builtin_builder::_image_prototype, 1, atom_flags);
&builtin_builder::_image_prototype, 1,
(flags | IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE));
add_image_function((glsl ? "imageAtomicCompSwap" :
"__intrinsic_image_atomic_comp_swap"),
@ -5232,13 +5243,28 @@ builtin_builder::_mid3(const glsl_type *type)
return sig;
}
/**
 * Select the availability predicate for an image built-in from its
 * image type and the image_function_flags bitfield \p flags.
 */
static builtin_available_predicate
get_image_available_predicate(const glsl_type *type, unsigned flags)
{
   const bool wants_exchange =
      (flags & IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE) != 0;

   /* Float atomic exchange has its own, narrower availability rule. */
   if (wants_exchange && type->sampled_type == GLSL_TYPE_FLOAT)
      return shader_image_atomic_exchange_float;

   /* Any other atomic variant uses the generic image-atomic predicate. */
   if (flags & (IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
                IMAGE_FUNCTION_AVAIL_ATOMIC))
      return shader_image_atomic;

   return shader_image_load_store;
}
ir_function_signature *
builtin_builder::_image_prototype(const glsl_type *image_type,
unsigned num_arguments,
unsigned flags)
{
const glsl_type *data_type = glsl_type::get_instance(
image_type->sampler_type,
image_type->sampled_type,
(flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1),
1);
const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ?
@ -5249,10 +5275,9 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
ir_variable *coord = in_var(
glsl_type::ivec(image_type->coordinate_components()), "coord");
const builtin_available_predicate avail =
(flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic :
shader_image_load_store);
ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord);
ir_function_signature *sig = new_sig(
ret_type, get_image_available_predicate(image_type, flags),
2, image, coord);
/* Sample index for multisample images. */
if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS)

View file

@ -770,11 +770,16 @@ builtin_variable_generator::generate_constants()
}
if (state->is_version(430, 310) || state->ARB_compute_shader_enable) {
add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS);
add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS);
add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS);
add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS);
add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS);
add_const("gl_MaxComputeAtomicCounterBuffers",
state->Const.MaxComputeAtomicCounterBuffers);
add_const("gl_MaxComputeAtomicCounters",
state->Const.MaxComputeAtomicCounters);
add_const("gl_MaxComputeImageUniforms",
state->Const.MaxComputeImageUniforms);
add_const("gl_MaxComputeTextureImageUnits",
state->Const.MaxComputeTextureImageUnits);
add_const("gl_MaxComputeUniformComponents",
state->Const.MaxComputeUniformComponents);
add_const_ivec3("gl_MaxComputeWorkGroupCount",
state->Const.MaxComputeWorkGroupCount[0],

View file

@ -2096,6 +2096,9 @@ _check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
if (strncmp(identifier, "GL_", 3) == 0) {
glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
}
if (strcmp(identifier, "defined") == 0) {
glcpp_error (loc, parser, "\"defined\" cannot be used as a macro name");
}
}
static int
@ -2388,6 +2391,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
if (version >= 310) {
if (extensions->ARB_shader_image_load_store)
add_builtin_define(parser, "GL_OES_shader_image_atomic", 1);
if (extensions->OES_geometry_shader) {
add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
add_builtin_define(parser, "GL_OES_geometry_shader", 1);

View file

@ -113,11 +113,7 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state,
if (base == 16)
digits += 2;
#ifdef _MSC_VER
unsigned __int64 value = _strtoui64(digits, NULL, base);
#else
unsigned long long value = strtoull(digits, NULL, base);
#endif
lval->n = (int)value;

View file

@ -120,6 +120,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
this->Const.MaxComputeAtomicCounters = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters;
this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
this->Const.MaxVertexAtomicCounterBuffers =
@ -132,6 +133,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
this->Const.MaxFragmentAtomicCounterBuffers =
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
this->Const.MaxComputeAtomicCounterBuffers =
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers;
this->Const.MaxCombinedAtomicCounterBuffers =
ctx->Const.MaxCombinedAtomicBuffers;
this->Const.MaxAtomicCounterBufferSize =
@ -143,6 +146,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++)
this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
this->Const.MaxComputeTextureImageUnits = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
this->Const.MaxComputeUniformComponents = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents;
this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
@ -151,6 +157,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
this->Const.MaxComputeImageUniforms = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms;
this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
/* ARB_viewport_array */
@ -601,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
@ -946,27 +954,11 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
"the interface block");
}
/* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
*
* "GLSL ES 3.0 does not support interface blocks for shader inputs or
* outputs."
*
* And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
*
* "Only variables output from a shader can be candidates for
* invariance."
*
* From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
*
* "If optional qualifiers are used, they can include interpolation
* qualifiers, auxiliary storage qualifiers, and storage qualifiers
* and they must declare an input, output, or uniform member
* consistent with the interface qualifier of the block"
*/
if (qualifier.flags.q.invariant)
if (!(q.flags.q.in || q.flags.q.out) && qualifier.flags.q.invariant)
_mesa_glsl_error(locp, state,
"invariant qualifiers cannot be used "
"with interface blocks members");
"invariant qualifiers can be used only "
"in interface block members for shader "
"inputs or outputs");
}
}

View file

@ -422,6 +422,11 @@ struct _mesa_glsl_parse_state {
unsigned MaxAtomicCounterBufferSize;
/* ARB_compute_shader */
unsigned MaxComputeAtomicCounterBuffers;
unsigned MaxComputeAtomicCounters;
unsigned MaxComputeImageUniforms;
unsigned MaxComputeTextureImageUnits;
unsigned MaxComputeUniformComponents;
unsigned MaxComputeWorkGroupCount[3];
unsigned MaxComputeWorkGroupSize[3];
@ -588,6 +593,8 @@ struct _mesa_glsl_parse_state {
bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
bool OES_shader_image_atomic_enable;
bool OES_shader_image_atomic_warn;
bool OES_standard_derivatives_enable;
bool OES_standard_derivatives_warn;
bool OES_texture_3D_enable;

View file

@ -1442,7 +1442,7 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
} else {
assert(sampler->type->sampler_type == (int) type->base_type);
assert(sampler->type->sampled_type == (int) type->base_type);
if (sampler->type->sampler_shadow)
assert(type->vector_elements == 4 || type->vector_elements == 1);
else
@ -1696,21 +1696,6 @@ interpolation_string(unsigned interpolation)
return "";
}
glsl_interp_qualifier
ir_variable::determine_interpolation_mode(bool flat_shade)
{
if (this->data.interpolation != INTERP_QUALIFIER_NONE)
return (glsl_interp_qualifier) this->data.interpolation;
int location = this->data.location;
bool is_gl_Color =
location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
if (flat_shade && is_gl_Color)
return INTERP_QUALIFIER_FLAT;
else
return INTERP_QUALIFIER_SMOOTH;
}
const char *const ir_variable::warn_extension_table[] = {
"",
"GL_ARB_shader_stencil_export",

View file

@ -431,17 +431,6 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
/**
* Determine how this variable should be interpolated based on its
* interpolation qualifier (if present), whether it is gl_Color or
* gl_SecondaryColor, and whether flatshading is enabled in the current GL
* state.
*
* The return value will always be either INTERP_QUALIFIER_SMOOTH,
* INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT.
*/
glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
/**
* Determine whether or not a variable is part of a uniform or
* shader storage block.

View file

@ -366,7 +366,6 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
return c;
}
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
@ -374,6 +373,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
case GLSL_TYPE_ERROR:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
assert(!"Should not get here.");
break;
}

View file

@ -88,9 +88,9 @@ copy_constant_to_storage(union gl_constant_value *storage,
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_VOID:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ERROR:
/* All other types should have already been filtered by other
* paths in the caller.

View file

@ -649,15 +649,15 @@ private:
current_var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);
for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
prog->_LinkedShaders[shader_type]->
ImageAccess[this->next_image + j] = access;
const unsigned first = this->next_image;
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_image += MAX2(1, uniform->array_elements);
for (unsigned i = first; i < MIN2(next_image, MAX_IMAGE_UNIFORMS); i++)
prog->_LinkedShaders[shader_type]->ImageAccess[i] = access;
}
}
@ -1038,9 +1038,43 @@ assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
uniform_size->map->put(hidden_uniform_start + hidden_id, name);
}
/**
* Search through the list of empty blocks to find one that fits the current
* uniform.
*/
static int
find_empty_block(struct gl_shader_program *prog,
struct gl_uniform_storage *uniform)
{
const unsigned entries = MAX2(1, uniform->array_elements);
foreach_list_typed(struct empty_uniform_block, block, link,
&prog->EmptyUniformLocations) {
/* Found a block with enough slots to fit the uniform */
if (block->slots == entries) {
unsigned start = block->start;
exec_node_remove(&block->link);
ralloc_free(block);
return start;
/* Found a block with more slots than needed. It can still be used. */
} else if (block->slots > entries) {
unsigned start = block->start;
block->start += entries;
block->slots -= entries;
return start;
}
}
return -1;
}
void
link_assign_uniform_locations(struct gl_shader_program *prog,
unsigned int boolean_true)
unsigned int boolean_true,
unsigned int num_explicit_uniform_locs,
unsigned int max_uniform_locs)
{
ralloc_free(prog->UniformStorage);
prog->UniformStorage = NULL;
@ -1131,6 +1165,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
unsigned total_entries = num_explicit_uniform_locs;
unsigned empty_locs = prog->NumUniformRemapTable - num_explicit_uniform_locs;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
@ -1194,21 +1231,44 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* how many new entries for this uniform? */
const unsigned entries = MAX2(1, uniforms[i].array_elements);
/* resize remap table to fit new entries */
prog->UniformRemapTable =
reralloc(prog,
prog->UniformRemapTable,
gl_uniform_storage *,
prog->NumUniformRemapTable + entries);
/* Find UniformRemapTable for empty blocks where we can fit this uniform. */
int chosen_location = -1;
if (empty_locs)
chosen_location = find_empty_block(prog, &uniforms[i]);
/* Add new entries to the total amount of entries. */
total_entries += entries;
if (chosen_location != -1) {
empty_locs -= entries;
} else {
chosen_location = prog->NumUniformRemapTable;
/* resize remap table to fit new entries */
prog->UniformRemapTable =
reralloc(prog,
prog->UniformRemapTable,
gl_uniform_storage *,
prog->NumUniformRemapTable + entries);
prog->NumUniformRemapTable += entries;
}
/* set pointers for this uniform */
for (unsigned j = 0; j < entries; j++)
prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
prog->UniformRemapTable[chosen_location + j] = &uniforms[i];
/* set the base location in remap table for the uniform */
uniforms[i].remap_location = prog->NumUniformRemapTable;
uniforms[i].remap_location = chosen_location;
}
prog->NumUniformRemapTable += entries;
/* Verify that total amount of entries for explicit and implicit locations
* is less than MAX_UNIFORM_LOCATIONS.
*/
if (total_entries > max_uniform_locs) {
linker_error(prog, "count of uniform locations > MAX_UNIFORM_LOCATIONS"
"(%u > %u)", total_entries, max_uniform_locs);
}
/* Reserve all the explicit locations of the active subroutine uniforms. */

View file

@ -1739,22 +1739,7 @@ assign_varying_locations(struct gl_context *ctx,
if (var && var->data.mode == ir_var_shader_in &&
var->data.is_unmatched_generic_inout) {
if (prog->IsES) {
/*
* On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec:
*
* If the vertex shader declares but doesn't write to a
* varying and the fragment shader declares and reads it,
* is this an error?
*
* RESOLUTION: No.
*/
linker_warning(prog, "%s shader varying %s not written "
"by %s shader\n.",
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
} else if (prog->Version <= 120) {
if (!prog->IsES && prog->Version <= 120) {
/* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
*
* Only those varying variables used (i.e. read) in
@ -1772,6 +1757,12 @@ assign_varying_locations(struct gl_context *ctx,
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
} else {
linker_warning(prog, "%s shader varying %s not written "
"by %s shader\n.",
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
}
}
}

View file

@ -3008,12 +3008,13 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
* for a variable, checks for overlaps between other uniforms using explicit
* locations.
*/
static bool
static int
reserve_explicit_locations(struct gl_shader_program *prog,
string_to_uint_map *map, ir_variable *var)
{
unsigned slots = var->type->uniform_locations();
unsigned max_loc = var->data.location + slots - 1;
unsigned return_value = slots;
/* Resize remap table if locations do not fit in the current one. */
if (max_loc + 1 > prog->NumUniformRemapTable) {
@ -3024,7 +3025,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
if (!prog->UniformRemapTable) {
linker_error(prog, "Out of memory during linking.\n");
return false;
return -1;
}
/* Initialize allocated space. */
@ -3042,8 +3043,10 @@ reserve_explicit_locations(struct gl_shader_program *prog,
/* Possibly same uniform from a different stage, this is ok. */
unsigned hash_loc;
if (map->get(hash_loc, var->name) && hash_loc == loc - i)
continue;
if (map->get(hash_loc, var->name) && hash_loc == loc - i) {
return_value = 0;
continue;
}
/* ARB_explicit_uniform_location specification states:
*
@ -3055,7 +3058,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
"location qualifier for uniform %s overlaps "
"previously used location\n",
var->name);
return false;
return -1;
}
/* Initialize location as inactive before optimization
@ -3067,7 +3070,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
/* Note, base location used for arrays. */
map->put(var->data.location, var->name);
return true;
return return_value;
}
static bool
@ -3128,12 +3131,12 @@ reserve_subroutine_explicit_locations(struct gl_shader_program *prog,
* any optimizations happen to handle also inactive uniforms and
* inactive array elements that may get trimmed away.
*/
static void
static int
check_explicit_uniform_locations(struct gl_context *ctx,
struct gl_shader_program *prog)
{
if (!ctx->Extensions.ARB_explicit_uniform_location)
return;
return -1;
/* This map is used to detect if overlapping explicit locations
* occur with the same uniform (from different stage) or a different one.
@ -3142,7 +3145,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (!uniform_map) {
linker_error(prog, "Out of memory during linking.\n");
return;
return -1;
}
unsigned entries_total = 0;
@ -3157,31 +3160,47 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (!var || var->data.mode != ir_var_uniform)
continue;
entries_total += var->type->uniform_locations();
if (var->data.explicit_location) {
bool ret;
bool ret = false;
if (var->type->without_array()->is_subroutine())
ret = reserve_subroutine_explicit_locations(prog, sh, var);
else
ret = reserve_explicit_locations(prog, uniform_map, var);
else {
int slots = reserve_explicit_locations(prog, uniform_map,
var);
if (slots != -1) {
ret = true;
entries_total += slots;
}
}
if (!ret) {
delete uniform_map;
return;
return -1;
}
}
}
}
/* Verify that total amount of entries for explicit and implicit locations
* is less than MAX_UNIFORM_LOCATIONS.
*/
if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) {
linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS"
"(%u >= %u)", entries_total,
ctx->Const.MaxUserAssignableUniformLocations);
exec_list_make_empty(&prog->EmptyUniformLocations);
struct empty_uniform_block *current_block = NULL;
for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) {
/* We found empty space in UniformRemapTable. */
if (prog->UniformRemapTable[i] == NULL) {
/* We've found the beginning of a new continous block of empty slots */
if (!current_block || current_block->start + current_block->slots != i) {
current_block = rzalloc(prog, struct empty_uniform_block);
current_block->start = i;
exec_list_push_tail(&prog->EmptyUniformLocations,
&current_block->link);
}
/* The current block continues, so we simply increment its slots */
current_block->slots++;
}
}
delete uniform_map;
return entries_total;
}
static bool
@ -4129,6 +4148,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
tfeedback_decl *tfeedback_decls = NULL;
unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
unsigned int num_explicit_uniform_locs = 0;
void *mem_ctx = ralloc_context(NULL); // temporary linker context
@ -4310,7 +4330,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
last = i;
}
check_explicit_uniform_locations(ctx, prog);
num_explicit_uniform_locs = check_explicit_uniform_locations(ctx, prog);
link_assign_subroutine_types(prog);
if (!prog->LinkStatus)
@ -4541,7 +4561,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
goto done;
update_array_sizes(prog);
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue);
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
num_explicit_uniform_locs,
ctx->Const.MaxUserAssignableUniformLocations);
link_assign_atomic_counter_resources(ctx, prog);
store_fragdepth_layout(prog);

View file

@ -35,7 +35,9 @@ link_invalidate_variable_locations(exec_list *ir);
extern void
link_assign_uniform_locations(struct gl_shader_program *prog,
unsigned int boolean_true);
unsigned int boolean_true,
unsigned int num_explicit_uniform_locs,
unsigned int max_uniform_locs);
extern void
link_set_uniform_initializers(struct gl_shader_program *prog,
@ -202,4 +204,17 @@ linker_error(gl_shader_program *prog, const char *fmt, ...);
void
linker_warning(gl_shader_program *prog, const char *fmt, ...);
/**
* Sometimes there are empty slots left over in UniformRemapTable after we
* allocate slots to explicit locations. This struct represents a single
* continouous block of empty slots in UniformRemapTable.
*/
struct empty_uniform_block {
struct exec_node link;
/* The start location of the block */
unsigned start;
/* The number of slots in the block */
unsigned slots;
};
#endif /* GLSL_LINKER_H */

View file

@ -62,8 +62,8 @@ public:
{
}
ir_visitor_status visit(ir_loop_jump *ir);
ir_visitor_status visit_enter(ir_discard *ir);
ir_visitor_status visit_enter(ir_loop_jump *ir);
ir_visitor_status visit_enter(ir_loop *ir);
ir_visitor_status visit_enter(ir_function_signature *ir);
@ -76,7 +76,7 @@ public:
} /* anonymous namespace */
ir_visitor_status
lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir)
lower_discard_flow_visitor::visit(ir_loop_jump *ir)
{
if (ir->mode != ir_loop_jump::jump_continue)
return visit_continue;

View file

@ -58,10 +58,16 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
ctx->Const.MaxComputeWorkGroupSize[2] = 64;
ctx->Const.MaxComputeWorkGroupInvocations = 1024;
ctx->Const.MaxComputeSharedMemorySize = 32768;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = 8;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = 8;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = 8;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks = 12;
switch (ctx->Const.GLSLVersion) {
case 100:
@ -77,12 +83,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 128 * 4;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
ctx->Const.MaxCombinedTextureImageUnits;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 16 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@ -103,12 +111,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 512;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
ctx->Const.MaxCombinedTextureImageUnits;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 64;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@ -129,11 +139,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@ -153,17 +165,20 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@ -191,11 +206,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 224;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */

View file

@ -43,7 +43,7 @@ TEST(sampler_types, TYPE) \
const glsl_type *type = glsl_type::TYPE##_type; \
EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \
EXPECT_EQ(DIM, type->sampler_dimensionality); \
EXPECT_EQ(DATA_TYPE, type->sampler_type); \
EXPECT_EQ(DATA_TYPE, type->sampled_type); \
ARR; \
SHAD; \
EXPECT_EQ(COMPS, type->coordinate_components()); \

View file

@ -103,6 +103,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}
@ -136,6 +137,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}
@ -241,6 +243,7 @@ verify_data(gl_constant_value *storage, unsigned storage_array_size,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}

View file

@ -51,7 +51,7 @@ glsl_type::glsl_type(GLenum gl_type,
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
sampled_type(0), interface_packing(0),
vector_elements(vector_elements), matrix_columns(matrix_columns),
length(0)
{
@ -75,7 +75,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(dim), sampler_shadow(shadow),
sampler_array(array), sampler_type(type), interface_packing(0),
sampler_array(array), sampled_type(type), interface_packing(0),
length(0)
{
mtx_lock(&glsl_type::mutex);
@ -101,7 +101,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
gl_type(0),
base_type(GLSL_TYPE_STRUCT),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(num_fields)
{
@ -141,7 +141,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
gl_type(0),
base_type(GLSL_TYPE_INTERFACE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing((unsigned) packing),
sampled_type(0), interface_packing((unsigned) packing),
vector_elements(0), matrix_columns(0),
length(num_fields)
{
@ -180,7 +180,7 @@ glsl_type::glsl_type(const glsl_type *return_type,
gl_type(0),
base_type(GLSL_TYPE_FUNCTION),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(num_params)
{
@ -212,7 +212,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
gl_type(0),
base_type(GLSL_TYPE_SUBROUTINE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
sampled_type(0), interface_packing(0),
vector_elements(1), matrix_columns(1),
length(0)
{
@ -428,7 +428,7 @@ _mesa_glsl_release_types(void)
glsl_type::glsl_type(const glsl_type *array, unsigned length) :
base_type(GLSL_TYPE_ARRAY),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(length), name(NULL)
{

View file

@ -56,11 +56,11 @@ enum glsl_base_type {
GLSL_TYPE_IMAGE,
GLSL_TYPE_ATOMIC_UINT,
GLSL_TYPE_STRUCT,
GLSL_TYPE_FUNCTION,
GLSL_TYPE_INTERFACE,
GLSL_TYPE_ARRAY,
GLSL_TYPE_VOID,
GLSL_TYPE_SUBROUTINE,
GLSL_TYPE_FUNCTION,
GLSL_TYPE_ERROR
};
@ -122,7 +122,7 @@ struct glsl_type {
unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
unsigned sampler_shadow:1;
unsigned sampler_array:1;
unsigned sampler_type:2; /**< Type of data returned using this
unsigned sampled_type:2; /**< Type of data returned using this
* sampler or image. Only \c
* GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
* and \c GLSL_TYPE_UINT are valid.

View file

@ -148,7 +148,7 @@ glsl_base_type
glsl_get_sampler_result_type(const struct glsl_type *type)
{
assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
return (glsl_base_type)type->sampler_type;
return (glsl_base_type)type->sampled_type;
}
unsigned
@ -314,6 +314,12 @@ glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
}
const struct glsl_type *
glsl_bare_sampler_type()
{
return glsl_type::sampler_type;
}
const struct glsl_type *
glsl_image_type(enum glsl_sampler_dim dim, bool is_array,
enum glsl_base_type base_type)
@ -331,6 +337,7 @@ glsl_function_type(const glsl_type *return_type,
const glsl_type *
glsl_transposed_type(const struct glsl_type *type)
{
assert(glsl_type_is_matrix(type));
return glsl_type::get_instance(type->base_type, type->matrix_columns,
type->vector_elements);
}

View file

@ -113,6 +113,7 @@ const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
bool is_shadow, bool is_array,
enum glsl_base_type base_type);
const struct glsl_type *glsl_bare_sampler_type();
const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim,
bool is_array,
enum glsl_base_type base_type);

View file

@ -44,9 +44,8 @@ LOCAL_CFLAGS := \
-DHAVE_ANDROID_PLATFORM
ifeq ($(MESA_LOLLIPOP_BUILD),true)
LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
else
LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
endif

View file

@ -532,7 +532,12 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
{ HAL_PIXEL_FORMAT_RGB_888, { 0xff, 0xff00, 0xff0000, 0x0 } },
{ HAL_PIXEL_FORMAT_RGB_565, { 0xf800, 0x7e0, 0x1f, 0x0 } },
{ HAL_PIXEL_FORMAT_BGRA_8888, { 0xff0000, 0xff00, 0xff, 0xff000000 } },
{ 0, 0, { 0, 0, 0, 0 } }
{ 0, { 0, 0, 0, 0 } }
};
EGLint config_attrs[] = {
EGL_NATIVE_VISUAL_ID, 0,
EGL_NATIVE_VISUAL_TYPE, 0,
EGL_NONE
};
int count, i, j;
@ -540,6 +545,9 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
for (i = 0; visuals[i].format; i++) {
int format_count = 0;
config_attrs[1] = visuals[i].format;
config_attrs[3] = visuals[i].format;
for (j = 0; dri2_dpy->driver_configs[j]; j++) {
const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
struct dri2_egl_config *dri2_conf;
@ -553,10 +561,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
continue;
dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j],
count + 1, surface_type, NULL, visuals[i].rgba_masks);
count + 1, surface_type, config_attrs, visuals[i].rgba_masks);
if (dri2_conf) {
dri2_conf->base.NativeVisualID = visuals[i].format;
dri2_conf->base.NativeVisualType = visuals[i].format;
count++;
format_count++;
}

View file

@ -472,6 +472,8 @@ dri2_x11_get_buffers(__DRIdrawable * driDrawable,
dri2_surf->drawable,
count, count, attachments);
reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn, cookie, NULL);
if (reply == NULL)
return NULL;
buffers = xcb_dri2_get_buffers_buffers (reply);
if (buffers == NULL)
return NULL;
@ -870,7 +872,12 @@ dri2_x11_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
if (dri2_dpy->dri2) {
return dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1;
if (dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1) {
return EGL_TRUE;
}
/* Swap failed with a window drawable. */
_eglError(EGL_BAD_NATIVE_WINDOW, __FUNCTION__);
return EGL_FALSE;
} else {
assert(dri2_dpy->swrast);

View file

@ -1555,8 +1555,14 @@ eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *valu
static EGLBoolean EGLAPIENTRY
eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
{
EGLAttrib attrib = *value;
EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
EGLAttrib attrib;
EGLBoolean result;
if (!value)
RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE);
attrib = *value;
result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
/* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR:
*

View file

@ -144,9 +144,6 @@ EGLBoolean
_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLint attribute, EGLAttrib *value)
{
if (!value)
return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");
switch (attribute) {
case EGL_SYNC_TYPE_KHR:
*value = sync->Type;

View file

@ -69,8 +69,11 @@ struct cso_context {
boolean has_geometry_shader;
boolean has_tessellation;
boolean has_compute_shader;
boolean has_streamout;
unsigned saved_state; /**< bitmask of CSO_BIT_x flags */
struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned nr_fragment_views;
@ -106,6 +109,7 @@ struct cso_context {
void *geometry_shader, *geometry_shader_saved;
void *tessctrl_shader, *tessctrl_shader_saved;
void *tesseval_shader, *tesseval_shader_saved;
void *compute_shader;
void *velements, *velements_saved;
struct pipe_query *render_condition, *render_condition_saved;
uint render_condition_mode, render_condition_mode_saved;
@ -272,6 +276,15 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
ctx->has_tessellation = TRUE;
}
if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
int supported_irs =
pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
PIPE_SHADER_CAP_SUPPORTED_IRS);
if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
ctx->has_compute_shader = TRUE;
}
}
if (pipe->screen->get_param(pipe->screen,
PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
ctx->has_streamout = TRUE;
@ -333,6 +346,10 @@ void cso_destroy_context( struct cso_context *ctx )
ctx->pipe->bind_tes_state(ctx->pipe, NULL);
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
}
if (ctx->has_compute_shader) {
ctx->pipe->bind_compute_state(ctx->pipe, NULL);
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
}
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
if (ctx->has_streamout)
@ -425,13 +442,15 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
return PIPE_OK;
}
void cso_save_blend(struct cso_context *ctx)
static void
cso_save_blend(struct cso_context *ctx)
{
assert(!ctx->blend_saved);
ctx->blend_saved = ctx->blend;
}
void cso_restore_blend(struct cso_context *ctx)
static void
cso_restore_blend(struct cso_context *ctx)
{
if (ctx->blend != ctx->blend_saved) {
ctx->blend = ctx->blend_saved;
@ -488,13 +507,15 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx,
return PIPE_OK;
}
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
static void
cso_save_depth_stencil_alpha(struct cso_context *ctx)
{
assert(!ctx->depth_stencil_saved);
ctx->depth_stencil_saved = ctx->depth_stencil;
}
void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
static void
cso_restore_depth_stencil_alpha(struct cso_context *ctx)
{
if (ctx->depth_stencil != ctx->depth_stencil_saved) {
ctx->depth_stencil = ctx->depth_stencil_saved;
@ -547,13 +568,15 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
return PIPE_OK;
}
void cso_save_rasterizer(struct cso_context *ctx)
static void
cso_save_rasterizer(struct cso_context *ctx)
{
assert(!ctx->rasterizer_saved);
ctx->rasterizer_saved = ctx->rasterizer;
}
void cso_restore_rasterizer(struct cso_context *ctx)
static void
cso_restore_rasterizer(struct cso_context *ctx)
{
if (ctx->rasterizer != ctx->rasterizer_saved) {
ctx->rasterizer = ctx->rasterizer_saved;
@ -581,13 +604,15 @@ void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
ctx->pipe->delete_fs_state(ctx->pipe, handle);
}
void cso_save_fragment_shader(struct cso_context *ctx)
static void
cso_save_fragment_shader(struct cso_context *ctx)
{
assert(!ctx->fragment_shader_saved);
ctx->fragment_shader_saved = ctx->fragment_shader;
}
void cso_restore_fragment_shader(struct cso_context *ctx)
static void
cso_restore_fragment_shader(struct cso_context *ctx)
{
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
@ -615,13 +640,15 @@ void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
ctx->pipe->delete_vs_state(ctx->pipe, handle);
}
void cso_save_vertex_shader(struct cso_context *ctx)
static void
cso_save_vertex_shader(struct cso_context *ctx)
{
assert(!ctx->vertex_shader_saved);
ctx->vertex_shader_saved = ctx->vertex_shader;
}
void cso_restore_vertex_shader(struct cso_context *ctx)
static void
cso_restore_vertex_shader(struct cso_context *ctx)
{
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
@ -640,12 +667,14 @@ void cso_set_framebuffer(struct cso_context *ctx,
}
}
void cso_save_framebuffer(struct cso_context *ctx)
static void
cso_save_framebuffer(struct cso_context *ctx)
{
util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
}
void cso_restore_framebuffer(struct cso_context *ctx)
static void
cso_restore_framebuffer(struct cso_context *ctx)
{
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
@ -664,13 +693,33 @@ void cso_set_viewport(struct cso_context *ctx,
}
}
void cso_save_viewport(struct cso_context *ctx)
/**
* Setup viewport state for given width and height (position is always (0,0)).
* Invert the Y axis if 'invert' is true.
*/
void
cso_set_viewport_dims(struct cso_context *ctx,
float width, float height, boolean invert)
{
struct pipe_viewport_state vp;
vp.scale[0] = width * 0.5f;
vp.scale[1] = height * (invert ? -0.5f : 0.5f);
vp.scale[2] = 0.5f;
vp.translate[0] = 0.5f * width;
vp.translate[1] = 0.5f * height;
vp.translate[2] = 0.5f;
cso_set_viewport(ctx, &vp);
}
static void
cso_save_viewport(struct cso_context *ctx)
{
ctx->vp_saved = ctx->vp;
}
void cso_restore_viewport(struct cso_context *ctx)
static void
cso_restore_viewport(struct cso_context *ctx)
{
if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) {
ctx->vp = ctx->vp_saved;
@ -696,12 +745,14 @@ void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask)
}
}
void cso_save_sample_mask(struct cso_context *ctx)
static void
cso_save_sample_mask(struct cso_context *ctx)
{
ctx->sample_mask_saved = ctx->sample_mask;
}
void cso_restore_sample_mask(struct cso_context *ctx)
static void
cso_restore_sample_mask(struct cso_context *ctx)
{
cso_set_sample_mask(ctx, ctx->sample_mask_saved);
}
@ -714,12 +765,14 @@ void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples)
}
}
void cso_save_min_samples(struct cso_context *ctx)
static void
cso_save_min_samples(struct cso_context *ctx)
{
ctx->min_samples_saved = ctx->min_samples;
}
void cso_restore_min_samples(struct cso_context *ctx)
static void
cso_restore_min_samples(struct cso_context *ctx)
{
cso_set_min_samples(ctx, ctx->min_samples_saved);
}
@ -733,13 +786,15 @@ void cso_set_stencil_ref(struct cso_context *ctx,
}
}
void cso_save_stencil_ref(struct cso_context *ctx)
static void
cso_save_stencil_ref(struct cso_context *ctx)
{
ctx->stencil_ref_saved = ctx->stencil_ref;
}
void cso_restore_stencil_ref(struct cso_context *ctx)
static void
cso_restore_stencil_ref(struct cso_context *ctx)
{
if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved,
sizeof(ctx->stencil_ref))) {
@ -764,14 +819,16 @@ void cso_set_render_condition(struct cso_context *ctx,
}
}
void cso_save_render_condition(struct cso_context *ctx)
static void
cso_save_render_condition(struct cso_context *ctx)
{
ctx->render_condition_saved = ctx->render_condition;
ctx->render_condition_cond_saved = ctx->render_condition_cond;
ctx->render_condition_mode_saved = ctx->render_condition_mode;
}
void cso_restore_render_condition(struct cso_context *ctx)
static void
cso_restore_render_condition(struct cso_context *ctx)
{
cso_set_render_condition(ctx, ctx->render_condition_saved,
ctx->render_condition_cond_saved,
@ -798,7 +855,8 @@ void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_gs_state(ctx->pipe, handle);
}
void cso_save_geometry_shader(struct cso_context *ctx)
static void
cso_save_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
@ -808,7 +866,8 @@ void cso_save_geometry_shader(struct cso_context *ctx)
ctx->geometry_shader_saved = ctx->geometry_shader;
}
void cso_restore_geometry_shader(struct cso_context *ctx)
static void
cso_restore_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
@ -841,7 +900,8 @@ void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_tcs_state(ctx->pipe, handle);
}
void cso_save_tessctrl_shader(struct cso_context *ctx)
static void
cso_save_tessctrl_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@ -851,7 +911,8 @@ void cso_save_tessctrl_shader(struct cso_context *ctx)
ctx->tessctrl_shader_saved = ctx->tessctrl_shader;
}
void cso_restore_tessctrl_shader(struct cso_context *ctx)
static void
cso_restore_tessctrl_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@ -884,7 +945,8 @@ void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_tes_state(ctx->pipe, handle);
}
void cso_save_tesseval_shader(struct cso_context *ctx)
static void
cso_save_tesseval_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@ -894,7 +956,8 @@ void cso_save_tesseval_shader(struct cso_context *ctx)
ctx->tesseval_shader_saved = ctx->tesseval_shader;
}
void cso_restore_tesseval_shader(struct cso_context *ctx)
static void
cso_restore_tesseval_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@ -907,6 +970,26 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
ctx->tesseval_shader_saved = NULL;
}
void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle)
{
assert(ctx->has_compute_shader || !handle);
if (ctx->has_compute_shader && ctx->compute_shader != handle) {
ctx->compute_shader = handle;
ctx->pipe->bind_compute_state(ctx->pipe, handle);
}
}
void cso_delete_compute_shader(struct cso_context *ctx, void *handle)
{
if (handle == ctx->compute_shader) {
/* unbind before deleting */
ctx->pipe->bind_compute_state(ctx->pipe, NULL);
ctx->compute_shader = NULL;
}
ctx->pipe->delete_compute_state(ctx->pipe, handle);
}
enum pipe_error
cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
@ -967,7 +1050,8 @@ cso_set_vertex_elements(struct cso_context *ctx,
return PIPE_OK;
}
void cso_save_vertex_elements(struct cso_context *ctx)
static void
cso_save_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@ -980,7 +1064,8 @@ void cso_save_vertex_elements(struct cso_context *ctx)
ctx->velements_saved = ctx->velements;
}
void cso_restore_vertex_elements(struct cso_context *ctx)
static void
cso_restore_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@ -1032,7 +1117,8 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
}
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
static void
cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@ -1047,7 +1133,8 @@ void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
sizeof(struct pipe_vertex_buffer));
}
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
static void
cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@ -1165,7 +1252,7 @@ cso_set_samplers(struct cso_context *ctx,
return error;
}
void
static void
cso_save_fragment_samplers(struct cso_context *ctx)
{
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@ -1176,7 +1263,7 @@ cso_save_fragment_samplers(struct cso_context *ctx)
}
void
static void
cso_restore_fragment_samplers(struct cso_context *ctx)
{
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@ -1223,7 +1310,7 @@ cso_set_sampler_views(struct cso_context *ctx,
}
void
static void
cso_save_fragment_sampler_views(struct cso_context *ctx)
{
unsigned i;
@ -1238,7 +1325,7 @@ cso_save_fragment_sampler_views(struct cso_context *ctx)
}
void
static void
cso_restore_fragment_sampler_views(struct cso_context *ctx)
{
unsigned i, nr_saved = ctx->nr_fragment_views_saved;
@ -1298,7 +1385,7 @@ cso_set_stream_outputs(struct cso_context *ctx,
ctx->nr_so_targets = num_targets;
}
void
static void
cso_save_stream_outputs(struct cso_context *ctx)
{
uint i;
@ -1315,7 +1402,7 @@ cso_save_stream_outputs(struct cso_context *ctx)
}
}
void
static void
cso_restore_stream_outputs(struct cso_context *ctx)
{
struct pipe_context *pipe = ctx->pipe;
@ -1402,6 +1489,113 @@ cso_restore_constant_buffer_slot0(struct cso_context *cso,
NULL);
}
/**
* Save all the CSO state items specified by the state_mask bitmask
* of CSO_BIT_x flags.
*/
void
cso_save_state(struct cso_context *cso, unsigned state_mask)
{
assert(cso->saved_state == 0);
cso->saved_state = state_mask;
if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
cso_save_aux_vertex_buffer_slot(cso);
if (state_mask & CSO_BIT_BLEND)
cso_save_blend(cso);
if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
cso_save_depth_stencil_alpha(cso);
if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
cso_save_fragment_samplers(cso);
if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
cso_save_fragment_sampler_views(cso);
if (state_mask & CSO_BIT_FRAGMENT_SHADER)
cso_save_fragment_shader(cso);
if (state_mask & CSO_BIT_FRAMEBUFFER)
cso_save_framebuffer(cso);
if (state_mask & CSO_BIT_GEOMETRY_SHADER)
cso_save_geometry_shader(cso);
if (state_mask & CSO_BIT_MIN_SAMPLES)
cso_save_min_samples(cso);
if (state_mask & CSO_BIT_RASTERIZER)
cso_save_rasterizer(cso);
if (state_mask & CSO_BIT_RENDER_CONDITION)
cso_save_render_condition(cso);
if (state_mask & CSO_BIT_SAMPLE_MASK)
cso_save_sample_mask(cso);
if (state_mask & CSO_BIT_STENCIL_REF)
cso_save_stencil_ref(cso);
if (state_mask & CSO_BIT_STREAM_OUTPUTS)
cso_save_stream_outputs(cso);
if (state_mask & CSO_BIT_TESSCTRL_SHADER)
cso_save_tessctrl_shader(cso);
if (state_mask & CSO_BIT_TESSEVAL_SHADER)
cso_save_tesseval_shader(cso);
if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
cso_save_vertex_elements(cso);
if (state_mask & CSO_BIT_VERTEX_SHADER)
cso_save_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_save_viewport(cso);
}
/**
* Restore the state which was saved by cso_save_state().
*/
void
cso_restore_state(struct cso_context *cso)
{
unsigned state_mask = cso->saved_state;
assert(state_mask);
if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
cso_restore_aux_vertex_buffer_slot(cso);
if (state_mask & CSO_BIT_BLEND)
cso_restore_blend(cso);
if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
cso_restore_depth_stencil_alpha(cso);
if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
cso_restore_fragment_samplers(cso);
if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
cso_restore_fragment_sampler_views(cso);
if (state_mask & CSO_BIT_FRAGMENT_SHADER)
cso_restore_fragment_shader(cso);
if (state_mask & CSO_BIT_FRAMEBUFFER)
cso_restore_framebuffer(cso);
if (state_mask & CSO_BIT_GEOMETRY_SHADER)
cso_restore_geometry_shader(cso);
if (state_mask & CSO_BIT_MIN_SAMPLES)
cso_restore_min_samples(cso);
if (state_mask & CSO_BIT_RASTERIZER)
cso_restore_rasterizer(cso);
if (state_mask & CSO_BIT_RENDER_CONDITION)
cso_restore_render_condition(cso);
if (state_mask & CSO_BIT_SAMPLE_MASK)
cso_restore_sample_mask(cso);
if (state_mask & CSO_BIT_STENCIL_REF)
cso_restore_stencil_ref(cso);
if (state_mask & CSO_BIT_STREAM_OUTPUTS)
cso_restore_stream_outputs(cso);
if (state_mask & CSO_BIT_TESSCTRL_SHADER)
cso_restore_tessctrl_shader(cso);
if (state_mask & CSO_BIT_TESSEVAL_SHADER)
cso_restore_tesseval_shader(cso);
if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
cso_restore_vertex_elements(cso);
if (state_mask & CSO_BIT_VERTEX_SHADER)
cso_restore_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_restore_viewport(cso);
cso->saved_state = 0;
}
/* drawing */
void

View file

@ -47,22 +47,15 @@ void cso_destroy_context( struct cso_context *cso );
enum pipe_error cso_set_blend( struct cso_context *cso,
const struct pipe_blend_state *blend );
void cso_save_blend(struct cso_context *cso);
void cso_restore_blend(struct cso_context *cso);
enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
const struct pipe_depth_stencil_alpha_state *dsa );
void cso_save_depth_stencil_alpha(struct cso_context *cso);
void cso_restore_depth_stencil_alpha(struct cso_context *cso);
enum pipe_error cso_set_rasterizer( struct cso_context *cso,
const struct pipe_rasterizer_state *rasterizer );
void cso_save_rasterizer(struct cso_context *cso);
void cso_restore_rasterizer(struct cso_context *cso);
enum pipe_error
@ -71,11 +64,6 @@ cso_set_samplers(struct cso_context *cso,
unsigned count,
const struct pipe_sampler_state **states);
void
cso_save_fragment_samplers(struct cso_context *cso);
void
cso_restore_fragment_samplers(struct cso_context *cso);
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
@ -91,9 +79,6 @@ cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
const struct pipe_vertex_element *states);
void cso_save_vertex_elements(struct cso_context *ctx);
void cso_restore_vertex_elements(struct cso_context *ctx);
void cso_set_vertex_buffers(struct cso_context *ctx,
unsigned start_slot, unsigned count,
@ -101,8 +86,6 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
/* One vertex buffer slot is provided with the save/restore functionality.
* cso_context chooses the slot, it can be non-zero. */
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);
@ -110,8 +93,6 @@ void cso_set_stream_outputs(struct cso_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets);
void cso_save_stream_outputs(struct cso_context *ctx);
void cso_restore_stream_outputs(struct cso_context *ctx);
/*
@ -123,67 +104,81 @@ void cso_restore_stream_outputs(struct cso_context *ctx);
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
void cso_save_fragment_shader(struct cso_context *cso);
void cso_restore_fragment_shader(struct cso_context *cso);
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
void cso_save_vertex_shader(struct cso_context *cso);
void cso_restore_vertex_shader(struct cso_context *cso);
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
void cso_save_geometry_shader(struct cso_context *cso);
void cso_restore_geometry_shader(struct cso_context *cso);
void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle);
void cso_save_tessctrl_shader(struct cso_context *cso);
void cso_restore_tessctrl_shader(struct cso_context *cso);
void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle);
void cso_save_tesseval_shader(struct cso_context *cso);
void cso_restore_tesseval_shader(struct cso_context *cso);
void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_compute_shader(struct cso_context *ctx, void *handle);
void cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
void cso_restore_framebuffer(struct cso_context *cso);
void cso_set_viewport(struct cso_context *cso,
const struct pipe_viewport_state *vp);
void cso_save_viewport(struct cso_context *cso);
void cso_restore_viewport(struct cso_context *cso);
void cso_set_viewport_dims(struct cso_context *ctx,
float width, float height, boolean invert);
void cso_set_blend_color(struct cso_context *cso,
const struct pipe_blend_color *bc);
void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask);
void cso_save_sample_mask(struct cso_context *ctx);
void cso_restore_sample_mask(struct cso_context *ctx);
void cso_set_min_samples(struct cso_context *cso, unsigned min_samples);
void cso_save_min_samples(struct cso_context *ctx);
void cso_restore_min_samples(struct cso_context *ctx);
void cso_set_stencil_ref(struct cso_context *cso,
const struct pipe_stencil_ref *sr);
void cso_save_stencil_ref(struct cso_context *cso);
void cso_restore_stencil_ref(struct cso_context *cso);
void cso_set_render_condition(struct cso_context *cso,
struct pipe_query *query,
boolean condition, uint mode);
void cso_save_render_condition(struct cso_context *cso);
void cso_restore_render_condition(struct cso_context *cso);
#define CSO_BIT_AUX_VERTEX_BUFFER_SLOT 0x1
#define CSO_BIT_BLEND 0x2
#define CSO_BIT_DEPTH_STENCIL_ALPHA 0x4
#define CSO_BIT_FRAGMENT_SAMPLERS 0x8
#define CSO_BIT_FRAGMENT_SAMPLER_VIEWS 0x10
#define CSO_BIT_FRAGMENT_SHADER 0x20
#define CSO_BIT_FRAMEBUFFER 0x40
#define CSO_BIT_GEOMETRY_SHADER 0x80
#define CSO_BIT_MIN_SAMPLES 0x100
#define CSO_BIT_RASTERIZER 0x200
#define CSO_BIT_RENDER_CONDITION 0x400
#define CSO_BIT_SAMPLE_MASK 0x800
#define CSO_BIT_STENCIL_REF 0x1000
#define CSO_BIT_STREAM_OUTPUTS 0x2000
#define CSO_BIT_TESSCTRL_SHADER 0x4000
#define CSO_BIT_TESSEVAL_SHADER 0x8000
#define CSO_BIT_VERTEX_ELEMENTS 0x10000
#define CSO_BIT_VERTEX_SHADER 0x20000
#define CSO_BIT_VIEWPORT 0x40000
#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
CSO_BIT_FRAGMENT_SHADER | \
CSO_BIT_GEOMETRY_SHADER | \
CSO_BIT_TESSCTRL_SHADER | \
CSO_BIT_TESSEVAL_SHADER)
void cso_save_state(struct cso_context *cso, unsigned state_mask);
void cso_restore_state(struct cso_context *cso);
/* sampler view state */
@ -194,12 +189,6 @@ cso_set_sampler_views(struct cso_context *cso,
unsigned count,
struct pipe_sampler_view **views);
void
cso_save_fragment_sampler_views(struct cso_context *ctx);
void
cso_restore_fragment_sampler_views(struct cso_context *ctx);
/* constant buffers */
@ -230,7 +219,6 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode,
uint start, uint count,
uint start_instance, uint instance_count);
/* helper drawing function */
void
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count);

View file

@ -26,6 +26,9 @@
**************************************************************************/
#include <stddef.h>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <llvm-c/Core.h>
#include <llvm-c/Disassembler.h>
@ -125,7 +128,7 @@ lp_debug_dump_value(LLVMValueRef value)
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
static size_t
disassemble(const void* func)
disassemble(const void* func, std::stringstream &buffer)
{
const uint8_t *bytes = (const uint8_t *)func;
@ -143,8 +146,8 @@ disassemble(const void* func)
char outline[1024];
if (!D) {
_debug_printf("error: couldn't create disassembler for triple %s\n",
Triple.c_str());
buffer << "error: could not create disassembler for triple "
<< Triple.c_str() << '\n';
return 0;
}
@ -158,13 +161,13 @@ disassemble(const void* func)
* so that between runs.
*/
_debug_printf("%6lu:\t", (unsigned long)pc);
buffer << std::setw(6) << (unsigned long)pc << ":\t";
Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
sizeof outline);
if (!Size) {
_debug_printf("invalid\n");
buffer << "invalid\n";
pc += 1;
break;
}
@ -176,10 +179,11 @@ disassemble(const void* func)
if (0) {
unsigned i;
for (i = 0; i < Size; ++i) {
_debug_printf("%02x ", bytes[pc + i]);
buffer << std::hex << std::setfill('0') << std::setw(2)
<< static_cast<int> (bytes[pc + i]);
}
for (; i < 16; ++i) {
_debug_printf(" ");
buffer << std::dec << " ";
}
}
@ -187,9 +191,7 @@ disassemble(const void* func)
* Print the instruction.
*/
_debug_printf("%*s", Size, outline);
_debug_printf("\n");
buffer << std::setw(Size) << outline << '\n';
/*
* Stop disassembling on return statements, if there is no record of a
@ -198,9 +200,11 @@ disassemble(const void* func)
* XXX: This currently assumes x86
*/
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if (Size == 1 && bytes[pc] == 0xc3) {
break;
}
#endif
/*
* Advance.
@ -209,12 +213,12 @@ disassemble(const void* func)
pc += Size;
if (pc >= extent) {
_debug_printf("disassembly larger than %ull bytes, aborting\n", extent);
buffer << "disassembly larger than " << extent << " bytes, aborting\n";
break;
}
}
_debug_printf("\n");
buffer << '\n';
LLVMDisasmDispose(D);
@ -222,7 +226,8 @@ disassemble(const void* func)
* Print GDB command, useful to verify output.
*/
if (0) {
_debug_printf("disassemble %p %p\n", bytes, bytes + pc);
buffer << "disassemble " << static_cast<const void*>(bytes) << ' '
<< static_cast<const void*>(bytes + pc) << '\n';
}
return pc;
@ -231,8 +236,14 @@ disassemble(const void* func)
extern "C" void
lp_disassemble(LLVMValueRef func, const void *code) {
_debug_printf("%s:\n", LLVMGetValueName(func));
disassemble(code);
std::stringstream buffer;
std::string s;
buffer << LLVMGetValueName(func) << ":\n";
disassemble(code, buffer);
s = buffer.str();
_debug_printf("%s", s.c_str());
_debug_printf("\n");
}
@ -248,9 +259,10 @@ extern "C" void
lp_profile(LLVMValueRef func, const void *code)
{
#if defined(__linux__) && defined(PROFILE)
std::stringstream buffer;
static std::ofstream perf_asm_file;
static boolean first_time = TRUE;
static FILE *perf_map_file = NULL;
static int perf_asm_fd = -1;
if (first_time) {
/*
* We rely on the disassembler for determining a function's size, but
@ -264,17 +276,16 @@ lp_profile(LLVMValueRef func, const void *code)
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
perf_map_file = fopen(filename, "wt");
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
perf_asm_file.open(filename);
}
first_time = FALSE;
}
if (perf_map_file) {
const char *symbol = LLVMGetValueName(func);
unsigned long addr = (uintptr_t)code;
llvm::raw_fd_ostream Out(perf_asm_fd, false);
Out << symbol << ":\n";
unsigned long size = disassemble(code);
buffer << symbol << ":\n";
unsigned long size = disassemble(code, buffer);
perf_asm_file << buffer.rdbuf() << std::flush;
fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
fflush(perf_map_file);
}

View file

@ -118,8 +118,10 @@ create_pass_manager(struct gallivm_state *gallivm)
* simple, or constant propagation into them, etc.
*/
#if HAVE_LLVM < 0x0309
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
#endif
/* Setting the module's DataLayout to an empty string will cause the
* ExecutionEngine to copy to the DataLayout string from its target

View file

@ -128,6 +128,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
@ -137,6 +139,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;

View file

@ -61,6 +61,11 @@
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ADT/Triple.h>
#if HAVE_LLVM >= 0x0307
#include <llvm/Analysis/TargetLibraryInfo.h>
#else
#include <llvm/Target/TargetLibraryInfo.h>
#endif
#if HAVE_LLVM < 0x0306
#include <llvm/ExecutionEngine/JITMemoryManager.h>
#else
@ -147,6 +152,31 @@ lp_set_target_options(void)
gallivm_init_llvm_targets();
}
extern "C"
LLVMTargetLibraryInfoRef
gallivm_create_target_library_info(const char *triple)
{
return reinterpret_cast<LLVMTargetLibraryInfoRef>(
#if HAVE_LLVM < 0x0307
new llvm::TargetLibraryInfo(
#else
new llvm::TargetLibraryInfoImpl(
#endif
llvm::Triple(triple)));
}
extern "C"
void
gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
{
delete reinterpret_cast<
#if HAVE_LLVM < 0x0307
llvm::TargetLibraryInfo
#else
llvm::TargetLibraryInfoImpl
#endif
*>(library_info);
}
extern "C"
LLVMValueRef

View file

@ -32,6 +32,7 @@
#include "lp_bld.h"
#include <llvm-c/ExecutionEngine.h>
#include <llvm-c/Target.h>
#ifdef __cplusplus
@ -44,6 +45,12 @@ struct lp_generated_code;
extern void
gallivm_init_llvm_targets(void);
extern LLVMTargetLibraryInfoRef
gallivm_create_target_library_info(const char *triple);
extern void
gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
extern void
lp_set_target_options(void);

View file

@ -2592,7 +2592,10 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
/* XXX: for real msaa support, the w component would be the sample index. */
/*
* XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
* would be the sample index.
*/
for (i = 0; i < dims; i++) {
coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
@ -2742,6 +2745,7 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
opcode == TGSI_OPCODE_SAMPLE_D ||
opcode == TGSI_OPCODE_SAMPLE_I ||
opcode == TGSI_OPCODE_SAMPLE_I_MS ||
opcode == TGSI_OPCODE_SAMPLE_L ||
opcode == TGSI_OPCODE_SVIEWINFO ||
opcode == TGSI_OPCODE_CAL ||
@ -3989,6 +3993,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

View file

@ -199,6 +199,7 @@ static const GLubyte Fixed8x13_Character_123[] = { 8, 0, 0, 0, 14, 16, 16,
static const GLubyte Fixed8x13_Character_124[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0};
static const GLubyte Fixed8x13_Character_125[] = { 8, 0, 0, 0,112, 8, 8, 16, 12, 16, 8, 8,112, 0, 0};
static const GLubyte Fixed8x13_Character_126[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 84, 36, 0, 0};
#if 0 /* currently unused */
static const GLubyte Fixed8x13_Character_127[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_128[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_129[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
@ -232,6 +233,7 @@ static const GLubyte Fixed8x13_Character_156[] = { 9, 0, 0, 0, 0, 0, 0,17
static const GLubyte Fixed8x13_Character_157[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_158[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_159[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
#endif
static const GLubyte Fixed8x13_Character_160[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_161[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 0, 16, 0, 0};
static const GLubyte Fixed8x13_Character_162[] = { 8, 0, 0, 0, 0, 16, 56, 84, 80, 80, 84, 56, 16, 0, 0};

View file

@ -460,25 +460,25 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
hud->constants.two_div_fb_height = 2.0f / hud->fb_height;
cso_save_framebuffer(cso);
cso_save_sample_mask(cso);
cso_save_min_samples(cso);
cso_save_blend(cso);
cso_save_depth_stencil_alpha(cso);
cso_save_fragment_shader(cso);
cso_save_fragment_sampler_views(cso);
cso_save_fragment_samplers(cso);
cso_save_rasterizer(cso);
cso_save_viewport(cso);
cso_save_stream_outputs(cso);
cso_save_geometry_shader(cso);
cso_save_tessctrl_shader(cso);
cso_save_tesseval_shader(cso);
cso_save_vertex_shader(cso);
cso_save_vertex_elements(cso);
cso_save_aux_vertex_buffer_slot(cso);
cso_save_state(cso, (CSO_BIT_FRAMEBUFFER |
CSO_BIT_SAMPLE_MASK |
CSO_BIT_MIN_SAMPLES |
CSO_BIT_BLEND |
CSO_BIT_DEPTH_STENCIL_ALPHA |
CSO_BIT_FRAGMENT_SHADER |
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
CSO_BIT_FRAGMENT_SAMPLERS |
CSO_BIT_RASTERIZER |
CSO_BIT_VIEWPORT |
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_GEOMETRY_SHADER |
CSO_BIT_TESSCTRL_SHADER |
CSO_BIT_TESSEVAL_SHADER |
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_save_render_condition(cso);
/* set states */
memset(&surf_templ, 0, sizeof(surf_templ));
@ -591,26 +591,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_pane_draw_colored_objects(hud, pane);
}
/* restore states */
cso_restore_framebuffer(cso);
cso_restore_sample_mask(cso);
cso_restore_min_samples(cso);
cso_restore_blend(cso);
cso_restore_depth_stencil_alpha(cso);
cso_restore_fragment_shader(cso);
cso_restore_fragment_sampler_views(cso);
cso_restore_fragment_samplers(cso);
cso_restore_rasterizer(cso);
cso_restore_viewport(cso);
cso_restore_stream_outputs(cso);
cso_restore_tessctrl_shader(cso);
cso_restore_tesseval_shader(cso);
cso_restore_geometry_shader(cso);
cso_restore_vertex_shader(cso);
cso_restore_vertex_elements(cso);
cso_restore_aux_vertex_buffer_slot(cso);
cso_restore_state(cso);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_restore_render_condition(cso);
pipe_surface_reference(&surf, NULL);
}

View file

@ -283,8 +283,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
return SUPER(mm);
failure:
if(mm->heap)
u_mmDestroy(mm->heap);
if(mm->heap)
u_mmDestroy(mm->heap);
if(mm->map)
pb_unmap(mm->buffer);
FREE(mm);

View file

@ -115,27 +115,27 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
}
/* save state (restored below) */
cso_save_blend(cso);
cso_save_depth_stencil_alpha(cso);
cso_save_fragment_shader(cso);
cso_save_framebuffer(cso);
cso_save_tessctrl_shader(cso);
cso_save_tesseval_shader(cso);
cso_save_geometry_shader(cso);
cso_save_rasterizer(cso);
cso_save_sample_mask(cso);
cso_save_min_samples(cso);
cso_save_fragment_samplers(cso);
cso_save_fragment_sampler_views(cso);
cso_save_stencil_ref(cso);
cso_save_stream_outputs(cso);
cso_save_vertex_elements(cso);
cso_save_vertex_shader(cso);
cso_save_viewport(cso);
cso_save_aux_vertex_buffer_slot(cso);
cso_save_state(cso, (CSO_BIT_BLEND |
CSO_BIT_DEPTH_STENCIL_ALPHA |
CSO_BIT_FRAGMENT_SHADER |
CSO_BIT_FRAMEBUFFER |
CSO_BIT_TESSCTRL_SHADER |
CSO_BIT_TESSEVAL_SHADER |
CSO_BIT_GEOMETRY_SHADER |
CSO_BIT_RASTERIZER |
CSO_BIT_SAMPLE_MASK |
CSO_BIT_MIN_SAMPLES |
CSO_BIT_FRAGMENT_SAMPLERS |
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
CSO_BIT_STENCIL_REF |
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VIEWPORT |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
cso_save_render_condition(cso);
/* set default state */
cso_set_sample_mask(cso, ~0);
@ -186,27 +186,9 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
}
/* restore state we changed */
cso_restore_blend(cso);
cso_restore_depth_stencil_alpha(cso);
cso_restore_fragment_shader(cso);
cso_restore_framebuffer(cso);
cso_restore_tessctrl_shader(cso);
cso_restore_tesseval_shader(cso);
cso_restore_geometry_shader(cso);
cso_restore_rasterizer(cso);
cso_restore_sample_mask(cso);
cso_restore_min_samples(cso);
cso_restore_fragment_samplers(cso);
cso_restore_fragment_sampler_views(cso);
cso_restore_stencil_ref(cso);
cso_restore_stream_outputs(cso);
cso_restore_vertex_elements(cso);
cso_restore_vertex_shader(cso);
cso_restore_viewport(cso);
cso_restore_aux_vertex_buffer_slot(cso);
cso_restore_state(cso);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
cso_restore_render_condition(cso);
pipe_resource_reference(&ppq->depth, NULL);
pipe_resource_reference(&refin, NULL);

View file

@ -111,6 +111,7 @@ tgsi_default_declaration( void )
declaration.Local = 0;
declaration.Array = 0;
declaration.Atomic = 0;
declaration.Shared = 0;
declaration.Padding = 0;
return declaration;

View file

@ -352,7 +352,7 @@ iter_declaration(
TXT(", ");
ENM(decl->Image.Resource, tgsi_texture_names);
TXT(", ");
UID(decl->Image.Format);
TXT(util_format_name(decl->Image.Format));
if (decl->Image.Writable)
TXT(", WR");
if (decl->Image.Raw)
@ -364,6 +364,11 @@ iter_declaration(
TXT(", ATOMIC");
}
if (decl->Declaration.File == TGSI_FILE_MEMORY) {
if (decl->Declaration.Shared)
TXT(", SHARED");
}
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
TXT(", ");
ENM(decl->SamplerView.Resource, tgsi_texture_names);

View file

@ -2300,7 +2300,8 @@ exec_txf(struct tgsi_exec_machine *mach,
IFETCH(&r[3], 0, TGSI_CHAN_W);
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
target = mach->SamplerViews[unit].Resource;
}
else {
@ -2342,7 +2343,8 @@ exec_txf(struct tgsi_exec_machine *mach,
r[3].f[j] = rgba[3][j];
}
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
unsigned char swizzles[4];
swizzles[0] = inst->Src[1].Register.SwizzleX;
swizzles[1] = inst->Src[1].Register.SwizzleY;
@ -4967,7 +4969,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SAMPLE_I_MS:
assert(0);
exec_txf(mach, inst);
break;
case TGSI_OPCODE_SAMPLE:

View file

@ -465,6 +465,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
@ -474,6 +476,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;

View file

@ -247,7 +247,14 @@ scan_declaration(struct tgsi_shader_info *info,
info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
info->num_inputs++;
/* Vertex shaders can have inputs with holes between them. */
if (info->processor == TGSI_PROCESSOR_VERTEX)
info->num_inputs = MAX2(info->num_inputs, reg + 1);
else {
info->num_inputs++;
assert(reg < info->num_inputs);
}
/* Only interpolated varyings. Don't include POSITION.
* Don't include integer varyings, because they are not
@ -341,6 +348,7 @@ scan_declaration(struct tgsi_shader_info *info,
info->output_semantic_name[reg] = (ubyte) semName;
info->output_semantic_index[reg] = (ubyte) semIndex;
info->num_outputs++;
assert(reg < info->num_outputs);
if (semName == TGSI_SEMANTIC_COLOR)
info->colors_written |= 1 << semIndex;

View file

@ -57,6 +57,7 @@ static const char *tgsi_file_names[] =
"IMAGE",
"SVIEW",
"BUFFER",
"MEMORY",
};
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =

View file

@ -1290,8 +1290,6 @@ static boolean parse_declaration( struct translate_ctx *ctx )
return FALSE;
}
/* XXX format */
cur2 = cur;
eat_opt_white(&cur2);
while (*cur2 == ',') {
@ -1304,7 +1302,16 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl.Image.Writable = 1;
} else {
break;
for (i = 0; i < PIPE_FORMAT_COUNT; i++) {
const struct util_format_description *desc =
util_format_description(i);
if (desc && str_match_nocase_whole(&cur2, desc->name)) {
decl.Image.Format = i;
break;
}
}
if (i == PIPE_FORMAT_COUNT)
break;
}
cur = cur2;
eat_opt_white(&cur2);
@ -1381,6 +1388,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
if (str_match_nocase_whole(&cur, "ATOMIC")) {
decl.Declaration.Atomic = 1;
ctx->cur = cur;
} else if (str_match_nocase_whole(&cur, "SHARED")) {
decl.Declaration.Shared = 1;
ctx->cur = cur;
}
} else {
if (str_match_nocase_whole(&cur, "LOCAL")) {

View file

@ -189,6 +189,8 @@ struct ureg_program
unsigned nr_instructions;
struct ureg_tokens domain[2];
bool use_shared_memory;
};
static union tgsi_any_token error_tokens[32];
@ -727,6 +729,16 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
return reg;
}
/* Allocate a shared memory area.
*/
struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg)
{
struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0);
ureg->use_shared_memory = true;
return reg;
}
static int
match_or_expand_immediate64( const unsigned *v,
int type,
@ -1653,6 +1665,23 @@ emit_decl_buffer(struct ureg_program *ureg,
out[1].decl_range.Last = index;
}
static void
emit_decl_shared_memory(struct ureg_program *ureg)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
out[0].value = 0;
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 2;
out[0].decl.File = TGSI_FILE_MEMORY;
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[0].decl.Shared = true;
out[1].value = 0;
out[1].decl_range.First = 0;
out[1].decl_range.Last = 0;
}
static void
emit_immediate( struct ureg_program *ureg,
const unsigned *v,
@ -1825,6 +1854,9 @@ static void emit_decls( struct ureg_program *ureg )
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
}
if (ureg->use_shared_memory)
emit_decl_shared_memory(ureg);
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
emit_decl_range(ureg,

View file

@ -337,6 +337,9 @@ ureg_DECL_image(struct ureg_program *ureg,
struct ureg_src
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
struct ureg_src
ureg_DECL_shared_memory(struct ureg_program *ureg);
static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
float a, float b,

View file

@ -541,23 +541,23 @@ util_blit_pixels_tex(struct blit_state *ctx,
PIPE_BIND_RENDER_TARGET));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
cso_save_rasterizer(ctx->cso);
cso_save_sample_mask(ctx->cso);
cso_save_min_samples(ctx->cso);
cso_save_fragment_samplers(ctx->cso);
cso_save_fragment_sampler_views(ctx->cso);
cso_save_stream_outputs(ctx->cso);
cso_save_viewport(ctx->cso);
cso_save_framebuffer(ctx->cso);
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_tessctrl_shader(ctx->cso);
cso_save_tesseval_shader(ctx->cso);
cso_save_geometry_shader(ctx->cso);
cso_save_vertex_elements(ctx->cso);
cso_save_aux_vertex_buffer_slot(ctx->cso);
cso_save_state(ctx->cso, (CSO_BIT_BLEND |
CSO_BIT_DEPTH_STENCIL_ALPHA |
CSO_BIT_RASTERIZER |
CSO_BIT_SAMPLE_MASK |
CSO_BIT_MIN_SAMPLES |
CSO_BIT_FRAGMENT_SAMPLERS |
CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_VIEWPORT |
CSO_BIT_FRAMEBUFFER |
CSO_BIT_FRAGMENT_SHADER |
CSO_BIT_VERTEX_SHADER |
CSO_BIT_TESSCTRL_SHADER |
CSO_BIT_TESSEVAL_SHADER |
CSO_BIT_GEOMETRY_SHADER |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT));
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend_write_color);
@ -625,21 +625,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
2); /* attribs/vert */
/* restore state we changed */
cso_restore_blend(ctx->cso);
cso_restore_depth_stencil_alpha(ctx->cso);
cso_restore_rasterizer(ctx->cso);
cso_restore_sample_mask(ctx->cso);
cso_restore_min_samples(ctx->cso);
cso_restore_fragment_samplers(ctx->cso);
cso_restore_fragment_sampler_views(ctx->cso);
cso_restore_viewport(ctx->cso);
cso_restore_framebuffer(ctx->cso);
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_tessctrl_shader(ctx->cso);
cso_restore_tesseval_shader(ctx->cso);
cso_restore_geometry_shader(ctx->cso);
cso_restore_vertex_elements(ctx->cso);
cso_restore_aux_vertex_buffer_slot(ctx->cso);
cso_restore_stream_outputs(ctx->cso);
cso_restore_state(ctx->cso);
}

View file

@ -173,17 +173,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
*ptr = NULL;
}
static inline void
pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
{
struct pipe_image_view *old_view = *ptr;
if (pipe_reference_described(&(*ptr)->reference, &view->reference,
(debug_reference_descriptor)debug_describe_image_view))
old_view->context->image_view_destroy(old_view->context, old_view);
*ptr = view;
}
static inline void
pipe_so_target_reference(struct pipe_stream_output_target **ptr,
struct pipe_stream_output_target *target)

View file

@ -415,6 +415,9 @@ to be 0.
(also used to implement atomic counters). Having this be non-0 also
implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI
opcodes.
* ``PIPE_SHADER_CAP_SUPPORTED_IRS``: Supported representations of the
program. It should be a mask of ``pipe_shader_ir`` bits.
* ``PIPE_SHADER_CAP_MAX_SHADER_IMAGES``: Maximum number of image units.
.. _pipe_compute_cap:

View file

@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
pipe->sampler_view_destroy(pipe, view);
}
static struct pipe_image_view *
dd_context_create_image_view(struct pipe_context *_pipe,
struct pipe_resource *resource,
const struct pipe_image_view *templ)
{
struct pipe_context *pipe = dd_context(_pipe)->pipe;
struct pipe_image_view *view =
pipe->create_image_view(pipe, resource, templ);
if (!view)
return NULL;
view->context = _pipe;
return view;
}
static void
dd_context_image_view_destroy(struct pipe_context *_pipe,
struct pipe_image_view *view)
{
struct pipe_context *pipe = dd_context(_pipe)->pipe;
pipe->image_view_destroy(pipe, view);
}
static struct pipe_stream_output_target *
dd_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
static void
dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
unsigned start, unsigned num,
struct pipe_image_view **views)
struct pipe_image_view *views)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
CTX_INIT(sampler_view_destroy);
CTX_INIT(create_surface);
CTX_INIT(surface_destroy);
CTX_INIT(create_image_view);
CTX_INIT(image_view_destroy);
CTX_INIT(transfer_map);
CTX_INIT(transfer_flush_region);
CTX_INIT(transfer_unmap);

View file

@ -94,7 +94,7 @@ struct dd_context
struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
struct dd_state *velems;

View file

@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
Copyright (C) 2013-2016 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -255,11 +256,273 @@ enum a3xx_color_fmt {
RB_R32G32B32A32_UINT = 59,
};
enum a3xx_cp_perfcounter_select {
CP_ALWAYS_COUNT = 0,
CP_AHB_PFPTRANS_WAIT = 3,
CP_AHB_NRTTRANS_WAIT = 6,
CP_CSF_NRT_READ_WAIT = 8,
CP_CSF_I1_FIFO_FULL = 9,
CP_CSF_I2_FIFO_FULL = 10,
CP_CSF_ST_FIFO_FULL = 11,
CP_RESERVED_12 = 12,
CP_CSF_RING_ROQ_FULL = 13,
CP_CSF_I1_ROQ_FULL = 14,
CP_CSF_I2_ROQ_FULL = 15,
CP_CSF_ST_ROQ_FULL = 16,
CP_RESERVED_17 = 17,
CP_MIU_TAG_MEM_FULL = 18,
CP_MIU_NRT_WRITE_STALLED = 22,
CP_MIU_NRT_READ_STALLED = 23,
CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
CP_ME_MICRO_RB_STARVED = 30,
CP_AHB_RBBM_DWORD_SENT = 40,
CP_ME_BUSY_CLOCKS = 41,
CP_ME_WAIT_CONTEXT_AVAIL = 42,
CP_PFP_TYPE0_PACKET = 43,
CP_PFP_TYPE3_PACKET = 44,
CP_CSF_RB_WPTR_NEQ_RPTR = 45,
CP_CSF_I1_SIZE_NEQ_ZERO = 46,
CP_CSF_I2_SIZE_NEQ_ZERO = 47,
CP_CSF_RBI1I2_FETCHING = 48,
};
enum a3xx_gras_tse_perfcounter_select {
GRAS_TSEPERF_INPUT_PRIM = 0,
GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
GRAS_TSEPERF_CLIPPED_PRIM = 3,
GRAS_TSEPERF_NEW_PRIM = 4,
GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
GRAS_TSEPERF_POST_CLIP_PRIM = 11,
GRAS_TSEPERF_WORKING_CYCLES = 12,
GRAS_TSEPERF_PC_STARVE = 13,
GRAS_TSERASPERF_STALL = 14,
};
enum a3xx_gras_ras_perfcounter_select {
GRAS_RASPERF_16X16_TILES = 0,
GRAS_RASPERF_8X8_TILES = 1,
GRAS_RASPERF_4X4_TILES = 2,
GRAS_RASPERF_WORKING_CYCLES = 3,
GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
};
enum a3xx_hlsq_perfcounter_select {
HLSQ_PERF_SP_VS_CONSTANT = 0,
HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
HLSQ_PERF_SP_FS_CONSTANT = 2,
HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
HLSQ_PERF_TP_STATE = 4,
HLSQ_PERF_QUADS = 5,
HLSQ_PERF_PIXELS = 6,
HLSQ_PERF_VERTICES = 7,
HLSQ_PERF_FS8_THREADS = 8,
HLSQ_PERF_FS16_THREADS = 9,
HLSQ_PERF_FS32_THREADS = 10,
HLSQ_PERF_VS8_THREADS = 11,
HLSQ_PERF_VS16_THREADS = 12,
HLSQ_PERF_SP_VS_DATA_BYTES = 13,
HLSQ_PERF_SP_FS_DATA_BYTES = 14,
HLSQ_PERF_ACTIVE_CYCLES = 15,
HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
HLSQ_PERF_STALL_CYCLES_UCHE = 19,
HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
};
enum a3xx_pc_perfcounter_select {
PC_PCPERF_VISIBILITY_STREAMS = 0,
PC_PCPERF_TOTAL_INSTANCES = 1,
PC_PCPERF_PRIMITIVES_PC_VPC = 2,
PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
PC_PCPERF_VERTICES_TO_VFD = 7,
PC_PCPERF_REUSED_VERTICES = 8,
PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
PC_PCPERF_CYCLES_IS_WORKING = 12,
};
enum a3xx_rb_perfcounter_select {
RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
RB_RBPERF_RB_MARB_DATA = 7,
RB_RBPERF_SP_RB_QUAD = 8,
RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
RB_RBPERF_GMEM_CH0_READ = 10,
RB_RBPERF_GMEM_CH1_READ = 11,
RB_RBPERF_GMEM_CH0_WRITE = 12,
RB_RBPERF_GMEM_CH1_WRITE = 13,
RB_RBPERF_CP_CONTEXT_DONE = 14,
RB_RBPERF_CP_CACHE_FLUSH = 15,
RB_RBPERF_CP_ZPASS_DONE = 16,
};
enum a3xx_rbbm_perfcounter_select {
RBBM_ALAWYS_ON = 0,
RBBM_VBIF_BUSY = 1,
RBBM_TSE_BUSY = 2,
RBBM_RAS_BUSY = 3,
RBBM_PC_DCALL_BUSY = 4,
RBBM_PC_VSD_BUSY = 5,
RBBM_VFD_BUSY = 6,
RBBM_VPC_BUSY = 7,
RBBM_UCHE_BUSY = 8,
RBBM_VSC_BUSY = 9,
RBBM_HLSQ_BUSY = 10,
RBBM_ANY_RB_BUSY = 11,
RBBM_ANY_TEX_BUSY = 12,
RBBM_ANY_USP_BUSY = 13,
RBBM_ANY_MARB_BUSY = 14,
RBBM_ANY_ARB_BUSY = 15,
RBBM_AHB_STATUS_BUSY = 16,
RBBM_AHB_STATUS_STALLED = 17,
RBBM_AHB_STATUS_TXFR = 18,
RBBM_AHB_STATUS_TXFR_SPLIT = 19,
RBBM_AHB_STATUS_TXFR_ERROR = 20,
RBBM_AHB_STATUS_LONG_STALL = 21,
RBBM_RBBM_STATUS_MASKED = 22,
};
enum a3xx_sp_perfcounter_select {
SP_LM_LOAD_INSTRUCTIONS = 0,
SP_LM_STORE_INSTRUCTIONS = 1,
SP_LM_ATOMICS = 2,
SP_UCHE_LOAD_INSTRUCTIONS = 3,
SP_UCHE_STORE_INSTRUCTIONS = 4,
SP_UCHE_ATOMICS = 5,
SP_VS_TEX_INSTRUCTIONS = 6,
SP_VS_CFLOW_INSTRUCTIONS = 7,
SP_VS_EFU_INSTRUCTIONS = 8,
SP_VS_FULL_ALU_INSTRUCTIONS = 9,
SP_VS_HALF_ALU_INSTRUCTIONS = 10,
SP_FS_TEX_INSTRUCTIONS = 11,
SP_FS_CFLOW_INSTRUCTIONS = 12,
SP_FS_EFU_INSTRUCTIONS = 13,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
SP0_ICL1_MISSES = 26,
SP_FS_HALF_ALU_INSTRUCTIONS = 15,
SP_FS_BARY_INSTRUCTIONS = 16,
SP_VS_INSTRUCTIONS = 17,
SP_FS_INSTRUCTIONS = 18,
SP_ADDR_LOCK_COUNT = 19,
SP_UCHE_READ_TRANS = 20,
SP_UCHE_WRITE_TRANS = 21,
SP_EXPORT_VPC_TRANS = 22,
SP_EXPORT_RB_TRANS = 23,
SP_PIXELS_KILLED = 24,
SP_ICL1_REQUESTS = 25,
SP_ICL1_MISSES = 26,
SP_ICL0_REQUESTS = 27,
SP_ICL0_MISSES = 28,
SP_ALU_ACTIVE_CYCLES = 29,
SP_EFU_ACTIVE_CYCLES = 30,
SP_STALL_CYCLES_BY_VPC = 31,
SP_STALL_CYCLES_BY_TP = 32,
SP_STALL_CYCLES_BY_UCHE = 33,
SP_STALL_CYCLES_BY_RB = 34,
SP_ACTIVE_CYCLES_ANY = 35,
SP_ACTIVE_CYCLES_ALL = 36,
};
enum a3xx_tp_perfcounter_select {
TPL1_TPPERF_L1_REQUESTS = 0,
TPL1_TPPERF_TP0_L1_REQUESTS = 1,
TPL1_TPPERF_TP0_L1_MISSES = 2,
TPL1_TPPERF_TP1_L1_REQUESTS = 3,
TPL1_TPPERF_TP1_L1_MISSES = 4,
TPL1_TPPERF_TP2_L1_REQUESTS = 5,
TPL1_TPPERF_TP2_L1_MISSES = 6,
TPL1_TPPERF_TP3_L1_REQUESTS = 7,
TPL1_TPPERF_TP3_L1_MISSES = 8,
TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
TPL1_TPPERF_BILINEAR_OPS = 13,
TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
TPL1_TPPERF_QUADQUADS_SHADOW = 15,
TPL1_TPPERF_QUADS_ARRAY = 16,
TPL1_TPPERF_QUADS_PROJECTION = 17,
TPL1_TPPERF_QUADS_GRADIENT = 18,
TPL1_TPPERF_QUADS_1D2D = 19,
TPL1_TPPERF_QUADS_3DCUBE = 20,
TPL1_TPPERF_ZERO_LOD = 21,
TPL1_TPPERF_OUTPUT_TEXELS = 22,
TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
TPL1_TPPERF_LATENCY = 26,
TPL1_TPPERF_LATENCY_TRANS = 27,
};
enum a3xx_vfd_perfcounter_select {
VFD_PERF_UCHE_BYTE_FETCHED = 0,
VFD_PERF_UCHE_TRANS = 1,
VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
VFD_PERF_FETCH_INSTRUCTIONS = 3,
VFD_PERF_DECODE_INSTRUCTIONS = 4,
VFD_PERF_ACTIVE_CYCLES = 5,
VFD_PERF_STALL_CYCLES_UCHE = 6,
VFD_PERF_STALL_CYCLES_HLSQ = 7,
VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
};
enum a3xx_vpc_perfcounter_select {
VPC_PERF_SP_LM_PRIMITIVES = 0,
VPC_PERF_COMPONENTS_FROM_SP = 1,
VPC_PERF_SP_LM_COMPONENTS = 2,
VPC_PERF_ACTIVE_CYCLES = 3,
VPC_PERF_STALL_CYCLES_LM = 4,
VPC_PERF_STALL_CYCLES_RAS = 5,
};
enum a3xx_uche_perfcounter_select {
UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
UCHE_UCHEPERF_EVICTS = 16,
UCHE_UCHEPERF_FLUSHES = 17,
UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
};
enum a3xx_rb_blend_opcode {

View file

@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "fd3_emit.h"
#include "fd3_blend.h"
@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx)
fd_wfi(ctx, ring);
fd_hw_query_enable(ctx, ring);
ctx->needs_rb_fbd = true;
}

File diff suppressed because it is too large Load diff

View file

@ -49,6 +49,8 @@ struct fd4_context {
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*
* (upper area used as scratch bo.. see fd4_query)
*/
struct fd_bo *vsc_size_mem;

View file

@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "fd4_emit.h"
#include "fd4_blend.h"
@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0);
fd_hw_query_enable(ctx, ring);
ctx->needs_rb_fbd = true;
}

View file

@ -31,6 +31,7 @@
#include "freedreno_util.h"
#include "fd4_query.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
@ -81,7 +82,12 @@ static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
return end->ctr[0] - start->ctr[0];
uint64_t n = 0;
for (unsigned i = 0; i < 16; i += 4)
n += end->ctr[i] - start->ctr[i];
return n / 2;
}
static void
@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
result->b |= (n > 0);
}
/*
* Time Elapsed Query:
*
* Note: we could in theory support timestamp queries, but they
* won't give sensible results for tilers.
*/
static void
time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
/* Right now, the assignment of countable to counter register is
* just hard coded. If we start exposing more countables than we
* have counters, we will need to be more clever.
*/
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
OUT_RING(ring, CP_ALWAYS_COUNT);
}
static struct fd_hw_sample *
time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
/* use unused part of vsc_size_mem as scratch space, to avoid
* extra allocation:
*/
struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
const int sample_off = 128;
const int addr_off = sample_off + 8;
debug_assert(ctx->screen->max_freq > 0);
/* Basic issue is that we need to read counter value to a relative
* destination (with per-tile offset) rather than absolute dest
* addr. But there is no pm4 packet that can do that. This is
* where it would be *really* nice if we could write our own fw
* since afaict implementing the sort of packet we need would be
* trivial.
*
* Instead, we:
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
* address to the per-sample offset in the scratch buffer
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
* to CP_ME_NRT_ADDR
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch
* buffer to CP_ME_NRT_DATA to trigger the write out to query
* result buffer
*
* Straightforward, right?
*
* Maybe could swap the order of things in the scratch buffer to
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
* shot, but that's really just polishing a turd..
*/
fd_wfi(ctx, ring);
/* copy sample counter _LO and _HI to scratch: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* ok... here we really *would* like to use the CP_SET_CONSTANT
* mode which can add a constant to value in reg2 and write to
* reg1... *but* that only works for banked/context registers,
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some
* CP math to the scratch buffer instead:
*
* (note first 8 bytes are counter value, use offset 0x8 for
* address calculation)
*/
/* per-sample offset to scratch bo: */
OUT_PKT3(ring, CP_MEM_WRITE, 2);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
OUT_RING(ring, samp->offset);
/* now add to that the per-tile base: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
CP_REG_TO_MEM_0_ACCUMULATE |
CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* now copy that back to CP_ME_NRT_ADDR: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
* to trigger the write to result buffer
*/
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* and again to get the value of the _HI reg from scratch: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
/* Sigh.. */
return samp;
}
static void
time_elapsed_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
{
uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */
result->u64 += n * 1000000000 / ctx->screen->max_freq;
}
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
.accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED,
.active = FD_STAGE_DRAW,
.enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample,
.accumulate_result = time_elapsed_accumulate_result,
};
void fd4_query_context_init(struct pipe_context *pctx)
{
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &time_elapsed);
}

View file

@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
Copyright (C) 2013-2016 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
Copyright (C) 2013-2016 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
CP_UNKNOWN_1A = 26,
CP_UNKNOWN_4E = 78,
CP_WIDE_REG_WRITE = 116,
CP_SCRATCH_TO_REG = 77,
CP_REG_TO_SCRATCH = 74,
CP_WAIT_MEM_WRITES = 18,
CP_COND_REG_EXEC = 71,
CP_MEM_TO_REG = 66,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
}
#define REG_CP_REG_TO_MEM_0 0x00000000
#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff
#define CP_REG_TO_MEM_0_REG__SHIFT 0
static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
{
return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK;
}
#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000
#define CP_REG_TO_MEM_0_CNT__SHIFT 19
static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
{
return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK;
}
#define CP_REG_TO_MEM_0_64B 0x40000000
#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000
#define REG_CP_REG_TO_MEM_1 0x00000001
#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff
#define CP_REG_TO_MEM_1_DEST__SHIFT 0
static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
{
return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK;
}
#endif /* ADRENO_PM4_XML */

View file

@ -164,6 +164,9 @@ struct fd_context {
*/
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
/* which sample providers were active in the current batch: */
uint32_t active_providers;
/* tracking for current stage, to know when to start/stop
* any active queries:
*/

View file

@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq)
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
static inline bool
skip_begin_query(int type)
{
switch (type) {
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_GPU_FINISHED:
return true;
default:
return false;
}
}
#endif /* FREEDRENO_QUERY_H_ */

View file

@ -47,6 +47,8 @@ static int pidx(unsigned query_type)
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
case PIPE_QUERY_TIME_ELAPSED:
return 2;
default:
return -1;
}
@ -89,7 +91,9 @@ static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
int idx = pidx(hq->provider->query_type);
assert(!hq->period);
ctx->active_providers |= (1 << idx);
hq->period = util_slab_alloc(&ctx->sample_period_pool);
list_inithead(&hq->period->list);
hq->period->start = get_sample(ctx, ring, hq->base.type);
@ -101,7 +105,9 @@ static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
int idx = pidx(hq->provider->query_type);
assert(hq->period && !hq->period->end);
assert(ctx->active_providers & (1 << idx));
hq->period->end = get_sample(ctx, ring, hq->base.type);
list_addtail(&hq->period->list, &hq->current_periods);
hq->period = NULL;
@ -156,6 +162,12 @@ static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
/* there are a couple special cases, which don't have
* a matching ->begin_query():
*/
if (skip_begin_query(q->type) && !q->active) {
fd_hw_begin_query(ctx, q);
}
if (!q->active)
return;
if (is_active(hq, ctx->stage))
@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
pipe_reference_init(&samp->reference, 1);
samp->size = size;
debug_assert(util_is_power_of_two(size));
ctx->next_sample_offset = align(ctx->next_sample_offset, size);
samp->offset = ctx->next_sample_offset;
/* NOTE: util_slab_alloc() does not zero out the buffer: */
samp->bo = NULL;
@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
assert(samp->tile_stride == tile_stride);
return;
}
samp->bo = bo;
samp->bo = fd_bo_ref(bo);
samp->num_tiles = num_tiles;
samp->tile_stride = tile_stride;
}
@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->stage = stage;
}
/* call the provider->enable() for all the hw queries that were active
* in the current batch. This sets up perfctr selector regs statically
* for the duration of the batch.
*/
void
fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
if (ctx->active_providers & (1 << idx)) {
assert(ctx->sample_providers[idx]);
if (ctx->sample_providers[idx]->enable)
ctx->sample_providers[idx]->enable(ctx, ring);
}
}
ctx->active_providers = 0; /* clear it for next frame */
}
void
fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider)

View file

@ -76,6 +76,11 @@ struct fd_hw_sample_provider {
/* stages applicable to the query type: */
enum fd_render_stage active;
/* Optional hook for enabling a counter. Guaranteed to happen
* at least once before the first ->get_sample() in a batch.
*/
void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring);
/* when a new sample is required, emit appropriate cmdstream
* and return a sample object:
*/
@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring);
void fd_hw_query_set_stage(struct fd_context *ctx,
struct fd_ringbuffer *ring, enum fd_render_stage stage);
void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);

View file

@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return is_a3xx(screen) ? 1 : 0;
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_QUERY_TIME_ELAPSED:
/* only a4xx, requires new enough kernel so we know max_freq: */
return (screen->max_freq > 0) && is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
debug_printf("unknown shader param %d\n", param);
@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev)
}
screen->device_id = val;
if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
DBG("could not get gpu freq");
/* this limits what performance related queries are
* supported but is not fatal
*/
screen->max_freq = 0;
} else {
screen->max_freq = val;
}
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
DBG("could not get gpu-id");
goto fail;

View file

@ -56,6 +56,7 @@ struct fd_screen {
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
uint32_t max_freq;
uint32_t max_rts; /* max # of render targets */
void *compiler; /* currently unused for a2xx */

View file

@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;

View file

@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
}
static void
ilo_launch_grid(struct pipe_context *pipe,
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *cs = ilo->state_vector.cs;
@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) {
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
&input_buf.buffer_offset, &input_buf.buffer);
}
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
launch_grid(ilo, block_layout, grid_layout, &input_buf, pc);
launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
ilo_render_invalidate_hw(ilo->render);

View file

@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
return ILO_MAX_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:

View file

@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
static void
ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start, unsigned count,
struct pipe_image_view **views)
struct pipe_image_view *views)
{
#if 0
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;

View file

@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads )
create_rast_threads(rast);
/* for synchronizing rasterization threads */
pipe_barrier_init( &rast->barrier, rast->num_threads );
if (rast->num_threads > 0) {
pipe_barrier_init( &rast->barrier, rast->num_threads );
}
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
}
/* for synchronizing rasterization threads */
pipe_barrier_destroy( &rast->barrier );
if (rast->num_threads > 0) {
pipe_barrier_destroy( &rast->barrier );
}
lp_scene_queue_destroy(rast->full_scenes);

View file

@ -169,8 +169,8 @@ struct lp_setup_context
};
static inline void
scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
struct u_rect *scissor)
scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
const struct u_rect *scissor)
{
/* left */
scis_planes[0] = (bbox->x0 < scissor->x0);

View file

@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup,
*/
if (nr_planes > 4) {
/* why not just use draw_regions */
struct u_rect *scissor = &setup->scissors[viewport_index];
const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[4];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);

View file

@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
*/
if (nr_planes > 3) {
/* why not just use draw_regions */
struct u_rect *scissor = &setup->scissors[viewport_index];
const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[3];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);

View file

@ -60,6 +60,8 @@ NV30_C_SOURCES := \
nv30/nvfx_vertprog.c
NV50_C_SOURCES := \
nv50/g80_defs.xml.h \
nv50/g80_texture.xml.h \
nv50/nv50_2d.xml.h \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
@ -68,7 +70,6 @@ NV50_C_SOURCES := \
nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
nv50/nv50_defs.xml.h \
nv50/nv50_formats.c \
nv50/nv50_miptree.c \
nv50/nv50_program.c \
@ -93,7 +94,6 @@ NV50_C_SOURCES := \
nv50/nv50_state_validate.c \
nv50/nv50_surface.c \
nv50/nv50_tex.c \
nv50/nv50_texture.xml.h \
nv50/nv50_transfer.c \
nv50/nv50_transfer.h \
nv50/nv50_vbo.c \
@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_target_nvc0.h
NVC0_C_SOURCES := \
nvc0/gm107_texture.xml.h \
nvc0/nvc0_3d.xml.h \
nvc0/nvc0_compute.c \
nvc0/nvc0_compute.h \

View file

@ -232,6 +232,8 @@ enum operation
#define NV50_IR_SUBOP_SHFL_UP 1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
#define NV50_IR_SUBOP_LOAD_LOCKED 1
#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.

View file

@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
break;
default:
if (i->op == OP_SELP) {
assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
srcId(i->src(s), 42);
}
// ignore here, can be predicate or flags, but must not be address
break;
}
@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
{
emitForm_21(i, 0x250, 0x050);
if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 13;
}
@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask
defId(i->def(0), 2);
srcId(i->src(0), 10);
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[1] |= 1 << 9; // dall

View file

@ -193,6 +193,8 @@ private:
void emitNOP();
void emitKIL();
void emitOUT();
void emitMEMBAR();
};
/*******************************************************************************
@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
case SV_INVOCATION_ID : id = 0x11; break;
case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
default:
assert(!"invalid system value");
id = 0;
@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD()
emitRND (0x27);
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
emitField(0x1c, 8, insn->subOp);
emitGPR (0x14, insn->src(1));
if (insn->predSrc != 1)
emitGPR (0x14, insn->src(1));
else
emitGPR (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@ -2327,22 +2334,34 @@ void
CodeEmitterGM107::emitATOM()
{
unsigned dType, subOp;
switch (insn->dType) {
case TYPE_U32: dType = 0; break;
case TYPE_S32: dType = 1; break;
case TYPE_U64: dType = 2; break;
case TYPE_F32: dType = 3; break;
case TYPE_B128: dType = 4; break;
case TYPE_S64: dType = 5; break;
default: assert(!"unexpected dType"); dType = 0; break;
}
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
subOp = 8;
else
subOp = insn->subOp;
assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */
emitInsn (0xed000000);
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
switch (insn->dType) {
case TYPE_U32: dType = 0; break;
case TYPE_U64: dType = 1; break;
default: assert(!"unexpected dType"); dType = 0; break;
}
subOp = 15;
emitInsn (0xee000000);
} else {
switch (insn->dType) {
case TYPE_U32: dType = 0; break;
case TYPE_S32: dType = 1; break;
case TYPE_U64: dType = 2; break;
case TYPE_F32: dType = 3; break;
case TYPE_B128: dType = 4; break;
case TYPE_S64: dType = 5; break;
default: assert(!"unexpected dType"); dType = 0; break;
}
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
subOp = 8;
else
subOp = insn->subOp;
emitInsn (0xed000000);
}
emitField(0x34, 4, subOp);
emitField(0x31, 3, dType);
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT()
emitGPR (0x00, insn->def(0));
}
void
CodeEmitterGM107::emitMEMBAR()
{
emitInsn (0xef980000);
emitField(0x08, 2, insn->subOp >> 2);
}
/*******************************************************************************
* assembler front-end
******************************************************************************/
@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_RESTART:
emitOUT();
break;
case OP_MEMBAR:
emitMEMBAR();
break;
default:
assert(!"invalid opcode");
emitNOP();

View file

@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
setSrc(i, 0, 0);
setSrc(i, 1, 2);
if (i->predSrc != 1)
setSrc(i, 1, 2);
if (i->getIndirect(0, 0)) {
assert(!i->getIndirect(1, 0));
@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
emitForm_ADD(i);
if (!i->srcExists(1))
if (!i->srcExists(1) || i->predSrc == 1)
srcId(i->src(0), 32 + 14);
}

View file

@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
break;
default:
if (i->op == OP_SELP) {
// OP_SELP is used to implement shared+atomics on Fermi.
assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
srcId(i->src(s), 49);
}
// ignore here, can be predicate or flags, but must not be address
break;
}
@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
{
emitForm_A(i, HEX64(20000000, 00000004));
if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
}
@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[0] |= 1 << 9; // dall
@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
case FILE_MEMORY_SHARED:
if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
opc = 0xb8000000;
else
opc = 0xcc000000;
} else {
opc = 0xc9000000;
}
break;
default:
assert(!"invalid memory file");
opc = 0;
@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
code[0] = 0x00000005;
code[1] = opc;
if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
// Unlocked store on shared memory can fail.
if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
assert(i->defExists(0));
defId(i->def(0), 8);
}
}
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
case FILE_MEMORY_SHARED:
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
opc = 0xa8000000;
else
opc = 0xc4000000;
} else {
opc = 0xc1000000;
}
break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i); // not sure if this is any better
@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
}
code[1] = opc;
if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
assert(i->defExists(1));
defId(i->def(1), 32 + 18);
}
}
defId(i->def(0), 14);
setAddressByFile(i->src(0));

View file

@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@ -858,6 +859,11 @@ public:
};
std::vector<Resource> resources;
struct MemoryFile {
bool shared;
};
std::vector<MemoryFile> memoryFiles;
private:
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
@ -904,6 +910,7 @@ bool Source::scanSource()
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
info->immd.bufSize = 0;
@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
case TGSI_FILE_MEMORY:
for (i = first; i <= last; ++i)
memoryFiles[i].shared = decl->Declaration.Shared;
break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
first, last - first + 1)));
break;
case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
sym->reg.fileIndex = fileIdx;
if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
sym->setFile(FILE_MEMORY_SHARED);
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
sym->setOffset(info->in[idx].slot[c] * 4);
@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY)
return NULL;
if (dst.isIndirect(0) ||
@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4])
Symbol *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
} else {
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c);
}
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
@ -2337,7 +2353,8 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
@ -2346,11 +2363,11 @@ Converter::handleSTORE()
Value *off;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
off = fetchSrc(0, 0);
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
}
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
Value *off = fetchSrc(1, c), *off2 = NULL;
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
tgsi.getSrc(1).getValueU32(c, info));
else
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
if (tgsi.getSrc(0).isIndirect(0))
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
if (subOp == NV50_IR_SUBOP_ATOM_CAS)

View file

@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
return true;
}
void
NVC0LoweringPass::handleSharedATOM(Instruction *atom)
{
assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
BasicBlock *currBB = atom->bb;
BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
BasicBlock *joinBB = atom->bb->splitAfter(atom);
bld.setPosition(currBB, true);
assert(!currBB->joinAt);
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
bld.setPosition(tryLockAndSetBB, true);
Instruction *ld =
bld.mkLoad(TYPE_U32, atom->getDef(0),
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
Value *stVal;
if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
// Read the old value, and write the new one.
stVal = atom->getSrc(1);
} else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
CmpInstruction *set =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, ld->getDef(0), atom->getSrc(1));
set->setPredicate(CC_P, ld->getDef(1));
Instruction *selp =
bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0),
atom->getSrc(2), set->getDef(0));
selp->src(2).mod = Modifier(NV50_IR_MOD_NOT);
selp->setPredicate(CC_P, ld->getDef(1));
stVal = selp->getDef(0);
} else {
operation op;
switch (atom->subOp) {
case NV50_IR_SUBOP_ATOM_ADD:
op = OP_ADD;
break;
case NV50_IR_SUBOP_ATOM_AND:
op = OP_AND;
break;
case NV50_IR_SUBOP_ATOM_OR:
op = OP_OR;
break;
case NV50_IR_SUBOP_ATOM_XOR:
op = OP_XOR;
break;
case NV50_IR_SUBOP_ATOM_MIN:
op = OP_MIN;
break;
case NV50_IR_SUBOP_ATOM_MAX:
op = OP_MAX;
break;
default:
assert(0);
}
Instruction *i =
bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
atom->getSrc(1));
i->setPredicate(CC_P, ld->getDef(1));
stVal = i->getDef(0);
}
Instruction *st =
bld.mkStore(OP_STORE, TYPE_U32,
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
NULL, stVal);
st->setPredicate(CC_P, ld->getDef(1));
st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
// Loop until the lock is acquired.
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
bld.remove(atom);
bld.setPosition(joinBB, false);
bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
sv = SV_LBASE;
break;
case FILE_MEMORY_SHARED:
sv = SV_SBASE;
break;
handleSharedATOM(atom);
return true;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
// ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
return false;
}
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
return false;

View file

@ -105,6 +105,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
void handleSharedATOM(Instruction *);
void checkPredicate(Instruction *);

View file

@ -1539,6 +1539,7 @@ private:
void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
void handleNEG(Instruction *);
BuildUtil bld;
};
@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
return false;
if (src->getInsn()->saturate)
return false;
if (src->getInsn()->postFactor)
return false;
if (toOp == OP_SAD) {
@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn)
insn->setSrc(0, add->getSrc(s));
}
// NEG(AND(SET, 1)) -> SET
void
AlgebraicOpt::handleNEG(Instruction *i) {
Instruction *src = i->getSrc(0)->getInsn();
ImmediateValue imm;
int b;
if (isFloatType(i->sType) || !src || src->op != OP_AND)
return;
if (src->src(0).getImmediate(imm))
b = 1;
else if (src->src(1).getImmediate(imm))
b = 0;
else
return;
if (!imm.isInteger(1))
return;
Instruction *set = src->getSrc(b)->getInsn();
if ((set->op == OP_SET || set->op == OP_SET_AND ||
set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
!isFloatType(set->dType)) {
i->def(0).replace(set->getDef(0), false);
}
}
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
case OP_SUCLAMP:
handleSUCLAMP(i);
break;
case OP_NEG:
handleNEG(i);
break;
default:
break;
}

Some files were not shown because too many files have changed in this diff Show more