mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 05:00:09 +01:00
Merge commit '8b0fb1c152fe191768953aa8c77b89034a377f83' into vulkan
This pulls in Rob Clark's const_index changes for NIR
This commit is contained in:
commit
768bd7f272
112 changed files with 4146 additions and 4234 deletions
|
|
@ -149,7 +149,7 @@ GL 4.2, GLSL 4.20:
|
|||
|
||||
GL 4.3, GLSL 4.30:
|
||||
|
||||
GL_ARB_arrays_of_arrays DONE (i965)
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
|
||||
GL_ARB_clear_buffer_object DONE (all drivers)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
|
|
@ -209,7 +209,7 @@ GL 4.5, GLSL 4.50:
|
|||
|
||||
These are the extensions cherry-picked to make GLES 3.1
|
||||
GLES3.1, GLSL ES 3.1
|
||||
GL_ARB_arrays_of_arrays DONE (i965)
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li>GL_ARB_arrays_of_arrays on all gallium drivers that provide GLSL 1.30</li>
|
||||
<li>GL_ARB_base_instance on freedreno/a4xx</li>
|
||||
<li>GL_ARB_compute_shader on i965</li>
|
||||
<li>GL_ARB_copy_image on r600</li>
|
||||
|
|
|
|||
|
|
@ -560,7 +560,8 @@ done:
|
|||
state->symbols->add_global_function(f);
|
||||
emit_function(state, f);
|
||||
}
|
||||
f->add_signature(sig->clone_prototype(f, NULL));
|
||||
sig = sig->clone_prototype(f, NULL);
|
||||
f->add_signature(sig);
|
||||
}
|
||||
}
|
||||
return sig;
|
||||
|
|
|
|||
|
|
@ -4211,33 +4211,46 @@ ast_declarator_list::hir(exec_list *instructions,
|
|||
_mesa_glsl_error(&loc, state,
|
||||
"invalid type `%s' in empty declaration",
|
||||
type_name);
|
||||
} else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
|
||||
/* Empty atomic counter declarations are allowed and useful
|
||||
* to set the default offset qualifier.
|
||||
*/
|
||||
return NULL;
|
||||
} else if (this->type->qualifier.precision != ast_precision_none) {
|
||||
if (this->type->specifier->structure != NULL) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"precision qualifiers can't be applied "
|
||||
"to structures");
|
||||
} else {
|
||||
static const char *const precision_names[] = {
|
||||
"highp",
|
||||
"highp",
|
||||
"mediump",
|
||||
"lowp"
|
||||
};
|
||||
|
||||
_mesa_glsl_warning(&loc, state,
|
||||
"empty declaration with precision qualifier, "
|
||||
"to set the default precision, use "
|
||||
"`precision %s %s;'",
|
||||
precision_names[this->type->qualifier.precision],
|
||||
type_name);
|
||||
} else {
|
||||
if (decl_type->base_type == GLSL_TYPE_ARRAY) {
|
||||
/* From Section 4.12 (Empty Declarations) of the GLSL 4.5 spec:
|
||||
*
|
||||
* "The combinations of types and qualifiers that cause
|
||||
* compile-time or link-time errors are the same whether or not
|
||||
* the declaration is empty."
|
||||
*/
|
||||
validate_array_dimensions(decl_type, state, &loc);
|
||||
}
|
||||
|
||||
if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
|
||||
/* Empty atomic counter declarations are allowed and useful
|
||||
* to set the default offset qualifier.
|
||||
*/
|
||||
return NULL;
|
||||
} else if (this->type->qualifier.precision != ast_precision_none) {
|
||||
if (this->type->specifier->structure != NULL) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"precision qualifiers can't be applied "
|
||||
"to structures");
|
||||
} else {
|
||||
static const char *const precision_names[] = {
|
||||
"highp",
|
||||
"highp",
|
||||
"mediump",
|
||||
"lowp"
|
||||
};
|
||||
|
||||
_mesa_glsl_warning(&loc, state,
|
||||
"empty declaration with precision "
|
||||
"qualifier, to set the default precision, "
|
||||
"use `precision %s %s;'",
|
||||
precision_names[this->type->
|
||||
qualifier.precision],
|
||||
type_name);
|
||||
}
|
||||
} else if (this->type->specifier->structure == NULL) {
|
||||
_mesa_glsl_warning(&loc, state, "empty declaration");
|
||||
}
|
||||
} else if (this->type->specifier->structure == NULL) {
|
||||
_mesa_glsl_warning(&loc, state, "empty declaration");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "main/core.h" /* for struct gl_context */
|
||||
#include "main/context.h"
|
||||
#include "main/debug_output.h"
|
||||
#include "main/shaderobj.h"
|
||||
#include "util/u_atomic.h" /* for p_atomic_cmpxchg */
|
||||
#include "util/ralloc.h"
|
||||
|
|
|
|||
|
|
@ -863,6 +863,14 @@ public:
|
|||
*/
|
||||
int location;
|
||||
|
||||
/**
|
||||
* for glsl->tgsi/mesa IR we need to store the index into the
|
||||
* parameters for uniforms, initially the code overloaded location
|
||||
* but this causes problems with indirect samplers and AoA.
|
||||
* This is assigned in _mesa_generate_parameters_list_for_uniforms.
|
||||
*/
|
||||
int param_index;
|
||||
|
||||
/**
|
||||
* Vertex stream output identifier.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1352,7 +1352,7 @@ private:
|
|||
|
||||
namespace linker {
|
||||
|
||||
bool
|
||||
void
|
||||
populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
|
||||
hash_table *consumer_inputs,
|
||||
hash_table *consumer_interface_inputs,
|
||||
|
|
@ -1366,8 +1366,8 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
|
|||
ir_variable *const input_var = node->as_variable();
|
||||
|
||||
if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) {
|
||||
if (input_var->type->is_interface())
|
||||
return false;
|
||||
/* All interface blocks should have been lowered by this point */
|
||||
assert(!input_var->type->is_interface());
|
||||
|
||||
if (input_var->data.explicit_location) {
|
||||
/* assign_varying_locations only cares about finding the
|
||||
|
|
@ -1401,8 +1401,6 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1626,18 +1624,11 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
if (producer)
|
||||
canonicalize_shader_io(producer->ir, ir_var_shader_out);
|
||||
|
||||
if (consumer
|
||||
&& !linker::populate_consumer_input_sets(mem_ctx,
|
||||
consumer->ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
consumer_inputs_with_locations)) {
|
||||
assert(!"populate_consumer_input_sets failed");
|
||||
hash_table_dtor(tfeedback_candidates);
|
||||
hash_table_dtor(consumer_inputs);
|
||||
hash_table_dtor(consumer_interface_inputs);
|
||||
return false;
|
||||
}
|
||||
if (consumer)
|
||||
linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
consumer_inputs_with_locations);
|
||||
|
||||
if (producer) {
|
||||
foreach_in_list(ir_instruction, node, producer->ir) {
|
||||
|
|
@ -1652,8 +1643,10 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
(output_var->data.stream < MAX_VERTEX_STREAMS &&
|
||||
producer->Stage == MESA_SHADER_GEOMETRY));
|
||||
|
||||
tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
|
||||
g.process(output_var);
|
||||
if (num_tfeedback_decls > 0) {
|
||||
tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
|
||||
g.process(output_var);
|
||||
}
|
||||
|
||||
ir_variable *const input_var =
|
||||
linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
|
||||
|
|
|
|||
|
|
@ -82,8 +82,6 @@
|
|||
#include "main/enums.h"
|
||||
|
||||
|
||||
void linker_error(gl_shader_program *, const char *, ...);
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
|
|
@ -2125,6 +2123,7 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
|
||||
if (ok) {
|
||||
memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *));
|
||||
_mesa_glsl_initialize_builtin_functions();
|
||||
linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader();
|
||||
|
||||
ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1);
|
||||
|
|
@ -4105,15 +4104,34 @@ disable_varying_optimizations_for_sso(struct gl_shader_program *prog)
|
|||
void
|
||||
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
{
|
||||
prog->LinkStatus = true; /* All error paths will set this to false */
|
||||
prog->Validated = false;
|
||||
prog->_Used = false;
|
||||
|
||||
/* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec says:
|
||||
*
|
||||
* "Linking can fail for a variety of reasons as specified in the
|
||||
* OpenGL Shading Language Specification, as well as any of the
|
||||
* following reasons:
|
||||
*
|
||||
* - No shader objects are attached to program."
|
||||
*
|
||||
* The Compatibility Profile specification does not list the error. In
|
||||
* Compatibility Profile missing shader stages are replaced by
|
||||
* fixed-function. This applies to the case where all stages are
|
||||
* missing.
|
||||
*/
|
||||
if (prog->NumShaders == 0) {
|
||||
if (ctx->API != API_OPENGL_COMPAT)
|
||||
linker_error(prog, "no shaders attached to the program\n");
|
||||
return;
|
||||
}
|
||||
|
||||
tfeedback_decl *tfeedback_decls = NULL;
|
||||
unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL); // temporary linker context
|
||||
|
||||
prog->LinkStatus = true; /* All error paths will set this to false */
|
||||
prog->Validated = false;
|
||||
prog->_Used = false;
|
||||
|
||||
prog->ARB_fragment_coord_conventions_enable = false;
|
||||
|
||||
/* Separate the shaders into groups based on their type.
|
||||
|
|
@ -4129,13 +4147,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
|
||||
unsigned min_version = UINT_MAX;
|
||||
unsigned max_version = 0;
|
||||
const bool is_es_prog =
|
||||
(prog->NumShaders > 0 && prog->Shaders[0]->IsES) ? true : false;
|
||||
for (unsigned i = 0; i < prog->NumShaders; i++) {
|
||||
min_version = MIN2(min_version, prog->Shaders[i]->Version);
|
||||
max_version = MAX2(max_version, prog->Shaders[i]->Version);
|
||||
|
||||
if (prog->Shaders[i]->IsES != is_es_prog) {
|
||||
if (prog->Shaders[i]->IsES != prog->Shaders[0]->IsES) {
|
||||
linker_error(prog, "all shaders must use same shading "
|
||||
"language version\n");
|
||||
goto done;
|
||||
|
|
@ -4153,80 +4169,59 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
/* In desktop GLSL, different shader versions may be linked together. In
|
||||
* GLSL ES, all shader versions must be the same.
|
||||
*/
|
||||
if (is_es_prog && min_version != max_version) {
|
||||
if (prog->Shaders[0]->IsES && min_version != max_version) {
|
||||
linker_error(prog, "all shaders must use same shading "
|
||||
"language version\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
prog->Version = max_version;
|
||||
prog->IsES = is_es_prog;
|
||||
|
||||
/* From OpenGL 4.5 Core specification (7.3 Program Objects):
|
||||
* "Linking can fail for a variety of reasons as specified in the OpenGL
|
||||
* Shading Language Specification, as well as any of the following
|
||||
* reasons:
|
||||
*
|
||||
* * No shader objects are attached to program.
|
||||
*
|
||||
* ..."
|
||||
*
|
||||
* Same rule applies for OpenGL ES >= 3.1.
|
||||
*/
|
||||
|
||||
if (prog->NumShaders == 0 &&
|
||||
((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) ||
|
||||
(ctx->API == API_OPENGLES2 && ctx->Version >= 31))) {
|
||||
linker_error(prog, "No shader objects are attached to program.\n");
|
||||
goto done;
|
||||
}
|
||||
prog->IsES = prog->Shaders[0]->IsES;
|
||||
|
||||
/* Some shaders have to be linked with some other shaders present.
|
||||
*/
|
||||
if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0 &&
|
||||
!prog->SeparateShader) {
|
||||
linker_error(prog, "Geometry shader must be linked with "
|
||||
"vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0 &&
|
||||
!prog->SeparateShader) {
|
||||
linker_error(prog, "Tessellation evaluation shader must be linked with "
|
||||
"vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0 &&
|
||||
!prog->SeparateShader) {
|
||||
linker_error(prog, "Tessellation control shader must be linked with "
|
||||
"vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
if (!prog->SeparateShader) {
|
||||
if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0) {
|
||||
linker_error(prog, "Geometry shader must be linked with "
|
||||
"vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0) {
|
||||
linker_error(prog, "Tessellation evaluation shader must be linked "
|
||||
"with vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
|
||||
num_shaders[MESA_SHADER_VERTEX] == 0) {
|
||||
linker_error(prog, "Tessellation control shader must be linked with "
|
||||
"vertex shader\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* The spec is self-contradictory here. It allows linking without a tess
|
||||
* eval shader, but that can only be used with transform feedback and
|
||||
* rasterization disabled. However, transform feedback isn't allowed
|
||||
* with GL_PATCHES, so it can't be used.
|
||||
*
|
||||
* More investigation showed that the idea of transform feedback after
|
||||
* a tess control shader was dropped, because some hw vendors couldn't
|
||||
* support tessellation without a tess eval shader, but the linker section
|
||||
* wasn't updated to reflect that.
|
||||
*
|
||||
* All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this
|
||||
* spec bug.
|
||||
*
|
||||
* Do what's reasonable and always require a tess eval shader if a tess
|
||||
* control shader is present.
|
||||
*/
|
||||
if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
|
||||
num_shaders[MESA_SHADER_TESS_EVAL] == 0 &&
|
||||
!prog->SeparateShader) {
|
||||
linker_error(prog, "Tessellation control shader must be linked with "
|
||||
"tessellation evaluation shader\n");
|
||||
goto done;
|
||||
/* The spec is self-contradictory here. It allows linking without a tess
|
||||
* eval shader, but that can only be used with transform feedback and
|
||||
* rasterization disabled. However, transform feedback isn't allowed
|
||||
* with GL_PATCHES, so it can't be used.
|
||||
*
|
||||
* More investigation showed that the idea of transform feedback after
|
||||
* a tess control shader was dropped, because some hw vendors couldn't
|
||||
* support tessellation without a tess eval shader, but the linker
|
||||
* section wasn't updated to reflect that.
|
||||
*
|
||||
* All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this
|
||||
* spec bug.
|
||||
*
|
||||
* Do what's reasonable and always require a tess eval shader if a tess
|
||||
* control shader is present.
|
||||
*/
|
||||
if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
|
||||
num_shaders[MESA_SHADER_TESS_EVAL] == 0) {
|
||||
linker_error(prog, "Tessellation control shader must be linked with "
|
||||
"tessellation evaluation shader\n");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute shaders have additional restrictions. */
|
||||
|
|
@ -4362,7 +4357,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
*
|
||||
* This rule also applies to GLSL ES 3.00.
|
||||
*/
|
||||
if (max_version >= (is_es_prog ? 300 : 130)) {
|
||||
if (max_version >= (prog->IsES ? 300 : 130)) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
|
||||
if (sh) {
|
||||
lower_discard_flow(sh->ir);
|
||||
|
|
@ -4451,9 +4446,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
* non-zero, but the program object has no vertex or geometry
|
||||
* shader;
|
||||
*/
|
||||
if (first == MESA_SHADER_FRAGMENT) {
|
||||
if (first >= MESA_SHADER_FRAGMENT) {
|
||||
linker_error(prog, "Transform feedback varyings specified, but "
|
||||
"no vertex or geometry shader is present.\n");
|
||||
"no vertex, tessellation, or geometry shader is "
|
||||
"present.\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
|
@ -4465,93 +4461,82 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
goto done;
|
||||
}
|
||||
|
||||
/* Linking the stages in the opposite order (from fragment to vertex)
|
||||
* ensures that inter-shader outputs written to in an earlier stage are
|
||||
* eliminated if they are (transitively) not used in a later stage.
|
||||
/* If there is no fragment shader we need to set transform feedback.
|
||||
*
|
||||
* For SSO we also need to assign output locations; we assign them
|
||||
* here because we need to do it for both single stage programs and multi
|
||||
* stage programs.
|
||||
*/
|
||||
int next;
|
||||
if (last < MESA_SHADER_FRAGMENT &&
|
||||
(num_tfeedback_decls != 0 || prog->SeparateShader)) {
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
prog->_LinkedShaders[last], NULL,
|
||||
num_tfeedback_decls, tfeedback_decls))
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (first < MESA_SHADER_FRAGMENT) {
|
||||
gl_shader *const sh = prog->_LinkedShaders[last];
|
||||
|
||||
if (first != MESA_SHADER_VERTEX) {
|
||||
/* There was no vertex shader, but we still have to assign varying
|
||||
* locations for use by tessellation/geometry shader inputs in SSO.
|
||||
*
|
||||
* If the shader is not separable (i.e., prog->SeparateShader is
|
||||
* false), linking will have already failed when first is not
|
||||
* MESA_SHADER_VERTEX.
|
||||
*/
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
NULL, prog->_LinkedShaders[first],
|
||||
num_tfeedback_decls, tfeedback_decls))
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (last != MESA_SHADER_FRAGMENT &&
|
||||
(num_tfeedback_decls != 0 || prog->SeparateShader)) {
|
||||
/* There was no fragment shader, but we still have to assign varying
|
||||
* locations for use by transform feedback.
|
||||
*/
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
sh, NULL,
|
||||
num_tfeedback_decls, tfeedback_decls))
|
||||
goto done;
|
||||
}
|
||||
|
||||
do_dead_builtin_varyings(ctx, sh, NULL,
|
||||
num_tfeedback_decls, tfeedback_decls);
|
||||
|
||||
remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh,
|
||||
if (last <= MESA_SHADER_FRAGMENT) {
|
||||
/* Remove unused varyings from the first/last stage unless SSO */
|
||||
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
|
||||
prog->_LinkedShaders[first],
|
||||
ir_var_shader_in);
|
||||
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
|
||||
prog->_LinkedShaders[last],
|
||||
ir_var_shader_out);
|
||||
}
|
||||
else if (first == MESA_SHADER_FRAGMENT) {
|
||||
/* If the program only contains a fragment shader...
|
||||
*/
|
||||
gl_shader *const sh = prog->_LinkedShaders[first];
|
||||
|
||||
do_dead_builtin_varyings(ctx, NULL, sh,
|
||||
num_tfeedback_decls, tfeedback_decls);
|
||||
/* If the program is made up of only a single stage */
|
||||
if (first == last) {
|
||||
|
||||
if (prog->SeparateShader) {
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
NULL /* producer */,
|
||||
sh /* consumer */,
|
||||
0 /* num_tfeedback_decls */,
|
||||
NULL /* tfeedback_decls */))
|
||||
goto done;
|
||||
gl_shader *const sh = prog->_LinkedShaders[last];
|
||||
if (prog->SeparateShader) {
|
||||
/* Assign input locations for SSO, output locations are already
|
||||
* assigned.
|
||||
*/
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
NULL /* producer */,
|
||||
sh /* consumer */,
|
||||
0 /* num_tfeedback_decls */,
|
||||
NULL /* tfeedback_decls */))
|
||||
goto done;
|
||||
}
|
||||
|
||||
do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
|
||||
do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
|
||||
tfeedback_decls);
|
||||
} else {
|
||||
remove_unused_shader_inputs_and_outputs(false, sh,
|
||||
ir_var_shader_in);
|
||||
/* Linking the stages in the opposite order (from fragment to vertex)
|
||||
* ensures that inter-shader outputs written to in an earlier stage
|
||||
* are eliminated if they are (transitively) not used in a later
|
||||
* stage.
|
||||
*/
|
||||
int next = last;
|
||||
for (int i = next - 1; i >= 0; i--) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
continue;
|
||||
|
||||
gl_shader *const sh_i = prog->_LinkedShaders[i];
|
||||
gl_shader *const sh_next = prog->_LinkedShaders[next];
|
||||
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
|
||||
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
|
||||
tfeedback_decls))
|
||||
goto done;
|
||||
|
||||
do_dead_builtin_varyings(ctx, sh_i, sh_next,
|
||||
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
|
||||
tfeedback_decls);
|
||||
|
||||
/* This must be done after all dead varyings are eliminated. */
|
||||
if (!check_against_output_limit(ctx, prog, sh_i))
|
||||
goto done;
|
||||
if (!check_against_input_limit(ctx, prog, sh_next))
|
||||
goto done;
|
||||
|
||||
next = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
next = last;
|
||||
for (int i = next - 1; i >= 0; i--) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
continue;
|
||||
|
||||
gl_shader *const sh_i = prog->_LinkedShaders[i];
|
||||
gl_shader *const sh_next = prog->_LinkedShaders[next];
|
||||
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
|
||||
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
|
||||
tfeedback_decls))
|
||||
goto done;
|
||||
|
||||
do_dead_builtin_varyings(ctx, sh_i, sh_next,
|
||||
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
|
||||
tfeedback_decls);
|
||||
|
||||
/* This must be done after all dead varyings are eliminated. */
|
||||
if (!check_against_output_limit(ctx, prog, sh_i))
|
||||
goto done;
|
||||
if (!check_against_input_limit(ctx, prog, sh_next))
|
||||
goto done;
|
||||
|
||||
next = i;
|
||||
}
|
||||
|
||||
if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls))
|
||||
goto done;
|
||||
|
||||
|
|
@ -4569,38 +4554,38 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
if (!prog->LinkStatus)
|
||||
goto done;
|
||||
|
||||
/* OpenGL ES requires that a vertex shader and a fragment shader both be
|
||||
* present in a linked program. GL_ARB_ES2_compatibility doesn't say
|
||||
/* OpenGL ES < 3.1 requires that a vertex shader and a fragment shader both
|
||||
* be present in a linked program. GL_ARB_ES2_compatibility doesn't say
|
||||
* anything about shader linking when one of the shaders (vertex or
|
||||
* fragment shader) is absent. So, the extension shouldn't change the
|
||||
* behavior specified in GLSL specification.
|
||||
*
|
||||
* From OpenGL ES 3.1 specification (7.3 Program Objects):
|
||||
* "Linking can fail for a variety of reasons as specified in the
|
||||
* OpenGL ES Shading Language Specification, as well as any of the
|
||||
* following reasons:
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* * program contains objects to form either a vertex shader or
|
||||
* fragment shader, and program is not separable, and does not
|
||||
* contain objects to form both a vertex shader and fragment
|
||||
* shader."
|
||||
*
|
||||
* However, the only scenario in 3.1+ where we don't require them both is
|
||||
* when we have a compute shader. For example:
|
||||
*
|
||||
* - No shaders is a link error.
|
||||
* - Geom or Tess without a Vertex shader is a link error which means we
|
||||
* always require a Vertex shader and hence a Fragment shader.
|
||||
* - Finally a Compute shader linked with any other stage is a link error.
|
||||
*/
|
||||
if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
|
||||
/* ES < 3.1 always requires both a vertex and a fragment shader. */
|
||||
if (ctx->Version < 31) {
|
||||
if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
|
||||
linker_error(prog, "program lacks a vertex shader\n");
|
||||
} else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
|
||||
linker_error(prog, "program lacks a fragment shader\n");
|
||||
}
|
||||
} else {
|
||||
/* From OpenGL ES 3.1 specification (7.3 Program Objects):
|
||||
* "Linking can fail for a variety of reasons as specified in the
|
||||
* OpenGL ES Shading Language Specification, as well as any of the
|
||||
* following reasons:
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* * program contains objects to form either a vertex shader or
|
||||
* fragment shader, and program is not separable, and does not
|
||||
* contain objects to form both a vertex shader and fragment
|
||||
* shader."
|
||||
*/
|
||||
if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^
|
||||
!!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
|
||||
linker_error(prog, "Program needs to contain both vertex and "
|
||||
"fragment shaders.\n");
|
||||
}
|
||||
if (!prog->SeparateShader && ctx->API == API_OPENGLES2 &&
|
||||
num_shaders[MESA_SHADER_COMPUTE] == 0) {
|
||||
if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
|
||||
linker_error(prog, "program lacks a vertex shader\n");
|
||||
} else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
|
||||
linker_error(prog, "program lacks a fragment shader\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -156,11 +156,11 @@ TEST_F(link_varyings, single_simple_input)
|
|||
|
||||
ir.push_tail(v);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
|
||||
EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
|
||||
EXPECT_EQ(1u, num_elements(consumer_inputs));
|
||||
|
|
@ -183,11 +183,11 @@ TEST_F(link_varyings, gl_ClipDistance)
|
|||
|
||||
ir.push_tail(clipdistance);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
|
||||
EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]);
|
||||
EXPECT_TRUE(is_empty(consumer_inputs));
|
||||
|
|
@ -205,11 +205,11 @@ TEST_F(link_varyings, single_interface_input)
|
|||
|
||||
ir.push_tail(v);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
char *const full_name = interface_field_name(simple_interface);
|
||||
|
||||
EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name));
|
||||
|
|
@ -236,11 +236,11 @@ TEST_F(link_varyings, one_interface_and_one_simple_input)
|
|||
|
||||
ir.push_tail(iface);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
|
||||
char *const iface_field_name = interface_field_name(simple_interface);
|
||||
|
||||
|
|
@ -252,24 +252,6 @@ TEST_F(link_varyings, one_interface_and_one_simple_input)
|
|||
EXPECT_EQ(1u, num_elements(consumer_inputs));
|
||||
}
|
||||
|
||||
TEST_F(link_varyings, invalid_interface_input)
|
||||
{
|
||||
ir_variable *const v =
|
||||
new(mem_ctx) ir_variable(simple_interface,
|
||||
"named_interface",
|
||||
ir_var_shader_in);
|
||||
|
||||
ASSERT_EQ(simple_interface, v->get_interface_type());
|
||||
|
||||
ir.push_tail(v);
|
||||
|
||||
EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
}
|
||||
|
||||
TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
|
||||
{
|
||||
char *const iface_field_name = interface_field_name(simple_interface);
|
||||
|
|
@ -283,11 +265,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
|
|||
|
||||
ir.push_tail(in_v);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
|
||||
/* Create an output variable, "v", that is part of an interface block named
|
||||
* "a". They should not match.
|
||||
|
|
@ -325,11 +307,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa)
|
|||
|
||||
ir.push_tail(in_v);
|
||||
|
||||
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
linker::populate_consumer_input_sets(mem_ctx,
|
||||
&ir,
|
||||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
junk);
|
||||
|
||||
/* Create an output variable "a.v". They should not match.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -376,8 +376,6 @@ nir_visitor::visit(ir_variable *ir)
|
|||
var->data.explicit_binding = ir->data.explicit_binding;
|
||||
var->data.has_initializer = ir->data.has_initializer;
|
||||
var->data.location_frac = ir->data.location_frac;
|
||||
var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
|
||||
var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
|
||||
|
||||
switch (ir->data.depth_layout) {
|
||||
case ir_depth_layout_none:
|
||||
|
|
@ -600,7 +598,7 @@ nir_visitor::visit(ir_emit_vertex *ir)
|
|||
{
|
||||
nir_intrinsic_instr *instr =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
|
||||
instr->const_index[0] = ir->stream_id();
|
||||
nir_intrinsic_set_stream_id(instr, ir->stream_id());
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
}
|
||||
|
||||
|
|
@ -609,7 +607,7 @@ nir_visitor::visit(ir_end_primitive *ir)
|
|||
{
|
||||
nir_intrinsic_instr *instr =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
|
||||
instr->const_index[0] = ir->stream_id();
|
||||
nir_intrinsic_set_stream_id(instr, ir->stream_id());
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
}
|
||||
|
||||
|
|
@ -889,7 +887,7 @@ nir_visitor::visit(ir_call *ir)
|
|||
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
|
||||
instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
|
||||
instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
|
||||
instr->const_index[0] = write_mask->value.u[0];
|
||||
nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
|
||||
instr->num_components = val->type->vector_elements;
|
||||
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
|
|
@ -987,7 +985,7 @@ nir_visitor::visit(ir_call *ir)
|
|||
exec_node *param = ir->actual_parameters.get_head();
|
||||
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
|
||||
|
||||
instr->const_index[0] = 0;
|
||||
nir_intrinsic_set_base(instr, 0);
|
||||
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
|
||||
|
||||
const glsl_type *type = ir->return_deref->var->type;
|
||||
|
|
@ -1011,10 +1009,10 @@ nir_visitor::visit(ir_call *ir)
|
|||
ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
|
||||
assert(write_mask);
|
||||
|
||||
instr->const_index[0] = 0;
|
||||
nir_intrinsic_set_base(instr, 0);
|
||||
instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
|
||||
|
||||
instr->const_index[1] = write_mask->value.u[0];
|
||||
nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
|
||||
|
||||
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
|
||||
instr->num_components = val->type->vector_elements;
|
||||
|
|
@ -1069,7 +1067,8 @@ nir_visitor::visit(ir_call *ir)
|
|||
nir_intrinsic_instr *store_instr =
|
||||
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
|
||||
store_instr->num_components = ir->return_deref->type->vector_elements;
|
||||
store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
|
||||
nir_intrinsic_set_write_mask(store_instr,
|
||||
(1 << store_instr->num_components) - 1);
|
||||
|
||||
store_instr->variables[0] =
|
||||
evaluate_deref(&store_instr->instr, ir->return_deref);
|
||||
|
|
@ -1147,7 +1146,7 @@ nir_visitor::visit(ir_assignment *ir)
|
|||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
|
||||
store->num_components = ir->lhs->type->vector_elements;
|
||||
store->const_index[0] = ir->write_mask;
|
||||
nir_intrinsic_set_write_mask(store, ir->write_mask);
|
||||
nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
|
||||
store->variables[0] = nir_deref_as_var(store_deref);
|
||||
store->src[0] = nir_src_for_ssa(src);
|
||||
|
|
|
|||
|
|
@ -225,24 +225,6 @@ typedef struct nir_variable {
|
|||
*/
|
||||
unsigned location_frac:2;
|
||||
|
||||
/**
|
||||
* Non-zero if this variable was created by lowering a named interface
|
||||
* block which was not an array.
|
||||
*
|
||||
* Note that this variable and \c from_named_ifc_block_array will never
|
||||
* both be non-zero.
|
||||
*/
|
||||
unsigned from_named_ifc_block_nonarray:1;
|
||||
|
||||
/**
|
||||
* Non-zero if this variable was created by lowering a named interface
|
||||
* block which was an array.
|
||||
*
|
||||
* Note that this variable and \c from_named_ifc_block_nonarray will never
|
||||
* both be non-zero.
|
||||
*/
|
||||
unsigned from_named_ifc_block_array:1;
|
||||
|
||||
/**
|
||||
* \brief Layout qualifier for gl_FragDepth.
|
||||
*
|
||||
|
|
@ -835,7 +817,7 @@ typedef struct {
|
|||
} nir_call_instr;
|
||||
|
||||
#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
|
||||
num_variables, num_indices, flags) \
|
||||
num_variables, num_indices, idx0, idx1, idx2, flags) \
|
||||
nir_intrinsic_##name,
|
||||
|
||||
#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
|
||||
|
|
@ -848,6 +830,8 @@ typedef enum {
|
|||
#undef INTRINSIC
|
||||
#undef LAST_INTRINSIC
|
||||
|
||||
#define NIR_INTRINSIC_MAX_CONST_INDEX 3
|
||||
|
||||
/** Represents an intrinsic
|
||||
*
|
||||
* An intrinsic is an instruction type for handling things that are
|
||||
|
|
@ -891,7 +875,7 @@ typedef struct {
|
|||
*/
|
||||
uint8_t num_components;
|
||||
|
||||
int const_index[3];
|
||||
int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
|
||||
|
||||
nir_deref_var *variables[2];
|
||||
|
||||
|
|
@ -920,6 +904,55 @@ typedef enum {
|
|||
NIR_INTRINSIC_CAN_REORDER = (1 << 1),
|
||||
} nir_intrinsic_semantic_flag;
|
||||
|
||||
/**
|
||||
* \name NIR intrinsics const-index flag
|
||||
*
|
||||
* Indicates the usage of a const_index slot.
|
||||
*
|
||||
* \sa nir_intrinsic_info::index_map
|
||||
*/
|
||||
typedef enum {
|
||||
/* Flag values start at 1; 0 is NIR_INTRINSIC_xx, the placeholder the
 * intrinsic tables use for an unused const_index slot.
 */
/**
|
||||
* Generally, instructions that take an offset src argument can encode
|
||||
* a constant 'base' value which is added to the offset.
|
||||
*/
|
||||
NIR_INTRINSIC_BASE = 1,
|
||||
|
||||
/**
|
||||
* For store instructions, a writemask for the store.
|
||||
*/
|
||||
NIR_INTRINSIC_WRMASK = 2,
|
||||
|
||||
/**
|
||||
* The stream-id for GS emit_vertex/end_primitive intrinsics.
|
||||
*/
|
||||
NIR_INTRINSIC_STREAM_ID = 3,
|
||||
|
||||
/**
|
||||
* The clip-plane id for load_user_clip_plane intrinsic.
|
||||
*/
|
||||
NIR_INTRINSIC_UCP_ID = 4,
|
||||
|
||||
/**
|
||||
* The range of a load operation. This specifies the maximum amount of
|
||||
* data starting at the base offset (if any) that can be accessed.
|
||||
*/
|
||||
NIR_INTRINSIC_RANGE = 5,
|
||||
|
||||
/**
|
||||
* The Vulkan descriptor set for vulkan_resource_index intrinsic.
|
||||
*/
|
||||
NIR_INTRINSIC_DESC_SET = 6,
|
||||
|
||||
/**
|
||||
* The Vulkan descriptor set binding for vulkan_resource_index intrinsic.
|
||||
*/
|
||||
NIR_INTRINSIC_BINDING = 7,
|
||||
|
||||
/* One past the last flag value; sizes nir_intrinsic_info::index_map. */
NIR_INTRINSIC_NUM_INDEX_FLAGS,
|
||||
|
||||
} nir_intrinsic_index_flag;
|
||||
|
||||
#define NIR_INTRINSIC_MAX_INPUTS 4
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -949,12 +982,40 @@ typedef struct {
|
|||
/** the number of constant indices used by the intrinsic */
|
||||
unsigned num_indices;
|
||||
|
||||
/** indicates the usage of intr->const_index[n] */
|
||||
unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
|
||||
|
||||
/** semantic flags for calls to this intrinsic */
|
||||
nir_intrinsic_semantic_flag flags;
|
||||
} nir_intrinsic_info;
|
||||
|
||||
extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
|
||||
|
||||
|
||||
/**
 * Generates a typed getter nir_intrinsic_<name>() and a setter
 * nir_intrinsic_set_<name>() for one kind of const_index value.
 *
 * Both functions look up the info entry for instr->intrinsic, assert that
 * index_map[NIR_INTRINSIC_<flag>] is non-zero, and then access
 * instr->const_index[] at that value minus one, i.e. index_map holds the
 * const_index slot number plus one. A zero entry presumably means the
 * intrinsic does not carry that value (NOTE(review): confirm against the
 * intrinsic table initializer).
 */
#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \
|
||||
static inline type \
|
||||
nir_intrinsic_##name(nir_intrinsic_instr *instr) \
|
||||
{ \
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \
|
||||
assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \
|
||||
return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \
|
||||
} \
|
||||
static inline void \
|
||||
nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \
|
||||
{ \
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \
|
||||
assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \
|
||||
instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \
|
||||
}
|
||||
|
||||
INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned)
|
||||
INTRINSIC_IDX_ACCESSORS(base, BASE, int)
|
||||
INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
|
||||
INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
|
||||
INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
|
||||
INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
|
||||
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
|
||||
|
||||
/**
|
||||
* \group texture information
|
||||
*
|
||||
|
|
|
|||
|
|
@ -102,13 +102,10 @@ class Constant(Value):
|
|||
self.value = val
|
||||
|
||||
def __hex__(self):
|
||||
# Even if it's an integer, we still need to unpack as an unsigned
|
||||
# int. This is because, without C99, we can only assign to the first
|
||||
# element of a union in an initializer.
|
||||
if isinstance(self.value, (bool)):
|
||||
return 'NIR_TRUE' if self.value else 'NIR_FALSE'
|
||||
if isinstance(self.value, (int, long)):
|
||||
return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0])
|
||||
return hex(self.value)
|
||||
elif isinstance(self.value, float):
|
||||
return hex(struct.unpack('I', struct.pack('f', self.value))[0])
|
||||
else:
|
||||
|
|
@ -216,7 +213,7 @@ ${pass_name}_block(nir_block *block, void *void_state)
|
|||
{
|
||||
struct opt_state *state = void_state;
|
||||
|
||||
nir_foreach_instr_safe(block, instr) {
|
||||
nir_foreach_instr_reverse_safe(block, instr) {
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
continue;
|
||||
|
||||
|
|
@ -255,7 +252,7 @@ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
|
|||
state.progress = false;
|
||||
state.condition_flags = condition_flags;
|
||||
|
||||
nir_foreach_block(impl, ${pass_name}_block, &state);
|
||||
nir_foreach_block_reverse(impl, ${pass_name}_block, &state);
|
||||
|
||||
if (state.progress)
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
|
|
|
|||
|
|
@ -374,7 +374,7 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
|
|||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
|
||||
store->num_components = num_components;
|
||||
store->const_index[0] = writemask;
|
||||
nir_intrinsic_set_write_mask(store, writemask);
|
||||
store->variables[0] = nir_deref_var_create(store, var);
|
||||
store->src[0] = nir_src_for_ssa(value);
|
||||
nir_builder_instr_insert(build, &store->instr);
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@
|
|||
#define OPCODE(name) nir_intrinsic_##name
|
||||
|
||||
#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
|
||||
_dest_components, _num_variables, _num_indices, _flags) \
|
||||
_dest_components, _num_variables, _num_indices, \
|
||||
idx0, idx1, idx2, _flags) \
|
||||
{ \
|
||||
.name = #_name, \
|
||||
.num_srcs = _num_srcs, \
|
||||
|
|
@ -39,9 +40,16 @@
|
|||
.dest_components = _dest_components, \
|
||||
.num_variables = _num_variables, \
|
||||
.num_indices = _num_indices, \
|
||||
.index_map = { \
|
||||
[NIR_INTRINSIC_ ## idx0] = 1, \
|
||||
[NIR_INTRINSIC_ ## idx1] = 2, \
|
||||
[NIR_INTRINSIC_ ## idx2] = 3, \
|
||||
}, \
|
||||
.flags = _flags \
|
||||
},
|
||||
|
||||
#define NIR_INTRINSIC_xx 0
|
||||
|
||||
#define LAST_INTRINSIC(name)
|
||||
|
||||
const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@
|
|||
* expands to a list of macros of the form:
|
||||
*
|
||||
* INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
|
||||
* num_variables, num_indices, flags)
|
||||
* num_variables, num_indices, idx0, idx1, idx2, flags)
|
||||
*
|
||||
* Which should correspond one-to-one with the nir_intrinsic_info structure. It
|
||||
* is included in both ir.h to create the nir_intrinsic enum (with members of
|
||||
|
|
@ -42,9 +42,9 @@
|
|||
#define ARR(...) { __VA_ARGS__ }
|
||||
|
||||
|
||||
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
|
||||
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
|
||||
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
|
||||
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
|
||||
|
||||
/*
|
||||
* Interpolation of input. The interp_var_at* intrinsics are similar to the
|
||||
|
|
@ -54,25 +54,25 @@ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
|
|||
* respectively.
|
||||
*/
|
||||
|
||||
INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
|
||||
INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
|
||||
INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
|
||||
INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
* Ask the driver for the size of a given buffer. It takes the buffer index
|
||||
* as source.
|
||||
*/
|
||||
INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
|
||||
INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
|
||||
* around/optimized in general
|
||||
*/
|
||||
#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
|
||||
#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
BARRIER(barrier)
|
||||
BARRIER(discard)
|
||||
|
|
@ -89,7 +89,7 @@ BARRIER(memory_barrier)
|
|||
* The latter can be used as code motion barrier, which is currently not
|
||||
* feasible with NIR.
|
||||
*/
|
||||
INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
|
||||
/*
|
||||
* Memory barrier with semantics analogous to the compute shader
|
||||
|
|
@ -103,7 +103,7 @@ BARRIER(memory_barrier_image)
|
|||
BARRIER(memory_barrier_shared)
|
||||
|
||||
/** A conditional discard, with a single boolean source. */
|
||||
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
|
||||
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
/**
|
||||
* Basic Geometry Shader intrinsics.
|
||||
|
|
@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
|
|||
*
|
||||
* end_primitive implements GLSL's EndPrimitive() built-in.
|
||||
*/
|
||||
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
|
||||
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
|
||||
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
|
||||
/**
|
||||
* Geometry Shader intrinsics with a vertex count.
|
||||
|
|
@ -125,9 +125,9 @@ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
|
|||
* These maintain a count of the number of vertices emitted, as an additional
|
||||
* unsigned integer source.
|
||||
*/
|
||||
INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
|
||||
INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
|
||||
INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
|
||||
INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
/*
|
||||
* Atomic counters
|
||||
|
|
@ -137,8 +137,8 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
|
|||
*/
|
||||
|
||||
#define ATOMIC(name, flags) \
|
||||
INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
|
||||
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
|
||||
INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
|
||||
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
|
||||
|
||||
ATOMIC(inc, 0)
|
||||
ATOMIC(dec, 0)
|
||||
|
|
@ -159,20 +159,20 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
|
|||
* either one or two additional scalar arguments with the same meaning as in
|
||||
* the ARB_shader_image_load_store specification.
|
||||
*/
|
||||
INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
|
||||
INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
|
||||
INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
|
||||
INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
|
|
@ -191,7 +191,8 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
|
|||
* The intended usage is that the shader will call vulkan_resource_index to
|
||||
* get an index and then pass that as the buffer index to ubo/ssbo calls.
|
||||
*/
|
||||
INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3,
|
||||
INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 2,
|
||||
DESC_SET, BINDING, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
|
|
@ -210,16 +211,16 @@ INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3,
|
|||
*
|
||||
* All operations take 1 variable deref.
|
||||
*/
|
||||
INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0)
|
||||
INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
|
||||
/*
|
||||
* SSBO atomic intrinsics
|
||||
|
|
@ -238,16 +239,16 @@ INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0)
|
|||
* in ssbo_atomic_add, etc).
|
||||
* 3: For CompSwap only: the second data parameter.
|
||||
*/
|
||||
INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
/*
|
||||
* CS shared variable atomic intrinsics
|
||||
|
|
@ -265,42 +266,43 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
|
|||
* in shared_atomic_add, etc).
|
||||
* 2: For CompSwap only: the second data parameter.
|
||||
*/
|
||||
INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
|
||||
INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
#define SYSTEM_VALUE(name, components, num_indices) \
|
||||
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
|
||||
INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
|
||||
idx0, idx1, idx2, \
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
SYSTEM_VALUE(front_face, 1, 0)
|
||||
SYSTEM_VALUE(vertex_id, 1, 0)
|
||||
SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
|
||||
SYSTEM_VALUE(base_vertex, 1, 0)
|
||||
SYSTEM_VALUE(instance_id, 1, 0)
|
||||
SYSTEM_VALUE(base_instance, 1, 0)
|
||||
SYSTEM_VALUE(draw_id, 1, 0)
|
||||
SYSTEM_VALUE(sample_id, 1, 0)
|
||||
SYSTEM_VALUE(sample_pos, 2, 0)
|
||||
SYSTEM_VALUE(sample_mask_in, 1, 0)
|
||||
SYSTEM_VALUE(primitive_id, 1, 0)
|
||||
SYSTEM_VALUE(invocation_id, 1, 0)
|
||||
SYSTEM_VALUE(tess_coord, 3, 0)
|
||||
SYSTEM_VALUE(tess_level_outer, 4, 0)
|
||||
SYSTEM_VALUE(tess_level_inner, 2, 0)
|
||||
SYSTEM_VALUE(patch_vertices_in, 1, 0)
|
||||
SYSTEM_VALUE(local_invocation_id, 3, 0)
|
||||
SYSTEM_VALUE(work_group_id, 3, 0)
|
||||
SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
|
||||
SYSTEM_VALUE(num_work_groups, 3, 0)
|
||||
SYSTEM_VALUE(helper_invocation, 1, 0)
|
||||
SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
|
||||
SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
||||
|
||||
/*
|
||||
* Load operations pull data from some piece of GPU memory. All load
|
||||
|
|
@ -323,27 +325,29 @@ SYSTEM_VALUE(helper_invocation, 1, 0)
|
|||
* offsets are always in bytes.
|
||||
*/
|
||||
|
||||
#define LOAD(name, srcs, indices, flags) \
|
||||
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
|
||||
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
|
||||
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
|
||||
|
||||
/* src[] = { offset }. const_index[] = { base, size } */
|
||||
LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { offset }. const_index[] = { base, range } */
|
||||
LOAD(uniform, 1, 2, BASE, RANGE, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { vertex, offset }. const_index[] = { base } */
|
||||
LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { vertex, offset }. const_index[] = { base } */
|
||||
LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base, size } */
|
||||
LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base, range } */
|
||||
LOAD(push_constant, 1, 2, BASE, RANGE, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
* Stores work the same way as loads, except now the first source is the value
|
||||
|
|
@ -352,16 +356,16 @@ LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDE
|
|||
* const_index[0].
|
||||
*/
|
||||
|
||||
#define STORE(name, srcs, indices, flags) \
|
||||
INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
|
||||
#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \
|
||||
INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, num_indices, idx0, idx1, idx2, flags)
|
||||
|
||||
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
|
||||
STORE(output, 2, 2, 0)
|
||||
STORE(output, 2, 2, BASE, WRMASK, xx, 0)
|
||||
/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
|
||||
STORE(per_vertex_output, 3, 2, 0)
|
||||
STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0)
|
||||
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
|
||||
STORE(ssbo, 3, 1, 0)
|
||||
STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
|
||||
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
|
||||
STORE(shared, 2, 2, 0)
|
||||
STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
|
||||
|
||||
LAST_INTRINSIC(store_shared)
|
||||
|
|
|
|||
|
|
@ -71,8 +71,8 @@ lower_instr(nir_intrinsic_instr *instr,
|
|||
unsigned uniform_loc = instr->variables[0]->var->data.location;
|
||||
|
||||
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
|
||||
new_instr->const_index[0] =
|
||||
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
|
||||
nir_intrinsic_set_base(new_instr,
|
||||
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
|
||||
|
||||
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
|
||||
offset_const->value.u[0] = instr->variables[0]->var->data.offset;
|
||||
|
|
|
|||
|
|
@ -71,8 +71,8 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
|
|||
|
||||
store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
|
||||
store->num_components = 4;
|
||||
store->const_index[0] = out->data.driver_location;
|
||||
store->const_index[1] = 0xf; /* wrmask */
|
||||
nir_intrinsic_set_base(store, out->data.driver_location);
|
||||
nir_intrinsic_set_write_mask(store, 0xf);
|
||||
store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
|
||||
store->src[0].is_ssa = true;
|
||||
store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
|
|
@ -86,7 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
|
|||
|
||||
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
|
||||
load->num_components = 4;
|
||||
load->const_index[0] = in->data.driver_location;
|
||||
nir_intrinsic_set_base(load, in->data.driver_location);
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
|
@ -112,7 +112,7 @@ find_output_in_block(nir_block *block, void *void_state)
|
|||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if ((intr->intrinsic == nir_intrinsic_store_output) &&
|
||||
intr->const_index[0] == state->drvloc) {
|
||||
nir_intrinsic_base(intr) == state->drvloc) {
|
||||
assert(state->def == NULL);
|
||||
assert(intr->src[0].is_ssa);
|
||||
assert(nir_src_as_const_value(intr->src[1]));
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
|
|||
nir_intrinsic_instr *lowered =
|
||||
nir_intrinsic_instr_create(b->shader,
|
||||
nir_intrinsic_emit_vertex_with_counter);
|
||||
lowered->const_index[0] = intrin->const_index[0];
|
||||
nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin));
|
||||
lowered->src[0] = nir_src_for_ssa(count);
|
||||
nir_builder_instr_insert(b, &lowered->instr);
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
|
|||
nir_intrinsic_instr *lowered =
|
||||
nir_intrinsic_instr_create(b->shader,
|
||||
nir_intrinsic_end_primitive_with_counter);
|
||||
lowered->const_index[0] = intrin->const_index[0];
|
||||
nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin));
|
||||
lowered->src[0] = nir_src_for_ssa(count);
|
||||
nir_builder_instr_insert(b, &lowered->instr);
|
||||
|
||||
|
|
|
|||
|
|
@ -274,8 +274,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
|||
load_op(state, mode, per_vertex));
|
||||
load->num_components = intrin->num_components;
|
||||
|
||||
load->const_index[0] =
|
||||
intrin->variables[0]->var->data.driver_location;
|
||||
nir_intrinsic_set_base(load,
|
||||
intrin->variables[0]->var->data.driver_location);
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_uniform) {
|
||||
load->const_index[1] =
|
||||
|
|
@ -321,11 +321,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
|||
|
||||
nir_src_copy(&store->src[0], &intrin->src[0], store);
|
||||
|
||||
store->const_index[0] =
|
||||
intrin->variables[0]->var->data.driver_location;
|
||||
|
||||
/* Copy the writemask */
|
||||
store->const_index[1] = intrin->const_index[0];
|
||||
nir_intrinsic_set_base(store,
|
||||
intrin->variables[0]->var->data.driver_location);
|
||||
nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
|
||||
|
||||
if (per_vertex)
|
||||
store->src[1] = nir_src_for_ssa(vertex_index);
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
|||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
|
||||
mov->dest.write_mask = intrin->const_index[0];
|
||||
mov->dest.write_mask = nir_intrinsic_write_mask(intrin);
|
||||
mov->dest.dest.is_ssa = false;
|
||||
mov->dest.dest.reg.reg = reg_src.reg.reg;
|
||||
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ load_input(nir_builder *b, nir_variable *in)
|
|||
|
||||
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
|
||||
load->num_components = 4;
|
||||
load->const_index[0] = in->data.driver_location;
|
||||
nir_intrinsic_set_base(load, in->data.driver_location);
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
|
@ -151,7 +151,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state)
|
|||
for (idx = 0; idx < state->colors_count; idx++) {
|
||||
unsigned drvloc =
|
||||
state->colors[idx].front->data.driver_location;
|
||||
if (intr->const_index[0] == drvloc) {
|
||||
if (nir_intrinsic_base(intr) == drvloc) {
|
||||
assert(nir_src_as_const_value(intr->src[0]));
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
|
|||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
|
||||
store->num_components = num_components;
|
||||
store->const_index[0] = (1 << num_components) - 1;
|
||||
nir_intrinsic_set_write_mask(store, (1 << num_components) - 1);
|
||||
store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
|
||||
|
||||
store->src[0].is_ssa = true;
|
||||
|
|
|
|||
|
|
@ -560,7 +560,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
|
|||
nir_ssa_def *new_def;
|
||||
b.cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
|
||||
unsigned wrmask = nir_intrinsic_write_mask(intrin);
|
||||
if (wrmask == (1 << intrin->num_components) - 1) {
|
||||
/* Whole variable store - just copy the source. Note that
|
||||
* intrin->num_components and intrin->src[0].ssa->num_components
|
||||
* may differ.
|
||||
|
|
@ -580,7 +581,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
|
|||
*/
|
||||
nir_ssa_def *srcs[4];
|
||||
for (unsigned i = 0; i < intrin->num_components; i++) {
|
||||
if (intrin->const_index[0] & (1 << i)) {
|
||||
if (wrmask & (1 << i)) {
|
||||
srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
|
||||
} else {
|
||||
srcs[i] = nir_channel(&b, old_def, i);
|
||||
|
|
|
|||
|
|
@ -151,6 +151,8 @@ optimizations = [
|
|||
(('ior', a, 0), a),
|
||||
(('fxor', a, a), 0.0),
|
||||
(('ixor', a, a), 0),
|
||||
(('fxor', a, 0.0), a),
|
||||
(('ixor', a, 0), a),
|
||||
(('inot', ('inot', a)), a),
|
||||
# DeMorgan's Laws
|
||||
(('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
|
||||
|
|
@ -167,6 +169,8 @@ optimizations = [
|
|||
(('flog2', ('fexp2', a)), a), # lg2(2^a) = a
|
||||
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
|
||||
(('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
|
||||
(('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
|
||||
('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
|
||||
(('fpow', a, 1.0), a),
|
||||
(('fpow', a, 2.0), ('fmul', a, a)),
|
||||
(('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
|
||||
|
|
@ -313,6 +317,19 @@ optimizations = [
|
|||
'options->lower_unpack_snorm_4x8'),
|
||||
]
|
||||
|
||||
# Unreal Engine 4 demo applications open-code bitfieldReverse()
|
||||
def bitfield_reverse(u):
    """Return the expression tree of an open-coded 32-bit bit reversal of `u`.

    The two 16-bit halves are swapped first; then neighbouring groups of
    8, 4, 2 and 1 bits are swapped using complementary mask pairs.
    """
    def swap_groups(value, low_mask, high_mask, width):
        # Move the low groups up and the high groups down by `width` bits.
        return ('ior',
                ('ishl', ('iand', value, low_mask), width),
                ('ushr', ('iand', value, high_mask), width))

    # The half swap needs no masks: each shift discards the other half.
    expr = ('ior', ('ishl', u, 16), ('ushr', u, 16))

    for low_mask, high_mask, width in ((0x00ff00ff, 0xff00ff00, 8),
                                       (0x0f0f0f0f, 0xf0f0f0f0, 4),
                                       (0x33333333, 0xcccccccc, 2),
                                       (0x55555555, 0xaaaaaaaa, 1)):
        expr = swap_groups(expr, low_mask, high_mask, width)

    return expr
|
||||
|
||||
optimizations += [(bitfield_reverse('x'), ('bitfield_reverse', 'x'))]
|
||||
|
||||
|
||||
# Add optimizations to handle the case where the result of a ternary is
|
||||
# compared to a constant. This way we can take things like
|
||||
#
|
||||
|
|
|
|||
|
|
@ -451,15 +451,16 @@ print_deref(nir_deref_var *deref, print_state *state)
|
|||
static void
|
||||
print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
||||
{
|
||||
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
|
||||
unsigned num_srcs = info->num_srcs;
|
||||
FILE *fp = state->fp;
|
||||
|
||||
if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
|
||||
if (info->has_dest) {
|
||||
print_dest(&instr->dest, state);
|
||||
fprintf(fp, " = ");
|
||||
}
|
||||
|
||||
fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
|
||||
fprintf(fp, "intrinsic %s (", info->name);
|
||||
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
if (i != 0)
|
||||
|
|
@ -470,9 +471,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
|
||||
fprintf(fp, ") (");
|
||||
|
||||
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
|
||||
|
||||
for (unsigned i = 0; i < num_vars; i++) {
|
||||
for (unsigned i = 0; i < info->num_variables; i++) {
|
||||
if (i != 0)
|
||||
fprintf(fp, ", ");
|
||||
|
||||
|
|
@ -481,9 +480,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
|
||||
fprintf(fp, ") (");
|
||||
|
||||
unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
|
||||
|
||||
for (unsigned i = 0; i < num_indices; i++) {
|
||||
for (unsigned i = 0; i < info->num_indices; i++) {
|
||||
if (i != 0)
|
||||
fprintf(fp, ", ");
|
||||
|
||||
|
|
@ -492,6 +489,34 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
|
||||
fprintf(fp, ")");
|
||||
|
||||
static const char *index_name[NIR_INTRINSIC_NUM_INDEX_FLAGS] = {
|
||||
[NIR_INTRINSIC_BASE] = "base",
|
||||
[NIR_INTRINSIC_WRMASK] = "wrmask",
|
||||
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
|
||||
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
|
||||
[NIR_INTRINSIC_RANGE] = "range",
|
||||
[NIR_INTRINSIC_DESC_SET] = "desc-set",
|
||||
[NIR_INTRINSIC_BINDING] = "binding",
|
||||
};
|
||||
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
|
||||
if (!info->index_map[idx])
|
||||
continue;
|
||||
fprintf(fp, " /*");
|
||||
if (idx == NIR_INTRINSIC_WRMASK) {
|
||||
/* special case wrmask to show it as a writemask.. */
|
||||
unsigned wrmask = nir_intrinsic_write_mask(instr);
|
||||
fprintf(fp, " wrmask=");
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
if ((wrmask >> i) & 1)
|
||||
fprintf(fp, "%c", "xyzw"[i]);
|
||||
} else {
|
||||
unsigned off = info->index_map[idx] - 1;
|
||||
assert(index_name[idx]); /* forgot to update index_name table? */
|
||||
fprintf(fp, " %s=%d", index_name[idx], instr->const_index[off]);
|
||||
}
|
||||
fprintf(fp, " */");
|
||||
}
|
||||
|
||||
if (!state->shader)
|
||||
return;
|
||||
|
||||
|
|
@ -515,7 +540,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
}
|
||||
|
||||
nir_foreach_variable(var, var_list) {
|
||||
if ((var->data.driver_location == instr->const_index[0]) &&
|
||||
if ((var->data.driver_location == nir_intrinsic_base(instr)) &&
|
||||
var->name) {
|
||||
fprintf(fp, "\t/* %s */", var->name);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -417,7 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
|
||||
instr->variables[0]->var->data.mode != nir_var_uniform &&
|
||||
instr->variables[0]->var->data.mode != nir_var_shader_storage);
|
||||
assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
|
||||
assert((nir_intrinsic_write_mask(instr) & ~((1 << instr->num_components) - 1)) == 0);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_copy_var:
|
||||
|
|
|
|||
|
|
@ -319,8 +319,8 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
|
|||
nir_intrinsic_instr_create(b->nb.shader,
|
||||
nir_intrinsic_vulkan_resource_index);
|
||||
instr->src[0] = nir_src_for_ssa(array_index);
|
||||
instr->const_index[0] = chain->var->descriptor_set;
|
||||
instr->const_index[1] = chain->var->binding;
|
||||
nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
|
||||
nir_intrinsic_set_binding(instr, chain->var->binding);
|
||||
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
|
||||
nir_builder_instr_insert(&b->nb, &instr->instr);
|
||||
|
|
|
|||
|
|
@ -191,11 +191,13 @@ C_SOURCES := \
|
|||
util/u_cpu_detect.c \
|
||||
util/u_cpu_detect.h \
|
||||
util/u_debug.c \
|
||||
util/u_debug.h \
|
||||
util/u_debug_describe.c \
|
||||
util/u_debug_describe.h \
|
||||
util/u_debug_flush.c \
|
||||
util/u_debug_flush.h \
|
||||
util/u_debug.h \
|
||||
util/u_debug_image.c \
|
||||
util/u_debug_image.h \
|
||||
util/u_debug_memory.c \
|
||||
util/u_debug_refcnt.c \
|
||||
util/u_debug_refcnt.h \
|
||||
|
|
|
|||
|
|
@ -43,10 +43,10 @@
|
|||
#include "util/u_format.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_pstipple.h"
|
||||
#include "util/u_sampler.h"
|
||||
|
||||
#include "tgsi/tgsi_transform.h"
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
|
||||
#include "draw_context.h"
|
||||
#include "draw_pipe.h"
|
||||
|
|
@ -114,178 +114,6 @@ struct pstip_stage
|
|||
};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Subclass of tgsi_transform_context, used for transforming the
|
||||
* user's fragment shader to add the extra texture sample and fragment kill
|
||||
* instructions.
|
||||
*/
|
||||
struct pstip_transform_context {
|
||||
struct tgsi_transform_context base;
|
||||
uint tempsUsed; /**< bitmask */
|
||||
int wincoordInput;
|
||||
int maxInput;
|
||||
uint samplersUsed; /**< bitfield of samplers used */
|
||||
bool hasSview;
|
||||
int freeSampler; /** an available sampler for the pstipple */
|
||||
int texTemp; /**< temp registers */
|
||||
int numImmed;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* TGSI declaration transform callback.
|
||||
* Look for a free sampler, a free input attrib, and two free temp regs.
|
||||
*/
|
||||
static void
|
||||
pstip_transform_decl(struct tgsi_transform_context *ctx,
|
||||
struct tgsi_full_declaration *decl)
|
||||
{
|
||||
struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
|
||||
uint i;
|
||||
for (i = decl->Range.First;
|
||||
i <= decl->Range.Last; i++) {
|
||||
pctx->samplersUsed |= 1 << i;
|
||||
}
|
||||
}
|
||||
else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
pctx->hasSview = true;
|
||||
}
|
||||
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
|
||||
pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
|
||||
if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
|
||||
pctx->wincoordInput = (int) decl->Range.First;
|
||||
}
|
||||
else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
|
||||
uint i;
|
||||
for (i = decl->Range.First;
|
||||
i <= decl->Range.Last; i++) {
|
||||
pctx->tempsUsed |= (1 << i);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->emit_declaration(ctx, decl);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* TGSI immediate declaration transform callback.
|
||||
* We're just counting the number of immediates here.
|
||||
*/
|
||||
static void
|
||||
pstip_transform_immed(struct tgsi_transform_context *ctx,
|
||||
struct tgsi_full_immediate *immed)
|
||||
{
|
||||
struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
|
||||
ctx->emit_immediate(ctx, immed); /* emit to output shader */
|
||||
pctx->numImmed++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
|
||||
*/
|
||||
static int
|
||||
free_bit(uint bitfield)
|
||||
{
|
||||
return ffs(~bitfield) - 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* TGSI transform prolog callback.
|
||||
*/
|
||||
static void
|
||||
pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
||||
{
|
||||
struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
|
||||
uint i;
|
||||
int wincoordInput;
|
||||
|
||||
/* find free sampler */
|
||||
pctx->freeSampler = free_bit(pctx->samplersUsed);
|
||||
if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
|
||||
pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
|
||||
|
||||
if (pctx->wincoordInput < 0)
|
||||
wincoordInput = pctx->maxInput + 1;
|
||||
else
|
||||
wincoordInput = pctx->wincoordInput;
|
||||
|
||||
/* find one free temp reg */
|
||||
for (i = 0; i < 32; i++) {
|
||||
if ((pctx->tempsUsed & (1 << i)) == 0) {
|
||||
/* found a free temp */
|
||||
if (pctx->texTemp < 0)
|
||||
pctx->texTemp = i;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(pctx->texTemp >= 0);
|
||||
|
||||
if (pctx->wincoordInput < 0) {
|
||||
/* declare new position input reg */
|
||||
tgsi_transform_input_decl(ctx, wincoordInput,
|
||||
TGSI_SEMANTIC_POSITION, 1,
|
||||
TGSI_INTERPOLATE_LINEAR);
|
||||
}
|
||||
|
||||
/* declare new sampler */
|
||||
tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
|
||||
|
||||
/* if the src shader has SVIEW decl's for each SAMP decl, we
|
||||
* need to continue the trend and ensure there is a matching
|
||||
* SVIEW for the new SAMP we just created
|
||||
*/
|
||||
if (pctx->hasSview) {
|
||||
tgsi_transform_sampler_view_decl(ctx,
|
||||
pctx->freeSampler,
|
||||
TGSI_TEXTURE_2D,
|
||||
TGSI_RETURN_TYPE_FLOAT);
|
||||
}
|
||||
|
||||
/* declare new temp regs */
|
||||
tgsi_transform_temp_decl(ctx, pctx->texTemp);
|
||||
|
||||
/* emit immediate = {1/32, 1/32, 1, 1}
|
||||
* The index/position of this immediate will be pctx->numImmed
|
||||
*/
|
||||
tgsi_transform_immediate_decl(ctx, 1.0/32.0, 1.0/32.0, 1.0, 1.0);
|
||||
|
||||
/*
|
||||
* Insert new MUL/TEX/KILL_IF instructions at start of program
|
||||
* Take gl_FragCoord, divide by 32 (stipple size), sample the
|
||||
* texture and kill fragment if needed.
|
||||
*
|
||||
* We'd like to use non-normalized texcoords to index into a RECT
|
||||
* texture, but we can only use GL_REPEAT wrap mode with normalized
|
||||
* texcoords. Darn.
|
||||
*/
|
||||
|
||||
/* MUL texTemp, INPUT[wincoord], 1/32; */
|
||||
tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
|
||||
TGSI_FILE_TEMPORARY, pctx->texTemp,
|
||||
TGSI_WRITEMASK_XYZW,
|
||||
TGSI_FILE_INPUT, wincoordInput,
|
||||
TGSI_FILE_IMMEDIATE, pctx->numImmed);
|
||||
|
||||
/* TEX texTemp, texTemp, sampler; */
|
||||
tgsi_transform_tex_2d_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, pctx->texTemp,
|
||||
TGSI_FILE_TEMPORARY, pctx->texTemp,
|
||||
pctx->freeSampler);
|
||||
|
||||
/* KILL_IF -texTemp.wwww; # if -texTemp < 0, KILL fragment */
|
||||
tgsi_transform_kill_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, pctx->texTemp,
|
||||
TGSI_SWIZZLE_W, TRUE);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate the frag shader we'll use for doing polygon stipple.
|
||||
* This will be the user's shader prefixed with a TEX and KIL instruction.
|
||||
|
|
@ -293,40 +121,27 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
static boolean
|
||||
generate_pstip_fs(struct pstip_stage *pstip)
|
||||
{
|
||||
struct pipe_context *pipe = pstip->pipe;
|
||||
struct pipe_screen *screen = pipe->screen;
|
||||
const struct pipe_shader_state *orig_fs = &pstip->fs->state;
|
||||
/*struct draw_context *draw = pstip->stage.draw;*/
|
||||
struct pipe_shader_state pstip_fs;
|
||||
struct pstip_transform_context transform;
|
||||
const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
|
||||
enum tgsi_file_type wincoord_file;
|
||||
|
||||
wincoord_file = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL) ?
|
||||
TGSI_FILE_SYSTEM_VALUE : TGSI_FILE_INPUT;
|
||||
|
||||
pstip_fs = *orig_fs; /* copy to init */
|
||||
pstip_fs.tokens = tgsi_alloc_tokens(newLen);
|
||||
pstip_fs.tokens = util_pstipple_create_fragment_shader(orig_fs->tokens,
|
||||
&pstip->fs->sampler_unit,
|
||||
0,
|
||||
wincoord_file);
|
||||
if (pstip_fs.tokens == NULL)
|
||||
return FALSE;
|
||||
|
||||
memset(&transform, 0, sizeof(transform));
|
||||
transform.wincoordInput = -1;
|
||||
transform.maxInput = -1;
|
||||
transform.texTemp = -1;
|
||||
transform.base.prolog = pstip_transform_prolog;
|
||||
transform.base.transform_declaration = pstip_transform_decl;
|
||||
transform.base.transform_immediate = pstip_transform_immed;
|
||||
|
||||
tgsi_transform_shader(orig_fs->tokens,
|
||||
(struct tgsi_token *) pstip_fs.tokens,
|
||||
newLen, &transform.base);
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
tgsi_dump(orig_fs->tokens, 0);
|
||||
tgsi_dump(pstip_fs.tokens, 0);
|
||||
#endif
|
||||
|
||||
assert(pstip->fs);
|
||||
|
||||
pstip->fs->sampler_unit = transform.freeSampler;
|
||||
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
|
||||
|
||||
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
|
||||
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pipe, &pstip_fs);
|
||||
|
||||
FREE((void *)pstip_fs.tokens);
|
||||
|
||||
|
|
@ -337,113 +152,6 @@ generate_pstip_fs(struct pstip_stage *pstip)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Load texture image with current stipple pattern.
|
||||
*/
|
||||
static void
|
||||
pstip_update_texture(struct pstip_stage *pstip)
|
||||
{
|
||||
static const uint bit31 = 1 << 31;
|
||||
struct pipe_context *pipe = pstip->pipe;
|
||||
struct pipe_transfer *transfer;
|
||||
const uint *stipple = pstip->state.stipple->stipple;
|
||||
uint i, j;
|
||||
ubyte *data;
|
||||
|
||||
data = pipe_transfer_map(pipe, pstip->texture, 0, 0,
|
||||
PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer);
|
||||
|
||||
/*
|
||||
* Load alpha texture.
|
||||
* Note: 0 means keep the fragment, 255 means kill it.
|
||||
* We'll negate the texel value and use KILL_IF which kills if value
|
||||
* is negative.
|
||||
*/
|
||||
for (i = 0; i < 32; i++) {
|
||||
for (j = 0; j < 32; j++) {
|
||||
if (stipple[i] & (bit31 >> j)) {
|
||||
/* fragment "on" */
|
||||
data[i * transfer->stride + j] = 0;
|
||||
}
|
||||
else {
|
||||
/* fragment "off" */
|
||||
data[i * transfer->stride + j] = 255;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unmap */
|
||||
pipe_transfer_unmap(pipe, transfer);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create the texture map we'll use for stippling.
|
||||
*/
|
||||
static boolean
|
||||
pstip_create_texture(struct pstip_stage *pstip)
|
||||
{
|
||||
struct pipe_context *pipe = pstip->pipe;
|
||||
struct pipe_screen *screen = pipe->screen;
|
||||
struct pipe_resource texTemp;
|
||||
struct pipe_sampler_view viewTempl;
|
||||
|
||||
memset(&texTemp, 0, sizeof(texTemp));
|
||||
texTemp.target = PIPE_TEXTURE_2D;
|
||||
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
|
||||
texTemp.last_level = 0;
|
||||
texTemp.width0 = 32;
|
||||
texTemp.height0 = 32;
|
||||
texTemp.depth0 = 1;
|
||||
texTemp.array_size = 1;
|
||||
texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
|
||||
|
||||
pstip->texture = screen->resource_create(screen, &texTemp);
|
||||
if (pstip->texture == NULL)
|
||||
return FALSE;
|
||||
|
||||
u_sampler_view_default_template(&viewTempl,
|
||||
pstip->texture,
|
||||
pstip->texture->format);
|
||||
pstip->sampler_view = pipe->create_sampler_view(pipe,
|
||||
pstip->texture,
|
||||
&viewTempl);
|
||||
if (!pstip->sampler_view) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create the sampler CSO that'll be used for stippling.
|
||||
*/
|
||||
static boolean
|
||||
pstip_create_sampler(struct pstip_stage *pstip)
|
||||
{
|
||||
struct pipe_sampler_state sampler;
|
||||
struct pipe_context *pipe = pstip->pipe;
|
||||
|
||||
memset(&sampler, 0, sizeof(sampler));
|
||||
sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
|
||||
sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
|
||||
sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
|
||||
sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
|
||||
sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
|
||||
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
|
||||
sampler.normalized_coords = 1;
|
||||
sampler.min_lod = 0.0f;
|
||||
sampler.max_lod = 0.0f;
|
||||
|
||||
pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
|
||||
if (pstip->sampler_cso == NULL)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* When we're about to draw our first stipple polygon in a batch, this function
|
||||
* is called to tell the driver to bind our modified fragment shader.
|
||||
|
|
@ -722,7 +430,8 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
|
|||
/* pass-through */
|
||||
pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
|
||||
|
||||
pstip_update_texture(pstip);
|
||||
util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture,
|
||||
pstip->state.stipple->stipple);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -758,10 +467,17 @@ draw_install_pstipple_stage(struct draw_context *draw,
|
|||
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
|
||||
|
||||
/* create special texture, sampler state */
|
||||
if (!pstip_create_texture(pstip))
|
||||
pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL);
|
||||
if (!pstip->texture)
|
||||
goto fail;
|
||||
|
||||
if (!pstip_create_sampler(pstip))
|
||||
pstip->sampler_view = util_pstipple_create_sampler_view(pipe,
|
||||
pstip->texture);
|
||||
if (!pstip->sampler_view)
|
||||
goto fail;
|
||||
|
||||
pstip->sampler_cso = util_pstipple_create_sampler(pipe);
|
||||
if (!pstip->sampler_cso)
|
||||
goto fail;
|
||||
|
||||
/* override the driver's functions */
|
||||
|
|
|
|||
|
|
@ -614,8 +614,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
|||
}
|
||||
|
||||
nir_ssa_def *offset;
|
||||
if (dim) {
|
||||
/* UBO loads don't have a const_index[0] base offset. */
|
||||
if (op == nir_intrinsic_load_ubo) {
|
||||
/* UBO loads don't have a base offset. */
|
||||
offset = nir_imm_int(b, index);
|
||||
if (indirect) {
|
||||
offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
|
||||
|
|
@ -623,7 +623,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
|||
/* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
|
||||
offset = nir_ishl(b, offset, nir_imm_int(b, 4));
|
||||
} else {
|
||||
load->const_index[0] = index;
|
||||
nir_intrinsic_set_base(load, index);
|
||||
if (indirect) {
|
||||
offset = ttn_src_for_indirect(c, indirect);
|
||||
} else {
|
||||
|
|
@ -1875,7 +1875,7 @@ ttn_emit_instruction(struct ttn_compile *c)
|
|||
&tgsi_dst->Indirect : NULL;
|
||||
|
||||
store->num_components = 4;
|
||||
store->const_index[0] = dest.write_mask;
|
||||
nir_intrinsic_set_write_mask(store, dest.write_mask);
|
||||
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
|
||||
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
|
||||
|
||||
|
|
@ -1907,8 +1907,8 @@ ttn_add_output_stores(struct ttn_compile *c)
|
|||
store->num_components = 4;
|
||||
store->src[0].reg.reg = c->output_regs[loc].reg;
|
||||
store->src[0].reg.base_offset = c->output_regs[loc].offset;
|
||||
store->const_index[0] = loc;
|
||||
store->const_index[1] = 0xf; /* writemask */
|
||||
nir_intrinsic_set_base(store, loc);
|
||||
nir_intrinsic_set_write_mask(store, 0xf);
|
||||
store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_builder_instr_insert(b, &store->instr);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,387 @@
|
|||
|
||||
|
||||
|
||||
static void
|
||||
scan_instruction(struct tgsi_shader_info *info,
|
||||
const struct tgsi_full_instruction *fullinst,
|
||||
unsigned *current_depth)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
|
||||
info->opcode_count[fullinst->Instruction.Opcode]++;
|
||||
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
case TGSI_OPCODE_BGNLOOP:
|
||||
(*current_depth)++;
|
||||
info->max_depth = MAX2(info->max_depth, *current_depth);
|
||||
break;
|
||||
case TGSI_OPCODE_ENDIF:
|
||||
case TGSI_OPCODE_ENDLOOP:
|
||||
(*current_depth)--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
|
||||
const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
|
||||
unsigned input;
|
||||
|
||||
if (src0->Register.Indirect && src0->Indirect.ArrayID)
|
||||
input = info->input_array_first[src0->Indirect.ArrayID];
|
||||
else
|
||||
input = src0->Register.Index;
|
||||
|
||||
/* For the INTERP opcodes, the interpolation is always
|
||||
* PERSPECTIVE unless LINEAR is specified.
|
||||
*/
|
||||
switch (info->input_interpolate[input]) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_CONSTANT:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_persp_opcode_interp_centroid = TRUE;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_persp_opcode_interp_offset = TRUE;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_persp_opcode_interp_sample = TRUE;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_linear_opcode_interp_centroid = TRUE;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_linear_opcode_interp_offset = TRUE;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_linear_opcode_interp_sample = TRUE;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
|
||||
fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
|
||||
info->uses_doubles = TRUE;
|
||||
|
||||
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
|
||||
const struct tgsi_full_src_register *src = &fullinst->Src[i];
|
||||
int ind = src->Register.Index;
|
||||
|
||||
/* Mark which inputs are effectively used */
|
||||
if (src->Register.File == TGSI_FILE_INPUT) {
|
||||
unsigned usage_mask;
|
||||
usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
|
||||
if (src->Register.Indirect) {
|
||||
for (ind = 0; ind < info->num_inputs; ++ind) {
|
||||
info->input_usage_mask[ind] |= usage_mask;
|
||||
}
|
||||
} else {
|
||||
assert(ind >= 0);
|
||||
assert(ind < PIPE_MAX_SHADER_INPUTS);
|
||||
info->input_usage_mask[ind] |= usage_mask;
|
||||
}
|
||||
|
||||
if (info->processor == TGSI_PROCESSOR_FRAGMENT &&
|
||||
!src->Register.Indirect) {
|
||||
unsigned name =
|
||||
info->input_semantic_name[src->Register.Index];
|
||||
unsigned index =
|
||||
info->input_semantic_index[src->Register.Index];
|
||||
|
||||
if (name == TGSI_SEMANTIC_POSITION &&
|
||||
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleW == TGSI_SWIZZLE_Z))
|
||||
info->reads_z = TRUE;
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR) {
|
||||
unsigned mask =
|
||||
(1 << src->Register.SwizzleX) |
|
||||
(1 << src->Register.SwizzleY) |
|
||||
(1 << src->Register.SwizzleZ) |
|
||||
(1 << src->Register.SwizzleW);
|
||||
|
||||
info->colors_read |= mask << (index * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for indirect register reads */
|
||||
if (src->Register.Indirect) {
|
||||
info->indirect_files |= (1 << src->Register.File);
|
||||
info->indirect_files_read |= (1 << src->Register.File);
|
||||
}
|
||||
|
||||
/* MSAA samplers */
|
||||
if (src->Register.File == TGSI_FILE_SAMPLER) {
|
||||
assert(fullinst->Instruction.Texture);
|
||||
assert(src->Register.Index < Elements(info->is_msaa_sampler));
|
||||
|
||||
if (fullinst->Instruction.Texture &&
|
||||
(fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
|
||||
fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
|
||||
info->is_msaa_sampler[src->Register.Index] = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for indirect register writes */
|
||||
for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
|
||||
const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
|
||||
if (dst->Register.Indirect) {
|
||||
info->indirect_files |= (1 << dst->Register.File);
|
||||
info->indirect_files_written |= (1 << dst->Register.File);
|
||||
}
|
||||
}
|
||||
|
||||
info->num_instructions++;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
scan_declaration(struct tgsi_shader_info *info,
|
||||
const struct tgsi_full_declaration *fulldecl)
|
||||
{
|
||||
const uint file = fulldecl->Declaration.File;
|
||||
const unsigned procType = info->processor;
|
||||
uint reg;
|
||||
|
||||
if (fulldecl->Declaration.Array) {
|
||||
unsigned array_id = fulldecl->Array.ArrayID;
|
||||
|
||||
switch (file) {
|
||||
case TGSI_FILE_INPUT:
|
||||
assert(array_id < ARRAY_SIZE(info->input_array_first));
|
||||
info->input_array_first[array_id] = fulldecl->Range.First;
|
||||
info->input_array_last[array_id] = fulldecl->Range.Last;
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
assert(array_id < ARRAY_SIZE(info->output_array_first));
|
||||
info->output_array_first[array_id] = fulldecl->Range.First;
|
||||
info->output_array_last[array_id] = fulldecl->Range.Last;
|
||||
break;
|
||||
}
|
||||
info->array_max[file] = MAX2(info->array_max[file], array_id);
|
||||
}
|
||||
|
||||
for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) {
|
||||
unsigned semName = fulldecl->Semantic.Name;
|
||||
unsigned semIndex = fulldecl->Semantic.Index +
|
||||
(reg - fulldecl->Range.First);
|
||||
|
||||
/* only first 32 regs will appear in this bitfield */
|
||||
info->file_mask[file] |= (1 << reg);
|
||||
info->file_count[file]++;
|
||||
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
|
||||
|
||||
if (file == TGSI_FILE_CONSTANT) {
|
||||
int buffer = 0;
|
||||
|
||||
if (fulldecl->Declaration.Dimension)
|
||||
buffer = fulldecl->Dim.Index2D;
|
||||
|
||||
info->const_file_max[buffer] =
|
||||
MAX2(info->const_file_max[buffer], (int)reg);
|
||||
}
|
||||
else if (file == TGSI_FILE_INPUT) {
|
||||
info->input_semantic_name[reg] = (ubyte) semName;
|
||||
info->input_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
|
||||
info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
|
||||
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
|
||||
info->num_inputs++;
|
||||
|
||||
/* Only interpolated varyings. Don't include POSITION.
|
||||
* Don't include integer varyings, because they are not
|
||||
* interpolated.
|
||||
*/
|
||||
if (semName == TGSI_SEMANTIC_GENERIC ||
|
||||
semName == TGSI_SEMANTIC_TEXCOORD ||
|
||||
semName == TGSI_SEMANTIC_COLOR ||
|
||||
semName == TGSI_SEMANTIC_BCOLOR ||
|
||||
semName == TGSI_SEMANTIC_FOG ||
|
||||
semName == TGSI_SEMANTIC_CLIPDIST ||
|
||||
semName == TGSI_SEMANTIC_CULLDIST) {
|
||||
switch (fulldecl->Interp.Interpolate) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_persp_center = TRUE;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_persp_centroid = TRUE;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_persp_sample = TRUE;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_linear_center = TRUE;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_linear_centroid = TRUE;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_linear_sample = TRUE;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
|
||||
}
|
||||
}
|
||||
|
||||
if (semName == TGSI_SEMANTIC_PRIMID)
|
||||
info->uses_primid = TRUE;
|
||||
else if (procType == TGSI_PROCESSOR_FRAGMENT) {
|
||||
if (semName == TGSI_SEMANTIC_POSITION)
|
||||
info->reads_position = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_FACE)
|
||||
info->uses_frontface = TRUE;
|
||||
}
|
||||
}
|
||||
else if (file == TGSI_FILE_SYSTEM_VALUE) {
|
||||
unsigned index = fulldecl->Range.First;
|
||||
|
||||
info->system_value_semantic_name[index] = semName;
|
||||
info->num_system_values = MAX2(info->num_system_values, index + 1);
|
||||
|
||||
switch (semName) {
|
||||
case TGSI_SEMANTIC_INSTANCEID:
|
||||
info->uses_instanceid = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_VERTEXID:
|
||||
info->uses_vertexid = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_VERTEXID_NOBASE:
|
||||
info->uses_vertexid_nobase = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_BASEVERTEX:
|
||||
info->uses_basevertex = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_PRIMID:
|
||||
info->uses_primid = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_INVOCATIONID:
|
||||
info->uses_invocationid = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
info->reads_position = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_FACE:
|
||||
info->uses_frontface = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_SAMPLEMASK:
|
||||
info->reads_samplemask = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (file == TGSI_FILE_OUTPUT) {
|
||||
info->output_semantic_name[reg] = (ubyte) semName;
|
||||
info->output_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->num_outputs++;
|
||||
|
||||
if (semName == TGSI_SEMANTIC_COLOR)
|
||||
info->colors_written |= 1 << semIndex;
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX ||
|
||||
procType == TGSI_PROCESSOR_GEOMETRY ||
|
||||
procType == TGSI_PROCESSOR_TESS_CTRL ||
|
||||
procType == TGSI_PROCESSOR_TESS_EVAL) {
|
||||
switch (semName) {
|
||||
case TGSI_SEMANTIC_VIEWPORT_INDEX:
|
||||
info->writes_viewport_index = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_LAYER:
|
||||
info->writes_layer = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_PSIZE:
|
||||
info->writes_psize = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_CLIPVERTEX:
|
||||
info->writes_clipvertex = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_FRAGMENT) {
|
||||
switch (semName) {
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
info->writes_z = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_STENCIL:
|
||||
info->writes_stencil = TRUE;
|
||||
break;
|
||||
case TGSI_SEMANTIC_SAMPLEMASK:
|
||||
info->writes_samplemask = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX) {
|
||||
if (semName == TGSI_SEMANTIC_EDGEFLAG) {
|
||||
info->writes_edgeflag = TRUE;
|
||||
}
|
||||
}
|
||||
} else if (file == TGSI_FILE_SAMPLER) {
|
||||
info->samplers_declared |= 1 << reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
scan_immediate(struct tgsi_shader_info *info)
|
||||
{
|
||||
uint reg = info->immediate_count++;
|
||||
uint file = TGSI_FILE_IMMEDIATE;
|
||||
|
||||
info->file_mask[file] |= (1 << reg);
|
||||
info->file_count[file]++;
|
||||
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
scan_property(struct tgsi_shader_info *info,
|
||||
const struct tgsi_full_property *fullprop)
|
||||
{
|
||||
unsigned name = fullprop->Property.PropertyName;
|
||||
unsigned value = fullprop->u[0].Data;
|
||||
|
||||
assert(name < Elements(info->properties));
|
||||
info->properties[name] = value;
|
||||
|
||||
switch (name) {
|
||||
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
|
||||
info->num_written_clipdistance = value;
|
||||
info->clipdist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
|
||||
info->num_written_culldistance = value;
|
||||
info->culldist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Scan the given TGSI shader to collect information such as number of
|
||||
|
|
@ -81,390 +462,30 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
procType == TGSI_PROCESSOR_COMPUTE);
|
||||
info->processor = procType;
|
||||
|
||||
|
||||
/**
|
||||
** Loop over incoming program tokens/instructions
|
||||
*/
|
||||
while( !tgsi_parse_end_of_tokens( &parse ) ) {
|
||||
|
||||
while (!tgsi_parse_end_of_tokens(&parse)) {
|
||||
info->num_tokens++;
|
||||
|
||||
tgsi_parse_token( &parse );
|
||||
|
||||
switch( parse.FullToken.Token.Type ) {
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
{
|
||||
const struct tgsi_full_instruction *fullinst
|
||||
= &parse.FullToken.FullInstruction;
|
||||
uint i;
|
||||
|
||||
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
|
||||
info->opcode_count[fullinst->Instruction.Opcode]++;
|
||||
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
case TGSI_OPCODE_BGNLOOP:
|
||||
current_depth++;
|
||||
info->max_depth = MAX2(info->max_depth, current_depth);
|
||||
break;
|
||||
case TGSI_OPCODE_ENDIF:
|
||||
case TGSI_OPCODE_ENDLOOP:
|
||||
current_depth--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
|
||||
const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
|
||||
unsigned input;
|
||||
|
||||
if (src0->Register.Indirect && src0->Indirect.ArrayID)
|
||||
input = info->input_array_first[src0->Indirect.ArrayID];
|
||||
else
|
||||
input = src0->Register.Index;
|
||||
|
||||
/* For the INTERP opcodes, the interpolation is always
|
||||
* PERSPECTIVE unless LINEAR is specified.
|
||||
*/
|
||||
switch (info->input_interpolate[input]) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_CONSTANT:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_persp_opcode_interp_centroid = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_persp_opcode_interp_offset = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_persp_opcode_interp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_linear_opcode_interp_centroid = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_linear_opcode_interp_offset = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_linear_opcode_interp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
|
||||
fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
|
||||
info->uses_doubles = true;
|
||||
|
||||
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
|
||||
const struct tgsi_full_src_register *src =
|
||||
&fullinst->Src[i];
|
||||
int ind = src->Register.Index;
|
||||
|
||||
/* Mark which inputs are effectively used */
|
||||
if (src->Register.File == TGSI_FILE_INPUT) {
|
||||
unsigned usage_mask;
|
||||
usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
|
||||
if (src->Register.Indirect) {
|
||||
for (ind = 0; ind < info->num_inputs; ++ind) {
|
||||
info->input_usage_mask[ind] |= usage_mask;
|
||||
}
|
||||
} else {
|
||||
assert(ind >= 0);
|
||||
assert(ind < PIPE_MAX_SHADER_INPUTS);
|
||||
info->input_usage_mask[ind] |= usage_mask;
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_FRAGMENT &&
|
||||
!src->Register.Indirect) {
|
||||
unsigned name =
|
||||
info->input_semantic_name[src->Register.Index];
|
||||
unsigned index =
|
||||
info->input_semantic_index[src->Register.Index];
|
||||
|
||||
if (name == TGSI_SEMANTIC_POSITION &&
|
||||
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleW == TGSI_SWIZZLE_Z))
|
||||
info->reads_z = TRUE;
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR) {
|
||||
unsigned mask =
|
||||
(1 << src->Register.SwizzleX) |
|
||||
(1 << src->Register.SwizzleY) |
|
||||
(1 << src->Register.SwizzleZ) |
|
||||
(1 << src->Register.SwizzleW);
|
||||
|
||||
info->colors_read |= mask << (index * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for indirect register reads */
|
||||
if (src->Register.Indirect) {
|
||||
info->indirect_files |= (1 << src->Register.File);
|
||||
info->indirect_files_read |= (1 << src->Register.File);
|
||||
}
|
||||
|
||||
/* MSAA samplers */
|
||||
if (src->Register.File == TGSI_FILE_SAMPLER) {
|
||||
assert(fullinst->Instruction.Texture);
|
||||
assert(src->Register.Index < Elements(info->is_msaa_sampler));
|
||||
|
||||
if (fullinst->Instruction.Texture &&
|
||||
(fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
|
||||
fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
|
||||
info->is_msaa_sampler[src->Register.Index] = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for indirect register writes */
|
||||
for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
|
||||
const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
|
||||
if (dst->Register.Indirect) {
|
||||
info->indirect_files |= (1 << dst->Register.File);
|
||||
info->indirect_files_written |= (1 << dst->Register.File);
|
||||
}
|
||||
}
|
||||
|
||||
info->num_instructions++;
|
||||
}
|
||||
scan_instruction(info, &parse.FullToken.FullInstruction,
|
||||
&current_depth);
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_DECLARATION:
|
||||
{
|
||||
const struct tgsi_full_declaration *fulldecl
|
||||
= &parse.FullToken.FullDeclaration;
|
||||
const uint file = fulldecl->Declaration.File;
|
||||
uint reg;
|
||||
|
||||
if (fulldecl->Declaration.Array) {
|
||||
unsigned array_id = fulldecl->Array.ArrayID;
|
||||
|
||||
switch (file) {
|
||||
case TGSI_FILE_INPUT:
|
||||
assert(array_id < ARRAY_SIZE(info->input_array_first));
|
||||
info->input_array_first[array_id] = fulldecl->Range.First;
|
||||
info->input_array_last[array_id] = fulldecl->Range.Last;
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
assert(array_id < ARRAY_SIZE(info->output_array_first));
|
||||
info->output_array_first[array_id] = fulldecl->Range.First;
|
||||
info->output_array_last[array_id] = fulldecl->Range.Last;
|
||||
break;
|
||||
}
|
||||
info->array_max[file] = MAX2(info->array_max[file], array_id);
|
||||
}
|
||||
|
||||
for (reg = fulldecl->Range.First;
|
||||
reg <= fulldecl->Range.Last;
|
||||
reg++) {
|
||||
unsigned semName = fulldecl->Semantic.Name;
|
||||
unsigned semIndex =
|
||||
fulldecl->Semantic.Index + (reg - fulldecl->Range.First);
|
||||
|
||||
/* only first 32 regs will appear in this bitfield */
|
||||
info->file_mask[file] |= (1 << reg);
|
||||
info->file_count[file]++;
|
||||
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
|
||||
|
||||
if (file == TGSI_FILE_CONSTANT) {
|
||||
int buffer = 0;
|
||||
|
||||
if (fulldecl->Declaration.Dimension)
|
||||
buffer = fulldecl->Dim.Index2D;
|
||||
|
||||
info->const_file_max[buffer] =
|
||||
MAX2(info->const_file_max[buffer], (int)reg);
|
||||
}
|
||||
else if (file == TGSI_FILE_INPUT) {
|
||||
info->input_semantic_name[reg] = (ubyte) semName;
|
||||
info->input_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
|
||||
info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
|
||||
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
|
||||
info->num_inputs++;
|
||||
|
||||
/* Only interpolated varyings. Don't include POSITION.
|
||||
* Don't include integer varyings, because they are not
|
||||
* interpolated.
|
||||
*/
|
||||
if (semName == TGSI_SEMANTIC_GENERIC ||
|
||||
semName == TGSI_SEMANTIC_TEXCOORD ||
|
||||
semName == TGSI_SEMANTIC_COLOR ||
|
||||
semName == TGSI_SEMANTIC_BCOLOR ||
|
||||
semName == TGSI_SEMANTIC_FOG ||
|
||||
semName == TGSI_SEMANTIC_CLIPDIST ||
|
||||
semName == TGSI_SEMANTIC_CULLDIST) {
|
||||
switch (fulldecl->Interp.Interpolate) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_persp_center = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_persp_centroid = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_persp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_linear_center = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_linear_centroid = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_linear_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
|
||||
}
|
||||
}
|
||||
|
||||
if (semName == TGSI_SEMANTIC_PRIMID)
|
||||
info->uses_primid = TRUE;
|
||||
else if (procType == TGSI_PROCESSOR_FRAGMENT) {
|
||||
if (semName == TGSI_SEMANTIC_POSITION)
|
||||
info->reads_position = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_FACE)
|
||||
info->uses_frontface = TRUE;
|
||||
}
|
||||
}
|
||||
else if (file == TGSI_FILE_SYSTEM_VALUE) {
|
||||
unsigned index = fulldecl->Range.First;
|
||||
|
||||
info->system_value_semantic_name[index] = semName;
|
||||
info->num_system_values = MAX2(info->num_system_values,
|
||||
index + 1);
|
||||
|
||||
if (semName == TGSI_SEMANTIC_INSTANCEID) {
|
||||
info->uses_instanceid = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_VERTEXID) {
|
||||
info->uses_vertexid = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
|
||||
info->uses_vertexid_nobase = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
|
||||
info->uses_basevertex = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_PRIMID) {
|
||||
info->uses_primid = TRUE;
|
||||
} else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
|
||||
info->uses_invocationid = TRUE;
|
||||
} else if (semName == TGSI_SEMANTIC_POSITION)
|
||||
info->reads_position = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_FACE)
|
||||
info->uses_frontface = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
|
||||
info->reads_samplemask = TRUE;
|
||||
}
|
||||
else if (file == TGSI_FILE_OUTPUT) {
|
||||
info->output_semantic_name[reg] = (ubyte) semName;
|
||||
info->output_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->num_outputs++;
|
||||
|
||||
if (semName == TGSI_SEMANTIC_COLOR)
|
||||
info->colors_written |= 1 << semIndex;
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX ||
|
||||
procType == TGSI_PROCESSOR_GEOMETRY ||
|
||||
procType == TGSI_PROCESSOR_TESS_CTRL ||
|
||||
procType == TGSI_PROCESSOR_TESS_EVAL) {
|
||||
if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
|
||||
info->writes_viewport_index = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_LAYER) {
|
||||
info->writes_layer = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_PSIZE) {
|
||||
info->writes_psize = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
|
||||
info->writes_clipvertex = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_FRAGMENT) {
|
||||
if (semName == TGSI_SEMANTIC_POSITION) {
|
||||
info->writes_z = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_STENCIL) {
|
||||
info->writes_stencil = TRUE;
|
||||
} else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
|
||||
info->writes_samplemask = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX) {
|
||||
if (semName == TGSI_SEMANTIC_EDGEFLAG) {
|
||||
info->writes_edgeflag = TRUE;
|
||||
}
|
||||
}
|
||||
} else if (file == TGSI_FILE_SAMPLER) {
|
||||
info->samplers_declared |= 1 << reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
scan_declaration(info, &parse.FullToken.FullDeclaration);
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE:
|
||||
{
|
||||
uint reg = info->immediate_count++;
|
||||
uint file = TGSI_FILE_IMMEDIATE;
|
||||
|
||||
info->file_mask[file] |= (1 << reg);
|
||||
info->file_count[file]++;
|
||||
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
|
||||
}
|
||||
scan_immediate(info);
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_PROPERTY:
|
||||
{
|
||||
const struct tgsi_full_property *fullprop
|
||||
= &parse.FullToken.FullProperty;
|
||||
unsigned name = fullprop->Property.PropertyName;
|
||||
unsigned value = fullprop->u[0].Data;
|
||||
|
||||
assert(name < Elements(info->properties));
|
||||
info->properties[name] = value;
|
||||
|
||||
switch (name) {
|
||||
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
|
||||
info->num_written_clipdistance = value;
|
||||
info->clipdist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
|
||||
info->num_written_culldistance = value;
|
||||
info->culldist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
scan_property(info, &parse.FullToken.FullProperty);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert( 0 );
|
||||
assert(!"Unexpected TGSI token type");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -487,7 +508,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
}
|
||||
}
|
||||
|
||||
tgsi_parse_free (&parse);
|
||||
tgsi_parse_free(&parse);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
|
|||
|
||||
decl = tgsi_default_full_declaration();
|
||||
decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW;
|
||||
decl.Declaration.UsageMask = 0xf;
|
||||
decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
decl.Range.First =
|
||||
decl.Range.Last = index;
|
||||
decl.SamplerView.Resource = target;
|
||||
|
|
|
|||
|
|
@ -1593,7 +1593,7 @@ emit_decl_sampler_view(struct ureg_program *ureg,
|
|||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 3;
|
||||
out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
|
||||
out[0].decl.UsageMask = 0xf;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = index;
|
||||
|
|
@ -1621,7 +1621,7 @@ emit_decl_image(struct ureg_program *ureg,
|
|||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 3;
|
||||
out[0].decl.File = TGSI_FILE_IMAGE;
|
||||
out[0].decl.UsageMask = 0xf;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = index;
|
||||
|
|
@ -1645,7 +1645,7 @@ emit_decl_buffer(struct ureg_program *ureg,
|
|||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 2;
|
||||
out[0].decl.File = TGSI_FILE_BUFFER;
|
||||
out[0].decl.UsageMask = 0xf;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
out[0].decl.Atomic = atomic;
|
||||
|
||||
out[1].value = 0;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
*
|
||||
* Copyright 2008 VMware, Inc.
|
||||
* Copyright (c) 2008 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
|
@ -11,11 +11,11 @@
|
|||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
|
|
@ -23,24 +23,22 @@
|
|||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "pipe/p_config.h"
|
||||
#include "pipe/p_config.h"
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "pipe/p_format.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "pipe/p_format.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_tile.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_surface.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
@ -53,14 +51,15 @@
|
|||
#endif
|
||||
|
||||
|
||||
void _debug_vprintf(const char *format, va_list ap)
|
||||
void
|
||||
_debug_vprintf(const char *format, va_list ap)
|
||||
{
|
||||
static char buf[4096] = {'\0'};
|
||||
#if defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_EMBEDDED)
|
||||
/* We buffer until we find a newline. */
|
||||
size_t len = strlen(buf);
|
||||
int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
|
||||
if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
|
||||
if (ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
|
||||
os_log_message(buf);
|
||||
buf[0] = '\0';
|
||||
}
|
||||
|
|
@ -70,12 +69,12 @@ void _debug_vprintf(const char *format, va_list ap)
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
_pipe_debug_message(
|
||||
struct pipe_debug_callback *cb,
|
||||
unsigned *id,
|
||||
enum pipe_debug_type type,
|
||||
const char *fmt, ...)
|
||||
_pipe_debug_message(struct pipe_debug_callback *cb,
|
||||
unsigned *id,
|
||||
enum pipe_debug_type type,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
|
|
@ -112,9 +111,8 @@ debug_disable_error_message_boxes(void)
|
|||
|
||||
|
||||
#ifdef DEBUG
|
||||
void debug_print_blob( const char *name,
|
||||
const void *blob,
|
||||
unsigned size )
|
||||
void
|
||||
debug_print_blob(const char *name, const void *blob, unsigned size)
|
||||
{
|
||||
const unsigned *ublob = (const unsigned *)blob;
|
||||
unsigned i;
|
||||
|
|
@ -147,6 +145,7 @@ debug_get_option_should_print(void)
|
|||
return value;
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
debug_get_option(const char *name, const char *dfault)
|
||||
{
|
||||
|
|
@ -157,39 +156,42 @@ debug_get_option(const char *name, const char *dfault)
|
|||
result = dfault;
|
||||
|
||||
if (debug_get_option_should_print())
|
||||
debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
|
||||
|
||||
debug_printf("%s: %s = %s\n", __FUNCTION__, name,
|
||||
result ? result : "(null)");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
boolean
|
||||
debug_get_bool_option(const char *name, boolean dfault)
|
||||
{
|
||||
const char *str = os_get_option(name);
|
||||
boolean result;
|
||||
|
||||
if(str == NULL)
|
||||
|
||||
if (str == NULL)
|
||||
result = dfault;
|
||||
else if(!util_strcmp(str, "n"))
|
||||
else if (!util_strcmp(str, "n"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "no"))
|
||||
else if (!util_strcmp(str, "no"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "0"))
|
||||
else if (!util_strcmp(str, "0"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "f"))
|
||||
else if (!util_strcmp(str, "f"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "F"))
|
||||
else if (!util_strcmp(str, "F"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "false"))
|
||||
else if (!util_strcmp(str, "false"))
|
||||
result = FALSE;
|
||||
else if(!util_strcmp(str, "FALSE"))
|
||||
else if (!util_strcmp(str, "FALSE"))
|
||||
result = FALSE;
|
||||
else
|
||||
result = TRUE;
|
||||
|
||||
if (debug_get_option_should_print())
|
||||
debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
|
||||
|
||||
debug_printf("%s: %s = %s\n", __FUNCTION__, name,
|
||||
result ? "TRUE" : "FALSE");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -199,23 +201,23 @@ debug_get_num_option(const char *name, long dfault)
|
|||
{
|
||||
long result;
|
||||
const char *str;
|
||||
|
||||
|
||||
str = os_get_option(name);
|
||||
if(!str)
|
||||
if (!str)
|
||||
result = dfault;
|
||||
else {
|
||||
long sign;
|
||||
char c;
|
||||
c = *str++;
|
||||
if(c == '-') {
|
||||
if (c == '-') {
|
||||
sign = -1;
|
||||
c = *str++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
sign = 1;
|
||||
}
|
||||
result = 0;
|
||||
while('0' <= c && c <= '9') {
|
||||
while ('0' <= c && c <= '9') {
|
||||
result = result*10 + (c - '0');
|
||||
c = *str++;
|
||||
}
|
||||
|
|
@ -228,7 +230,9 @@ debug_get_num_option(const char *name, long dfault)
|
|||
return result;
|
||||
}
|
||||
|
||||
static boolean str_has_option(const char *str, const char *name)
|
||||
|
||||
static boolean
|
||||
str_has_option(const char *str, const char *name)
|
||||
{
|
||||
/* Empty string. */
|
||||
if (!*str) {
|
||||
|
|
@ -271,8 +275,9 @@ static boolean str_has_option(const char *str, const char *name)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
uint64_t
|
||||
debug_get_flags_option(const char *name,
|
||||
debug_get_flags_option(const char *name,
|
||||
const struct debug_named_value *flags,
|
||||
uint64_t dfault)
|
||||
{
|
||||
|
|
@ -280,9 +285,9 @@ debug_get_flags_option(const char *name,
|
|||
const char *str;
|
||||
const struct debug_named_value *orig = flags;
|
||||
unsigned namealign = 0;
|
||||
|
||||
|
||||
str = os_get_option(name);
|
||||
if(!str)
|
||||
if (!str)
|
||||
result = dfault;
|
||||
else if (!util_strcmp(str, "help")) {
|
||||
result = dfault;
|
||||
|
|
@ -296,7 +301,7 @@ debug_get_flags_option(const char *name,
|
|||
}
|
||||
else {
|
||||
result = 0;
|
||||
while( flags->name ) {
|
||||
while (flags->name) {
|
||||
if (str_has_option(str, flags->name))
|
||||
result |= flags->value;
|
||||
++flags;
|
||||
|
|
@ -305,7 +310,8 @@ debug_get_flags_option(const char *name,
|
|||
|
||||
if (debug_get_option_should_print()) {
|
||||
if (str) {
|
||||
debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str);
|
||||
debug_printf("%s: %s = 0x%"PRIx64" (%s)\n",
|
||||
__FUNCTION__, name, result, str);
|
||||
} else {
|
||||
debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result);
|
||||
}
|
||||
|
|
@ -315,24 +321,24 @@ debug_get_flags_option(const char *name,
|
|||
}
|
||||
|
||||
|
||||
void _debug_assert_fail(const char *expr,
|
||||
const char *file,
|
||||
unsigned line,
|
||||
const char *function)
|
||||
void
|
||||
_debug_assert_fail(const char *expr, const char *file, unsigned line,
|
||||
const char *function)
|
||||
{
|
||||
_debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr);
|
||||
_debug_printf("%s:%u:%s: Assertion `%s' failed.\n",
|
||||
file, line, function, expr);
|
||||
os_abort();
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
debug_dump_enum(const struct debug_named_value *names,
|
||||
debug_dump_enum(const struct debug_named_value *names,
|
||||
unsigned long value)
|
||||
{
|
||||
static char rest[64];
|
||||
|
||||
while(names->name) {
|
||||
if(names->value == value)
|
||||
|
||||
while (names->name) {
|
||||
if (names->value == value)
|
||||
return names->name;
|
||||
++names;
|
||||
}
|
||||
|
|
@ -343,14 +349,14 @@ debug_dump_enum(const struct debug_named_value *names,
|
|||
|
||||
|
||||
const char *
|
||||
debug_dump_enum_noprefix(const struct debug_named_value *names,
|
||||
debug_dump_enum_noprefix(const struct debug_named_value *names,
|
||||
const char *prefix,
|
||||
unsigned long value)
|
||||
{
|
||||
static char rest[64];
|
||||
|
||||
while(names->name) {
|
||||
if(names->value == value) {
|
||||
|
||||
while (names->name) {
|
||||
if (names->value == value) {
|
||||
const char *name = names->name;
|
||||
while (*name == *prefix) {
|
||||
name++;
|
||||
|
|
@ -361,16 +367,13 @@ debug_dump_enum_noprefix(const struct debug_named_value *names,
|
|||
++names;
|
||||
}
|
||||
|
||||
|
||||
|
||||
util_snprintf(rest, sizeof(rest), "0x%08lx", value);
|
||||
return rest;
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
debug_dump_flags(const struct debug_named_value *names,
|
||||
unsigned long value)
|
||||
debug_dump_flags(const struct debug_named_value *names, unsigned long value)
|
||||
{
|
||||
static char output[4096];
|
||||
static char rest[256];
|
||||
|
|
@ -378,8 +381,8 @@ debug_dump_flags(const struct debug_named_value *names,
|
|||
|
||||
output[0] = '\0';
|
||||
|
||||
while(names->name) {
|
||||
if((names->value & value) == names->value) {
|
||||
while (names->name) {
|
||||
if ((names->value & value) == names->value) {
|
||||
if (!first)
|
||||
util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
|
||||
else
|
||||
|
|
@ -390,27 +393,28 @@ debug_dump_flags(const struct debug_named_value *names,
|
|||
}
|
||||
++names;
|
||||
}
|
||||
|
||||
|
||||
if (value) {
|
||||
if (!first)
|
||||
util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
|
||||
else
|
||||
first = 0;
|
||||
|
||||
|
||||
util_snprintf(rest, sizeof(rest), "0x%08lx", value);
|
||||
util_strncat(output, rest, sizeof(output) - strlen(output) - 1);
|
||||
output[sizeof(output) - 1] = '\0';
|
||||
}
|
||||
|
||||
if(first)
|
||||
|
||||
if (first)
|
||||
return "0";
|
||||
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
void debug_print_format(const char *msg, unsigned fmt )
|
||||
void
|
||||
debug_print_format(const char *msg, unsigned fmt )
|
||||
{
|
||||
debug_printf("%s: %s\n", msg, util_format_name(fmt));
|
||||
}
|
||||
|
|
@ -447,7 +451,8 @@ u_prim_name(unsigned prim)
|
|||
int fl_indent = 0;
|
||||
const char* fl_function[1024];
|
||||
|
||||
int debug_funclog_enter(const char* f, const int line, const char* file)
|
||||
int
|
||||
debug_funclog_enter(const char* f, const int line, const char* file)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
|
@ -461,14 +466,16 @@ int debug_funclog_enter(const char* f, const int line, const char* file)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void debug_funclog_exit(const char* f, const int line, const char* file)
|
||||
void
|
||||
debug_funclog_exit(const char* f, const int line, const char* file)
|
||||
{
|
||||
--fl_indent;
|
||||
assert(fl_indent >= 0);
|
||||
assert(fl_function[fl_indent] == f);
|
||||
}
|
||||
|
||||
void debug_funclog_enter_exit(const char* f, const int line, const char* file)
|
||||
void
|
||||
debug_funclog_enter_exit(const char* f, const int line, const char* file)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < fl_indent; i++)
|
||||
|
|
@ -480,313 +487,6 @@ void debug_funclog_enter_exit(const char* f, const int line, const char* file)
|
|||
|
||||
|
||||
#ifdef DEBUG
|
||||
/**
|
||||
* Dump an image to .ppm file.
|
||||
* \param format PIPE_FORMAT_x
|
||||
* \param cpp bytes per pixel
|
||||
* \param width width in pixels
|
||||
* \param height height in pixels
|
||||
* \param stride row stride in bytes
|
||||
*/
|
||||
void debug_dump_image(const char *prefix,
|
||||
enum pipe_format format, unsigned cpp,
|
||||
unsigned width, unsigned height,
|
||||
unsigned stride,
|
||||
const void *data)
|
||||
{
|
||||
/* write a ppm file */
|
||||
char filename[256];
|
||||
unsigned char *rgb8;
|
||||
FILE *f;
|
||||
|
||||
util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
|
||||
|
||||
rgb8 = MALLOC(height * width * 3);
|
||||
if (!rgb8) {
|
||||
return;
|
||||
}
|
||||
|
||||
util_format_translate(
|
||||
PIPE_FORMAT_R8G8B8_UNORM,
|
||||
rgb8, width * 3,
|
||||
0, 0,
|
||||
format,
|
||||
data, stride,
|
||||
0, 0, width, height);
|
||||
|
||||
/* Must be opened in binary mode or DOS line ending causes data
|
||||
* to be read with one byte offset.
|
||||
*/
|
||||
f = fopen(filename, "wb");
|
||||
if (f) {
|
||||
fprintf(f, "P6\n");
|
||||
fprintf(f, "# ppm-file created by gallium\n");
|
||||
fprintf(f, "%i %i\n", width, height);
|
||||
fprintf(f, "255\n");
|
||||
fwrite(rgb8, 1, height * width * 3, f);
|
||||
fclose(f);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Can't open %s for writing\n", filename);
|
||||
}
|
||||
|
||||
FREE(rgb8);
|
||||
}
|
||||
|
||||
/* FIXME: dump resources, not surfaces... */
|
||||
void debug_dump_surface(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_surface *surface)
|
||||
{
|
||||
struct pipe_resource *texture;
|
||||
struct pipe_transfer *transfer;
|
||||
void *data;
|
||||
|
||||
if (!surface)
|
||||
return;
|
||||
|
||||
/* XXX: this doesn't necessarily work, as the driver may be using
|
||||
* temporary storage for the surface which hasn't been propagated
|
||||
* back into the texture. Need to nail down the semantics of views
|
||||
* and transfers a bit better before we can say if extra work needs
|
||||
* to be done here:
|
||||
*/
|
||||
texture = surface->texture;
|
||||
|
||||
data = pipe_transfer_map(pipe, texture, surface->u.tex.level,
|
||||
surface->u.tex.first_layer,
|
||||
PIPE_TRANSFER_READ,
|
||||
0, 0, surface->width, surface->height, &transfer);
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
debug_dump_image(prefix,
|
||||
texture->format,
|
||||
util_format_get_blocksize(texture->format),
|
||||
util_format_get_nblocksx(texture->format, surface->width),
|
||||
util_format_get_nblocksy(texture->format, surface->height),
|
||||
transfer->stride,
|
||||
data);
|
||||
|
||||
pipe->transfer_unmap(pipe, transfer);
|
||||
}
|
||||
|
||||
|
||||
void debug_dump_texture(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_resource *texture)
|
||||
{
|
||||
struct pipe_surface *surface, surf_tmpl;
|
||||
|
||||
if (!texture)
|
||||
return;
|
||||
|
||||
/* XXX for now, just dump image for layer=0, level=0 */
|
||||
u_surface_default_template(&surf_tmpl, texture);
|
||||
surface = pipe->create_surface(pipe, texture, &surf_tmpl);
|
||||
if (surface) {
|
||||
debug_dump_surface(pipe, prefix, surface);
|
||||
pipe->surface_destroy(pipe, surface);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#pragma pack(push,2)
|
||||
struct bmp_file_header {
|
||||
uint16_t bfType;
|
||||
uint32_t bfSize;
|
||||
uint16_t bfReserved1;
|
||||
uint16_t bfReserved2;
|
||||
uint32_t bfOffBits;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
struct bmp_info_header {
|
||||
uint32_t biSize;
|
||||
int32_t biWidth;
|
||||
int32_t biHeight;
|
||||
uint16_t biPlanes;
|
||||
uint16_t biBitCount;
|
||||
uint32_t biCompression;
|
||||
uint32_t biSizeImage;
|
||||
int32_t biXPelsPerMeter;
|
||||
int32_t biYPelsPerMeter;
|
||||
uint32_t biClrUsed;
|
||||
uint32_t biClrImportant;
|
||||
};
|
||||
|
||||
struct bmp_rgb_quad {
|
||||
uint8_t rgbBlue;
|
||||
uint8_t rgbGreen;
|
||||
uint8_t rgbRed;
|
||||
uint8_t rgbAlpha;
|
||||
};
|
||||
|
||||
void
|
||||
debug_dump_surface_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_surface *surface)
|
||||
{
|
||||
struct pipe_transfer *transfer;
|
||||
struct pipe_resource *texture = surface->texture;
|
||||
void *ptr;
|
||||
|
||||
ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level,
|
||||
surface->u.tex.first_layer, PIPE_TRANSFER_READ,
|
||||
0, 0, surface->width, surface->height, &transfer);
|
||||
|
||||
debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
|
||||
|
||||
pipe->transfer_unmap(pipe, transfer);
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_transfer_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_transfer *transfer, void *ptr)
|
||||
{
|
||||
float *rgba;
|
||||
|
||||
if (!transfer)
|
||||
goto error1;
|
||||
|
||||
rgba = MALLOC(transfer->box.width *
|
||||
transfer->box.height *
|
||||
transfer->box.depth *
|
||||
4*sizeof(float));
|
||||
if (!rgba)
|
||||
goto error1;
|
||||
|
||||
pipe_get_tile_rgba(transfer, ptr, 0, 0,
|
||||
transfer->box.width, transfer->box.height,
|
||||
rgba);
|
||||
|
||||
debug_dump_float_rgba_bmp(filename,
|
||||
transfer->box.width, transfer->box.height,
|
||||
rgba, transfer->box.width);
|
||||
|
||||
FREE(rgba);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_float_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
float *rgba, unsigned stride)
|
||||
{
|
||||
FILE *stream;
|
||||
struct bmp_file_header bmfh;
|
||||
struct bmp_info_header bmih;
|
||||
unsigned x, y;
|
||||
|
||||
if (!rgba)
|
||||
goto error1;
|
||||
|
||||
bmfh.bfType = 0x4d42;
|
||||
bmfh.bfSize = 14 + 40 + height*width*4;
|
||||
bmfh.bfReserved1 = 0;
|
||||
bmfh.bfReserved2 = 0;
|
||||
bmfh.bfOffBits = 14 + 40;
|
||||
|
||||
bmih.biSize = 40;
|
||||
bmih.biWidth = width;
|
||||
bmih.biHeight = height;
|
||||
bmih.biPlanes = 1;
|
||||
bmih.biBitCount = 32;
|
||||
bmih.biCompression = 0;
|
||||
bmih.biSizeImage = height*width*4;
|
||||
bmih.biXPelsPerMeter = 0;
|
||||
bmih.biYPelsPerMeter = 0;
|
||||
bmih.biClrUsed = 0;
|
||||
bmih.biClrImportant = 0;
|
||||
|
||||
stream = fopen(filename, "wb");
|
||||
if (!stream)
|
||||
goto error1;
|
||||
|
||||
fwrite(&bmfh, 14, 1, stream);
|
||||
fwrite(&bmih, 40, 1, stream);
|
||||
|
||||
y = height;
|
||||
while(y--) {
|
||||
float *ptr = rgba + (stride * y * 4);
|
||||
for(x = 0; x < width; ++x)
|
||||
{
|
||||
struct bmp_rgb_quad pixel;
|
||||
pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]);
|
||||
pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
|
||||
pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]);
|
||||
pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
|
||||
fwrite(&pixel, 1, 4, stream);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(stream);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_ubyte_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
const ubyte *rgba, unsigned stride)
|
||||
{
|
||||
FILE *stream;
|
||||
struct bmp_file_header bmfh;
|
||||
struct bmp_info_header bmih;
|
||||
unsigned x, y;
|
||||
|
||||
assert(rgba);
|
||||
if(!rgba)
|
||||
goto error1;
|
||||
|
||||
bmfh.bfType = 0x4d42;
|
||||
bmfh.bfSize = 14 + 40 + height*width*4;
|
||||
bmfh.bfReserved1 = 0;
|
||||
bmfh.bfReserved2 = 0;
|
||||
bmfh.bfOffBits = 14 + 40;
|
||||
|
||||
bmih.biSize = 40;
|
||||
bmih.biWidth = width;
|
||||
bmih.biHeight = height;
|
||||
bmih.biPlanes = 1;
|
||||
bmih.biBitCount = 32;
|
||||
bmih.biCompression = 0;
|
||||
bmih.biSizeImage = height*width*4;
|
||||
bmih.biXPelsPerMeter = 0;
|
||||
bmih.biYPelsPerMeter = 0;
|
||||
bmih.biClrUsed = 0;
|
||||
bmih.biClrImportant = 0;
|
||||
|
||||
stream = fopen(filename, "wb");
|
||||
assert(stream);
|
||||
if(!stream)
|
||||
goto error1;
|
||||
|
||||
fwrite(&bmfh, 14, 1, stream);
|
||||
fwrite(&bmih, 40, 1, stream);
|
||||
|
||||
y = height;
|
||||
while(y--) {
|
||||
const ubyte *ptr = rgba + (stride * y * 4);
|
||||
for(x = 0; x < width; ++x)
|
||||
{
|
||||
struct bmp_rgb_quad pixel;
|
||||
pixel.rgbRed = ptr[x*4 + 0];
|
||||
pixel.rgbGreen = ptr[x*4 + 1];
|
||||
pixel.rgbBlue = ptr[x*4 + 2];
|
||||
pixel.rgbAlpha = ptr[x*4 + 3];
|
||||
fwrite(&pixel, 1, 4, stream);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(stream);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Print PIPE_TRANSFER_x flags with a message.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -464,45 +464,6 @@ void
|
|||
debug_memory_end(unsigned long beginning);
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
struct pipe_context;
|
||||
struct pipe_surface;
|
||||
struct pipe_transfer;
|
||||
struct pipe_resource;
|
||||
|
||||
void debug_dump_image(const char *prefix,
|
||||
enum pipe_format format, unsigned cpp,
|
||||
unsigned width, unsigned height,
|
||||
unsigned stride,
|
||||
const void *data);
|
||||
void debug_dump_surface(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_surface *surface);
|
||||
void debug_dump_texture(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_resource *texture);
|
||||
void debug_dump_surface_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_surface *surface);
|
||||
void debug_dump_transfer_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_transfer *transfer, void *ptr);
|
||||
void debug_dump_float_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
float *rgba, unsigned stride);
|
||||
void debug_dump_ubyte_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
const ubyte *rgba, unsigned stride);
|
||||
#else
|
||||
#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
|
||||
#define debug_dump_surface(pipe, prefix, surface) ((void)0)
|
||||
#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
|
||||
#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
|
||||
#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
|
||||
#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
debug_print_transfer_flags(const char *msg, unsigned usage);
|
||||
|
||||
|
|
|
|||
348
src/gallium/auxiliary/util/u_debug_image.c
Normal file
348
src/gallium/auxiliary/util/u_debug_image.c
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
/*
|
||||
* Copyright (c) 2008-2016 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/u_tile.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
/**
|
||||
* Dump an image to .ppm file.
|
||||
* \param format PIPE_FORMAT_x
|
||||
* \param cpp bytes per pixel
|
||||
* \param width width in pixels
|
||||
* \param height height in pixels
|
||||
* \param stride row stride in bytes
|
||||
*/
|
||||
void
|
||||
debug_dump_image(const char *prefix,
|
||||
enum pipe_format format, unsigned cpp,
|
||||
unsigned width, unsigned height,
|
||||
unsigned stride,
|
||||
const void *data)
|
||||
{
|
||||
/* write a ppm file */
|
||||
char filename[256];
|
||||
unsigned char *rgb8;
|
||||
FILE *f;
|
||||
|
||||
util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
|
||||
|
||||
rgb8 = MALLOC(height * width * 3);
|
||||
if (!rgb8) {
|
||||
return;
|
||||
}
|
||||
|
||||
util_format_translate(
|
||||
PIPE_FORMAT_R8G8B8_UNORM,
|
||||
rgb8, width * 3,
|
||||
0, 0,
|
||||
format,
|
||||
data, stride,
|
||||
0, 0, width, height);
|
||||
|
||||
/* Must be opened in binary mode or DOS line ending causes data
|
||||
* to be read with one byte offset.
|
||||
*/
|
||||
f = fopen(filename, "wb");
|
||||
if (f) {
|
||||
fprintf(f, "P6\n");
|
||||
fprintf(f, "# ppm-file created by gallium\n");
|
||||
fprintf(f, "%i %i\n", width, height);
|
||||
fprintf(f, "255\n");
|
||||
fwrite(rgb8, 1, height * width * 3, f);
|
||||
fclose(f);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Can't open %s for writing\n", filename);
|
||||
}
|
||||
|
||||
FREE(rgb8);
|
||||
}
|
||||
|
||||
|
||||
/* FIXME: dump resources, not surfaces... */
|
||||
void
|
||||
debug_dump_surface(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_surface *surface)
|
||||
{
|
||||
struct pipe_resource *texture;
|
||||
struct pipe_transfer *transfer;
|
||||
void *data;
|
||||
|
||||
if (!surface)
|
||||
return;
|
||||
|
||||
/* XXX: this doesn't necessarily work, as the driver may be using
|
||||
* temporary storage for the surface which hasn't been propagated
|
||||
* back into the texture. Need to nail down the semantics of views
|
||||
* and transfers a bit better before we can say if extra work needs
|
||||
* to be done here:
|
||||
*/
|
||||
texture = surface->texture;
|
||||
|
||||
data = pipe_transfer_map(pipe, texture, surface->u.tex.level,
|
||||
surface->u.tex.first_layer,
|
||||
PIPE_TRANSFER_READ,
|
||||
0, 0, surface->width, surface->height, &transfer);
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
debug_dump_image(prefix,
|
||||
texture->format,
|
||||
util_format_get_blocksize(texture->format),
|
||||
util_format_get_nblocksx(texture->format, surface->width),
|
||||
util_format_get_nblocksy(texture->format, surface->height),
|
||||
transfer->stride,
|
||||
data);
|
||||
|
||||
pipe->transfer_unmap(pipe, transfer);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
debug_dump_texture(struct pipe_context *pipe,
|
||||
const char *prefix,
|
||||
struct pipe_resource *texture)
|
||||
{
|
||||
struct pipe_surface *surface, surf_tmpl;
|
||||
|
||||
if (!texture)
|
||||
return;
|
||||
|
||||
/* XXX for now, just dump image for layer=0, level=0 */
|
||||
u_surface_default_template(&surf_tmpl, texture);
|
||||
surface = pipe->create_surface(pipe, texture, &surf_tmpl);
|
||||
if (surface) {
|
||||
debug_dump_surface(pipe, prefix, surface);
|
||||
pipe->surface_destroy(pipe, surface);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * BMP file header.  Declared under 2-byte packing so that bfSize directly
 * follows bfType; the BMP writers in this file fwrite() the first 14 bytes
 * of this struct as-is.
 */
#pragma pack(push,2)
|
||||
struct bmp_file_header {
|
||||
uint16_t bfType; /* the writers in this file store 0x4d42 here */
|
||||
uint32_t bfSize; /* headers plus pixel data: 14 + 40 + height*width*4 */
|
||||
uint16_t bfReserved1; /* written as 0 */
|
||||
uint16_t bfReserved2; /* written as 0 */
|
||||
uint32_t bfOffBits; /* offset of the pixel data: 14 + 40 */
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
/*
 * BMP info header.  The writers in this file fwrite() 40 bytes of it right
 * after the file header.
 */
struct bmp_info_header {
|
||||
uint32_t biSize; /* written as 40 */
|
||||
int32_t biWidth; /* image width in pixels */
|
||||
int32_t biHeight; /* image height in pixels */
|
||||
uint16_t biPlanes; /* written as 1 */
|
||||
uint16_t biBitCount; /* written as 32 */
|
||||
uint32_t biCompression; /* written as 0 */
|
||||
uint32_t biSizeImage; /* pixel data size: height*width*4 */
|
||||
int32_t biXPelsPerMeter; /* written as 0 */
|
||||
int32_t biYPelsPerMeter; /* written as 0 */
|
||||
uint32_t biClrUsed; /* written as 0 */
|
||||
uint32_t biClrImportant; /* written as 0 */
|
||||
};
|
||||
|
||||
/*
 * One pixel as written to the file: four bytes in blue, green, red, alpha
 * order.
 */
struct bmp_rgb_quad {
|
||||
uint8_t rgbBlue;
|
||||
uint8_t rgbGreen;
|
||||
uint8_t rgbRed;
|
||||
uint8_t rgbAlpha;
|
||||
};
|
||||
|
||||
void
|
||||
debug_dump_surface_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_surface *surface)
|
||||
{
|
||||
struct pipe_transfer *transfer;
|
||||
struct pipe_resource *texture = surface->texture;
|
||||
void *ptr;
|
||||
|
||||
ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level,
|
||||
surface->u.tex.first_layer, PIPE_TRANSFER_READ,
|
||||
0, 0, surface->width, surface->height, &transfer);
|
||||
|
||||
debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
|
||||
|
||||
pipe->transfer_unmap(pipe, transfer);
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_transfer_bmp(struct pipe_context *pipe,
|
||||
const char *filename,
|
||||
struct pipe_transfer *transfer, void *ptr)
|
||||
{
|
||||
float *rgba;
|
||||
|
||||
if (!transfer)
|
||||
goto error1;
|
||||
|
||||
rgba = MALLOC(transfer->box.width *
|
||||
transfer->box.height *
|
||||
transfer->box.depth *
|
||||
4*sizeof(float));
|
||||
if (!rgba)
|
||||
goto error1;
|
||||
|
||||
pipe_get_tile_rgba(transfer, ptr, 0, 0,
|
||||
transfer->box.width, transfer->box.height,
|
||||
rgba);
|
||||
|
||||
debug_dump_float_rgba_bmp(filename,
|
||||
transfer->box.width, transfer->box.height,
|
||||
rgba, transfer->box.width);
|
||||
|
||||
FREE(rgba);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_float_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
float *rgba, unsigned stride)
|
||||
{
|
||||
FILE *stream;
|
||||
struct bmp_file_header bmfh;
|
||||
struct bmp_info_header bmih;
|
||||
unsigned x, y;
|
||||
|
||||
if (!rgba)
|
||||
goto error1;
|
||||
|
||||
bmfh.bfType = 0x4d42;
|
||||
bmfh.bfSize = 14 + 40 + height*width*4;
|
||||
bmfh.bfReserved1 = 0;
|
||||
bmfh.bfReserved2 = 0;
|
||||
bmfh.bfOffBits = 14 + 40;
|
||||
|
||||
bmih.biSize = 40;
|
||||
bmih.biWidth = width;
|
||||
bmih.biHeight = height;
|
||||
bmih.biPlanes = 1;
|
||||
bmih.biBitCount = 32;
|
||||
bmih.biCompression = 0;
|
||||
bmih.biSizeImage = height*width*4;
|
||||
bmih.biXPelsPerMeter = 0;
|
||||
bmih.biYPelsPerMeter = 0;
|
||||
bmih.biClrUsed = 0;
|
||||
bmih.biClrImportant = 0;
|
||||
|
||||
stream = fopen(filename, "wb");
|
||||
if (!stream)
|
||||
goto error1;
|
||||
|
||||
fwrite(&bmfh, 14, 1, stream);
|
||||
fwrite(&bmih, 40, 1, stream);
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
float *ptr = rgba + (stride * y * 4);
|
||||
for (x = 0; x < width; ++x) {
|
||||
struct bmp_rgb_quad pixel;
|
||||
pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]);
|
||||
pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
|
||||
pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]);
|
||||
pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
|
||||
fwrite(&pixel, 1, 4, stream);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(stream);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_ubyte_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
const ubyte *rgba, unsigned stride)
|
||||
{
|
||||
FILE *stream;
|
||||
struct bmp_file_header bmfh;
|
||||
struct bmp_info_header bmih;
|
||||
unsigned x, y;
|
||||
|
||||
assert(rgba);
|
||||
if (!rgba)
|
||||
goto error1;
|
||||
|
||||
bmfh.bfType = 0x4d42;
|
||||
bmfh.bfSize = 14 + 40 + height*width*4;
|
||||
bmfh.bfReserved1 = 0;
|
||||
bmfh.bfReserved2 = 0;
|
||||
bmfh.bfOffBits = 14 + 40;
|
||||
|
||||
bmih.biSize = 40;
|
||||
bmih.biWidth = width;
|
||||
bmih.biHeight = height;
|
||||
bmih.biPlanes = 1;
|
||||
bmih.biBitCount = 32;
|
||||
bmih.biCompression = 0;
|
||||
bmih.biSizeImage = height*width*4;
|
||||
bmih.biXPelsPerMeter = 0;
|
||||
bmih.biYPelsPerMeter = 0;
|
||||
bmih.biClrUsed = 0;
|
||||
bmih.biClrImportant = 0;
|
||||
|
||||
stream = fopen(filename, "wb");
|
||||
assert(stream);
|
||||
if (!stream)
|
||||
goto error1;
|
||||
|
||||
fwrite(&bmfh, 14, 1, stream);
|
||||
fwrite(&bmih, 40, 1, stream);
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
const ubyte *ptr = rgba + (stride * y * 4);
|
||||
for (x = 0; x < width; ++x) {
|
||||
struct bmp_rgb_quad pixel;
|
||||
pixel.rgbRed = ptr[x*4 + 0];
|
||||
pixel.rgbGreen = ptr[x*4 + 1];
|
||||
pixel.rgbBlue = ptr[x*4 + 2];
|
||||
pixel.rgbAlpha = ptr[x*4 + 3];
|
||||
fwrite(&pixel, 1, 4, stream);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(stream);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
#endif
|
||||
74
src/gallium/auxiliary/util/u_debug_image.h
Normal file
74
src/gallium/auxiliary/util/u_debug_image.h
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2008-2016 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef U_DEBUG_IMAGE_H
|
||||
#define U_DEBUG_IMAGE_H
|
||||
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "pipe/p_format.h"
|
||||
|
||||
|
||||
/*
 * Image dumping helpers.  The functions are only declared when DEBUG is
 * defined; otherwise each name is a macro that discards its arguments and
 * expands to a no-op, so call sites need no #ifdef of their own.
 */
#ifdef DEBUG
struct pipe_context;
struct pipe_surface;
struct pipe_transfer;
struct pipe_resource;

void debug_dump_image(const char *prefix,
                      enum pipe_format format, unsigned cpp,
                      unsigned width, unsigned height,
                      unsigned stride,
                      const void *data);
void debug_dump_surface(struct pipe_context *pipe,
                        const char *prefix,
                        struct pipe_surface *surface);
void debug_dump_texture(struct pipe_context *pipe,
                        const char *prefix,
                        struct pipe_resource *texture);
void debug_dump_surface_bmp(struct pipe_context *pipe,
                            const char *filename,
                            struct pipe_surface *surface);
void debug_dump_transfer_bmp(struct pipe_context *pipe,
                             const char *filename,
                             struct pipe_transfer *transfer, void *ptr);
void debug_dump_float_rgba_bmp(const char *filename,
                               unsigned width, unsigned height,
                               float *rgba, unsigned stride);
void debug_dump_ubyte_rgba_bmp(const char *filename,
                               unsigned width, unsigned height,
                               const ubyte *rgba, unsigned stride);
#else
/* Each macro takes exactly as many arguments as the function it replaces. */
#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
#define debug_dump_surface(pipe, prefix, surface) ((void)0)
#define debug_dump_texture(pipe, prefix, texture) ((void)0)
#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
#define debug_dump_transfer_bmp(pipe, filename, transfer, ptr) ((void)0)
#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
|
@ -10,11 +10,11 @@
|
|||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
|
|
@ -22,13 +22,13 @@
|
|||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Stack backtracing.
|
||||
*
|
||||
*
|
||||
* @author Jose Fonseca <jfonseca@vmware.com>
|
||||
*/
|
||||
|
||||
|
|
@ -44,12 +44,13 @@
|
|||
/**
|
||||
* Capture stack backtrace.
|
||||
*
|
||||
* NOTE: The implementation of this function is quite big, but it is important not to
|
||||
* break it down in smaller functions to avoid adding new frames to the calling stack.
|
||||
* NOTE: The implementation of this function is quite big, but it is important
|
||||
* not to break it down in smaller functions to avoid adding new frames to the
|
||||
* calling stack.
|
||||
*/
|
||||
void
|
||||
debug_backtrace_capture(struct debug_stack_frame *backtrace,
|
||||
unsigned start_frame,
|
||||
unsigned start_frame,
|
||||
unsigned nr_frames)
|
||||
{
|
||||
const void **frame_pointer = NULL;
|
||||
|
|
@ -66,7 +67,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
|
|||
*/
|
||||
#if defined(PIPE_OS_WINDOWS)
|
||||
{
|
||||
typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, PVOID *, PULONG);
|
||||
typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG,
|
||||
PVOID *, PULONG);
|
||||
static PFNCAPTURESTACKBACKTRACE pfnCaptureStackBackTrace = NULL;
|
||||
|
||||
if (!pfnCaptureStackBackTrace) {
|
||||
|
|
@ -76,8 +78,9 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
|
|||
assert(hModule);
|
||||
}
|
||||
if (hModule) {
|
||||
pfnCaptureStackBackTrace = (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
|
||||
"RtlCaptureStackBackTrace");
|
||||
pfnCaptureStackBackTrace =
|
||||
(PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
|
||||
"RtlCaptureStackBackTrace");
|
||||
}
|
||||
}
|
||||
if (pfnCaptureStackBackTrace) {
|
||||
|
|
@ -88,7 +91,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
|
|||
start_frame += 1;
|
||||
|
||||
assert(start_frame + nr_frames < 63);
|
||||
i = pfnCaptureStackBackTrace(start_frame, nr_frames, (PVOID *) &backtrace->function, NULL);
|
||||
i = pfnCaptureStackBackTrace(start_frame, nr_frames,
|
||||
(PVOID *) &backtrace->function, NULL);
|
||||
|
||||
/* Pad remaining requested frames with NULL */
|
||||
while (i < nr_frames) {
|
||||
|
|
@ -110,50 +114,49 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
|
|||
#else
|
||||
frame_pointer = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
while(nr_frames) {
|
||||
while (nr_frames) {
|
||||
const void **next_frame_pointer;
|
||||
|
||||
if(!frame_pointer)
|
||||
if (!frame_pointer)
|
||||
break;
|
||||
|
||||
if(start_frame)
|
||||
|
||||
if (start_frame)
|
||||
--start_frame;
|
||||
else {
|
||||
backtrace[i++].function = frame_pointer[1];
|
||||
--nr_frames;
|
||||
}
|
||||
|
||||
|
||||
next_frame_pointer = (const void **)frame_pointer[0];
|
||||
|
||||
|
||||
/* Limit the stack walk to avoid referencing undefined memory */
|
||||
if((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
|
||||
(uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
|
||||
if ((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
|
||||
(uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
|
||||
break;
|
||||
|
||||
|
||||
frame_pointer = next_frame_pointer;
|
||||
}
|
||||
#else
|
||||
(void) frame_pointer;
|
||||
#endif
|
||||
|
||||
while(nr_frames) {
|
||||
while (nr_frames) {
|
||||
backtrace[i++].function = NULL;
|
||||
--nr_frames;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
debug_backtrace_dump(const struct debug_stack_frame *backtrace,
|
||||
debug_backtrace_dump(const struct debug_stack_frame *backtrace,
|
||||
unsigned nr_frames)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for(i = 0; i < nr_frames; ++i) {
|
||||
if(!backtrace[i].function)
|
||||
|
||||
for (i = 0; i < nr_frames; ++i) {
|
||||
if (!backtrace[i].function)
|
||||
break;
|
||||
debug_symbol_print(backtrace[i].function);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@
|
|||
#define NUM_NEW_TOKENS 53
|
||||
|
||||
|
||||
static void
|
||||
void
|
||||
util_pstipple_update_stipple_texture(struct pipe_context *pipe,
|
||||
struct pipe_resource *tex,
|
||||
const uint32_t pattern[32])
|
||||
|
|
@ -118,7 +118,7 @@ util_pstipple_create_stipple_texture(struct pipe_context *pipe,
|
|||
|
||||
tex = screen->resource_create(screen, &templat);
|
||||
|
||||
if (tex)
|
||||
if (tex && pattern)
|
||||
util_pstipple_update_stipple_texture(pipe, tex, pattern);
|
||||
|
||||
return tex;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,11 @@ struct pipe_resource;
|
|||
struct pipe_shader_state;
|
||||
|
||||
|
||||
extern void
|
||||
util_pstipple_update_stipple_texture(struct pipe_context *pipe,
|
||||
struct pipe_resource *tex,
|
||||
const uint32_t pattern[32]);
|
||||
|
||||
extern struct pipe_resource *
|
||||
util_pstipple_create_stipple_texture(struct pipe_context *pipe,
|
||||
const uint32_t pattern[32]);
|
||||
|
|
|
|||
|
|
@ -29,11 +29,14 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
||||
|
||||
static void
|
||||
util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
|
||||
util_staging_resource_template(struct pipe_resource *pt, unsigned width,
|
||||
unsigned height, unsigned depth,
|
||||
struct pipe_resource *template)
|
||||
{
|
||||
memset(template, 0, sizeof(struct pipe_resource));
|
||||
if(pt->target != PIPE_BUFFER && depth <= 1)
|
||||
if (pt->target != PIPE_BUFFER && depth <= 1)
|
||||
template->target = PIPE_TEXTURE_RECT;
|
||||
else
|
||||
template->target = pt->target;
|
||||
|
|
@ -49,16 +52,15 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
|
|||
template->flags = 0;
|
||||
}
|
||||
|
||||
|
||||
struct util_staging_transfer *
|
||||
util_staging_transfer_init(struct pipe_context *pipe,
|
||||
struct pipe_resource *pt,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
boolean direct, struct util_staging_transfer *tx)
|
||||
struct pipe_resource *pt,
|
||||
unsigned level, unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
boolean direct, struct util_staging_transfer *tx)
|
||||
{
|
||||
struct pipe_screen *pscreen = pipe->screen;
|
||||
|
||||
struct pipe_resource staging_resource_template;
|
||||
|
||||
pipe_resource_reference(&tx->base.resource, pt);
|
||||
|
|
@ -66,23 +68,22 @@ util_staging_transfer_init(struct pipe_context *pipe,
|
|||
tx->base.usage = usage;
|
||||
tx->base.box = *box;
|
||||
|
||||
if (direct)
|
||||
{
|
||||
if (direct) {
|
||||
tx->staging_resource = pt;
|
||||
return tx;
|
||||
}
|
||||
|
||||
util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
|
||||
tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
|
||||
if (!tx->staging_resource)
|
||||
{
|
||||
util_staging_resource_template(pt, box->width, box->height,
|
||||
box->depth, &staging_resource_template);
|
||||
tx->staging_resource = pscreen->resource_create(pscreen,
|
||||
&staging_resource_template);
|
||||
if (!tx->staging_resource) {
|
||||
pipe_resource_reference(&tx->base.resource, NULL);
|
||||
FREE(tx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ)
|
||||
{
|
||||
if (usage & PIPE_TRANSFER_READ) {
|
||||
/* XXX this looks wrong dst is always the same but looping over src z? */
|
||||
int zi;
|
||||
struct pipe_box sbox;
|
||||
|
|
@ -92,7 +93,7 @@ util_staging_transfer_init(struct pipe_context *pipe,
|
|||
sbox.width = box->width;
|
||||
sbox.height = box->height;
|
||||
sbox.depth = 1;
|
||||
for(zi = 0; zi < box->depth; ++zi) {
|
||||
for (zi = 0; zi < box->depth; ++zi) {
|
||||
sbox.z = sbox.z + zi;
|
||||
pipe->resource_copy_region(pipe, tx->staging_resource, 0, 0, 0, 0,
|
||||
tx->base.resource, level, &sbox);
|
||||
|
|
@ -102,14 +103,15 @@ util_staging_transfer_init(struct pipe_context *pipe,
|
|||
return tx;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
|
||||
util_staging_transfer_destroy(struct pipe_context *pipe,
|
||||
struct pipe_transfer *ptx)
|
||||
{
|
||||
struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
|
||||
|
||||
if (tx->staging_resource != tx->base.resource)
|
||||
{
|
||||
if(tx->base.usage & PIPE_TRANSFER_WRITE) {
|
||||
if (tx->staging_resource != tx->base.resource) {
|
||||
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
|
||||
/* XXX this looks wrong src is always the same but looping over dst z? */
|
||||
int zi;
|
||||
struct pipe_box sbox;
|
||||
|
|
@ -119,8 +121,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p
|
|||
sbox.width = tx->base.box.width;
|
||||
sbox.height = tx->base.box.height;
|
||||
sbox.depth = 1;
|
||||
for(zi = 0; zi < tx->base.box.depth; ++zi)
|
||||
pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi,
|
||||
for (zi = 0; zi < tx->base.box.depth; ++zi)
|
||||
pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level,
|
||||
tx->base.box.x, tx->base.box.y,
|
||||
tx->base.box.z + zi,
|
||||
tx->staging_resource, 0, &sbox);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,22 +42,26 @@
|
|||
struct util_staging_transfer {
|
||||
struct pipe_transfer base;
|
||||
|
||||
/* if direct, same as base.resource, otherwise the temporary staging resource */
|
||||
/* if direct, same as base.resource, otherwise the temporary staging
|
||||
* resource
|
||||
*/
|
||||
struct pipe_resource *staging_resource;
|
||||
};
|
||||
|
||||
/* user must be stride, slice_stride and offset */
|
||||
/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
|
||||
/* staging resource is currently created with PIPE_USAGE_STAGING */
|
||||
/* user must be stride, slice_stride and offset.
|
||||
* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING
|
||||
* should be a good value to pass for direct staging resource is currently
|
||||
* created with PIPE_USAGE_STAGING
|
||||
*/
|
||||
struct util_staging_transfer *
|
||||
util_staging_transfer_init(struct pipe_context *pipe,
|
||||
struct pipe_resource *pt,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
boolean direct, struct util_staging_transfer *tx);
|
||||
struct pipe_resource *pt,
|
||||
unsigned level, unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
boolean direct, struct util_staging_transfer *tx);
|
||||
|
||||
void
|
||||
util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
|
||||
util_staging_transfer_destroy(struct pipe_context *pipe,
|
||||
struct pipe_transfer *ptx);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1004,7 +1004,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
nir_const_value *const_offset;
|
||||
/* UBO addresses are the first driver params: */
|
||||
unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
|
||||
int off = intr->const_index[0];
|
||||
int off = 0;
|
||||
|
||||
/* First src is ubo index, which could either be an immed or not: */
|
||||
src0 = get_src(ctx, &intr->src[0])[0];
|
||||
|
|
@ -1092,7 +1092,7 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
|
||||
struct ir3_array *arr = get_var(ctx, dvar->var);
|
||||
struct ir3_instruction *addr, **src;
|
||||
unsigned wrmask = intr->const_index[0];
|
||||
unsigned wrmask = nir_intrinsic_write_mask(intr);
|
||||
|
||||
compile_assert(ctx, dvar->deref.child &&
|
||||
(dvar->deref.child->deref_type == nir_deref_type_array));
|
||||
|
|
@ -1145,8 +1145,8 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
|
||||
struct ir3_instruction **dst, **src;
|
||||
struct ir3_block *b = ctx->block;
|
||||
int idx = intr->const_index[0];
|
||||
nir_const_value *const_offset;
|
||||
int idx;
|
||||
|
||||
if (info->has_dest) {
|
||||
dst = get_dst(ctx, &intr->dest, intr->num_components);
|
||||
|
|
@ -1156,6 +1156,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_uniform:
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||
if (const_offset) {
|
||||
idx += const_offset->u[0];
|
||||
|
|
@ -1182,6 +1183,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
emit_intrinsic_load_ubo(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_load_input:
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||
if (const_offset) {
|
||||
idx += const_offset->u[0];
|
||||
|
|
@ -1208,6 +1210,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
emit_intrinsic_store_var(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_store_output:
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||
compile_assert(ctx, const_offset != NULL);
|
||||
idx += const_offset->u[0];
|
||||
|
|
@ -1243,6 +1246,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
dst[0] = ctx->instance_id;
|
||||
break;
|
||||
case nir_intrinsic_load_user_clip_plane:
|
||||
idx = nir_intrinsic_ucp_id(intr);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ query_process_bo(const struct ilo_context *ilo, struct ilo_query *q)
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
|
|
@ -157,6 +158,7 @@ ilo_init_draw_query(struct ilo_context *ilo, struct ilo_query *q)
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ static const struct {
|
|||
#define INFOX(prefix) { NULL, NULL, NULL, NULL, }
|
||||
|
||||
[PIPE_QUERY_OCCLUSION_COUNTER] = INFO(draw),
|
||||
[PIPE_QUERY_OCCLUSION_PREDICATE] = INFOX(draw),
|
||||
[PIPE_QUERY_OCCLUSION_PREDICATE] = INFO(draw),
|
||||
[PIPE_QUERY_TIMESTAMP] = INFO(draw),
|
||||
[PIPE_QUERY_TIMESTAMP_DISJOINT] = INFOX(draw),
|
||||
[PIPE_QUERY_TIME_ELAPSED] = INFO(draw),
|
||||
|
|
@ -75,6 +75,7 @@ ilo_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index)
|
|||
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
|
|
@ -163,6 +164,12 @@ query_serialize(const struct ilo_query *q, void *buf)
|
|||
dst[0] = q->result.u64;
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
{
|
||||
uint64_t *dst = buf;
|
||||
dst[0] = !!q->result.u64;
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
{
|
||||
const struct pipe_query_data_pipeline_statistics *stats =
|
||||
|
|
|
|||
|
|
@ -202,6 +202,7 @@ ilo_render_get_query_len(const struct ilo_render *render,
|
|||
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
/* no reg */
|
||||
|
|
@ -268,6 +269,7 @@ ilo_render_emit_query(struct ilo_render *render,
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
|
||||
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_string.h"
|
||||
#include "draw/draw_context.h"
|
||||
#include "lp_flush.h"
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ nv30_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
|||
q->report = 1;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
q->enable = NV30_3D_QUERY_ENABLE;
|
||||
q->report = 1;
|
||||
break;
|
||||
|
|
@ -203,7 +204,6 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
struct nv30_query *q = nv30_query(pq);
|
||||
volatile uint32_t *ntfy0 = nv30_ntfy(screen, q->qo[0]);
|
||||
volatile uint32_t *ntfy1 = nv30_ntfy(screen, q->qo[1]);
|
||||
uint64_t *res64 = &result->u64;
|
||||
|
||||
if (ntfy1) {
|
||||
while (ntfy1[3] & 0xff000000) {
|
||||
|
|
@ -227,7 +227,10 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
nv30_query_object_del(screen, &q->qo[1]);
|
||||
}
|
||||
|
||||
*res64 = q->result;
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE)
|
||||
result->b = !!q->result;
|
||||
else
|
||||
result->u64 = q->result;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
hq->nesting = nv50->screen->num_occlusion_queries_active++;
|
||||
if (hq->nesting) {
|
||||
nv50_hw_query_get(push, q, 0x10, 0x0100f002);
|
||||
|
|
@ -213,6 +214,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
nv50_hw_query_get(push, q, 0, 0x0100f002);
|
||||
if (--nv50->screen->num_occlusion_queries_active == 0) {
|
||||
PUSH_SPACE(push, 2);
|
||||
|
|
@ -304,6 +306,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
|
|||
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
|
||||
res64[0] = hq->data[1] - hq->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
res8[0] = hq->data[1] != hq->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
|
||||
res64[0] = data64[0] - data64[2];
|
||||
|
|
@ -372,6 +377,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
|
|||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
hq->rotate = 32;
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
|
|
|
|||
|
|
@ -961,8 +961,8 @@ struct pipe_resource *r600_compute_global_buffer_create(
|
|||
templ->array_size);
|
||||
|
||||
result->base.b.vtbl = &r600_global_buffer_vtbl;
|
||||
result->base.b.b.screen = screen;
|
||||
result->base.b.b = *templ;
|
||||
result->base.b.b.screen = screen;
|
||||
pipe_reference_init(&result->base.b.b.reference, 1);
|
||||
|
||||
size_in_dw = (templ->width0+3) / 4;
|
||||
|
|
|
|||
|
|
@ -55,6 +55,14 @@ enum radeon_llvm_shader_type {
|
|||
RADEON_LLVM_SHADER_CS = 3,
|
||||
};
|
||||
|
||||
void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
|
||||
{
|
||||
char str[16];
|
||||
|
||||
snprintf(str, sizeof(str), "%i", value);
|
||||
LLVMAddTargetDependentFunctionAttr(F, name, str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the shader type we want to compile
|
||||
*
|
||||
|
|
@ -62,7 +70,6 @@ enum radeon_llvm_shader_type {
|
|||
*/
|
||||
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
|
||||
{
|
||||
char Str[2];
|
||||
enum radeon_llvm_shader_type llvm_type;
|
||||
|
||||
switch (type) {
|
||||
|
|
@ -84,9 +91,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
|
|||
assert(0);
|
||||
}
|
||||
|
||||
sprintf(Str, "%1d", llvm_type);
|
||||
|
||||
LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
|
||||
radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
|
||||
}
|
||||
|
||||
static void init_r600_target()
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
struct pipe_debug_callback;
|
||||
struct radeon_shader_binary;
|
||||
|
||||
void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value);
|
||||
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
|
||||
|
||||
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
|
||||
|
|
|
|||
|
|
@ -124,7 +124,8 @@ static void *si_create_compute_state(
|
|||
code, header->num_bytes);
|
||||
si_compile_llvm(sctx->screen, &program->kernels[i].binary,
|
||||
&program->kernels[i].config, sctx->tm,
|
||||
mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
|
||||
mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE,
|
||||
"Compute Shader");
|
||||
si_shader_dump(sctx->screen, &program->kernels[i],
|
||||
&sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
|
||||
si_shader_binary_upload(sctx->screen, &program->kernels[i]);
|
||||
|
|
|
|||
|
|
@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx)
|
|||
si_mark_atom_dirty(ctx, &ctx->db_render_state);
|
||||
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
|
||||
si_mark_atom_dirty(ctx, &ctx->spi_map);
|
||||
si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
|
||||
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
|
||||
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
|
||||
si_all_descriptors_begin_new_cs(ctx);
|
||||
|
|
|
|||
|
|
@ -202,7 +202,6 @@ struct si_context {
|
|||
struct si_viewports viewports;
|
||||
struct si_stencil_ref stencil_ref;
|
||||
struct r600_atom spi_map;
|
||||
struct r600_atom spi_ps_input;
|
||||
|
||||
/* Precomputed states. */
|
||||
struct si_pm4_state *init_config;
|
||||
|
|
@ -222,7 +221,6 @@ struct si_context {
|
|||
struct si_vertex_element *vertex_elements;
|
||||
unsigned sprite_coord_enable;
|
||||
bool flatshade;
|
||||
bool force_persample_interp;
|
||||
|
||||
/* shader descriptors */
|
||||
struct si_descriptors vertex_buffers;
|
||||
|
|
|
|||
|
|
@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
|
|||
}
|
||||
|
||||
/* This shouldn't be used by explicit INTERP opcodes. */
|
||||
static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
|
||||
unsigned param)
|
||||
static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
|
||||
unsigned param)
|
||||
{
|
||||
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
|
||||
unsigned sample_param = 0;
|
||||
LLVMValueRef default_ij, sample_ij, force_sample;
|
||||
|
||||
default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
|
||||
if (!si_shader_ctx->shader->key.ps.force_persample_interp)
|
||||
return param;
|
||||
|
||||
/* If the shader doesn't use center/centroid, just return the parameter.
|
||||
*
|
||||
|
|
@ -850,79 +847,52 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
|
|||
switch (param) {
|
||||
case SI_PARAM_PERSP_CENTROID:
|
||||
case SI_PARAM_PERSP_CENTER:
|
||||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
|
||||
return default_ij;
|
||||
|
||||
sample_param = SI_PARAM_PERSP_SAMPLE;
|
||||
break;
|
||||
return SI_PARAM_PERSP_SAMPLE;
|
||||
|
||||
case SI_PARAM_LINEAR_CENTROID:
|
||||
case SI_PARAM_LINEAR_CENTER:
|
||||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
|
||||
return default_ij;
|
||||
|
||||
sample_param = SI_PARAM_LINEAR_SAMPLE;
|
||||
break;
|
||||
return SI_PARAM_LINEAR_SAMPLE;
|
||||
|
||||
default:
|
||||
return default_ij;
|
||||
return param;
|
||||
}
|
||||
|
||||
/* Otherwise, we have to select (i,j) based on a user data SGPR. */
|
||||
sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
|
||||
|
||||
/* TODO: this can be done more efficiently by switching between
|
||||
* 2 prologs.
|
||||
*/
|
||||
force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
|
||||
SI_PARAM_PS_STATE_BITS);
|
||||
force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
|
||||
LLVMInt1TypeInContext(gallivm->context), "");
|
||||
return LLVMBuildSelect(gallivm->builder, force_sample,
|
||||
sample_ij, default_ij, "");
|
||||
}
|
||||
|
||||
static void declare_input_fs(
|
||||
struct radeon_llvm_context *radeon_bld,
|
||||
unsigned input_index,
|
||||
const struct tgsi_full_declaration *decl)
|
||||
/**
|
||||
* Interpolate a fragment shader input.
|
||||
*
|
||||
* @param si_shader_ctx context
|
||||
* @param input_index index of the input in hardware
|
||||
* @param semantic_name TGSI_SEMANTIC_*
|
||||
* @param semantic_index semantic index
|
||||
* @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset)
|
||||
* @param colors_read_mask color components read (4 bits for each color, 8 bits in total)
|
||||
* @param interp_param interpolation weights (i,j)
|
||||
* @param prim_mask SI_PARAM_PRIM_MASK
|
||||
* @param face SI_PARAM_FRONT_FACE
|
||||
* @param result the return value (4 components)
|
||||
*/
|
||||
static void interp_fs_input(struct si_shader_context *si_shader_ctx,
|
||||
unsigned input_index,
|
||||
unsigned semantic_name,
|
||||
unsigned semantic_index,
|
||||
unsigned num_interp_inputs,
|
||||
unsigned colors_read_mask,
|
||||
LLVMValueRef interp_param,
|
||||
LLVMValueRef prim_mask,
|
||||
LLVMValueRef face,
|
||||
LLVMValueRef result[4])
|
||||
{
|
||||
struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
|
||||
struct si_shader_context *si_shader_ctx =
|
||||
si_shader_context(&radeon_bld->soa.bld_base);
|
||||
struct si_shader *shader = si_shader_ctx->shader;
|
||||
struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld;
|
||||
struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
|
||||
struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
|
||||
struct gallivm_state *gallivm = base->gallivm;
|
||||
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
|
||||
LLVMValueRef main_fn = radeon_bld->main_fn;
|
||||
|
||||
LLVMValueRef interp_param = NULL;
|
||||
int interp_param_idx;
|
||||
const char * intr_name;
|
||||
|
||||
/* This value is:
|
||||
* [15:0] NewPrimMask (Bit mask for each quad. It is set it the
|
||||
* quad begins a new primitive. Bit 0 always needs
|
||||
* to be unset)
|
||||
* [32:16] ParamOffset
|
||||
*
|
||||
*/
|
||||
LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
|
||||
LLVMValueRef attr_number;
|
||||
|
||||
unsigned chan;
|
||||
|
||||
shader->ps_input_param_offset[input_index] = shader->nparam++;
|
||||
attr_number = lp_build_const_int32(gallivm,
|
||||
shader->ps_input_param_offset[input_index]);
|
||||
|
||||
shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
|
||||
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
|
||||
decl->Interp.Location);
|
||||
if (interp_param_idx == -1)
|
||||
return;
|
||||
else if (interp_param_idx)
|
||||
interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
|
||||
attr_number = lp_build_const_int32(gallivm, input_index);
|
||||
|
||||
/* fs.constant returns the param from the middle vertex, so it's not
|
||||
* really useful for flat shading. It's meant to be used for custom
|
||||
|
|
@ -936,24 +906,28 @@ static void declare_input_fs(
|
|||
*/
|
||||
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
|
||||
|
||||
if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
|
||||
if (semantic_name == TGSI_SEMANTIC_COLOR &&
|
||||
si_shader_ctx->shader->key.ps.color_two_side) {
|
||||
LLVMValueRef args[4];
|
||||
LLVMValueRef face, is_face_positive;
|
||||
LLVMValueRef back_attr_number =
|
||||
lp_build_const_int32(gallivm,
|
||||
shader->ps_input_param_offset[input_index] + 1);
|
||||
LLVMValueRef is_face_positive;
|
||||
LLVMValueRef back_attr_number;
|
||||
|
||||
face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
|
||||
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
|
||||
* otherwise it's at offset "num_inputs".
|
||||
*/
|
||||
unsigned back_attr_offset = num_interp_inputs;
|
||||
if (semantic_index == 1 && colors_read_mask & 0xf)
|
||||
back_attr_offset += 1;
|
||||
|
||||
back_attr_number = lp_build_const_int32(gallivm, back_attr_offset);
|
||||
|
||||
is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
|
||||
face, uint->zero, "");
|
||||
|
||||
args[2] = params;
|
||||
args[2] = prim_mask;
|
||||
args[3] = interp_param;
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
|
||||
unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
|
||||
LLVMValueRef front, back;
|
||||
|
||||
args[0] = llvm_chan;
|
||||
|
|
@ -967,48 +941,71 @@ static void declare_input_fs(
|
|||
input_type, args, args[3] ? 4 : 3,
|
||||
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
|
||||
|
||||
radeon_bld->inputs[soa_index] =
|
||||
LLVMBuildSelect(gallivm->builder,
|
||||
result[chan] = LLVMBuildSelect(gallivm->builder,
|
||||
is_face_positive,
|
||||
front,
|
||||
back,
|
||||
"");
|
||||
}
|
||||
|
||||
shader->nparam++;
|
||||
} else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
|
||||
} else if (semantic_name == TGSI_SEMANTIC_FOG) {
|
||||
LLVMValueRef args[4];
|
||||
|
||||
args[0] = uint->zero;
|
||||
args[1] = attr_number;
|
||||
args[2] = params;
|
||||
args[2] = prim_mask;
|
||||
args[3] = interp_param;
|
||||
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
|
||||
lp_build_intrinsic(gallivm->builder, intr_name,
|
||||
result[0] = lp_build_intrinsic(gallivm->builder, intr_name,
|
||||
input_type, args, args[3] ? 4 : 3,
|
||||
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
|
||||
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
|
||||
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
|
||||
lp_build_const_float(gallivm, 0.0f);
|
||||
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
|
||||
lp_build_const_float(gallivm, 1.0f);
|
||||
result[1] =
|
||||
result[2] = lp_build_const_float(gallivm, 0.0f);
|
||||
result[3] = lp_build_const_float(gallivm, 1.0f);
|
||||
} else {
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
LLVMValueRef args[4];
|
||||
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
|
||||
unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
|
||||
|
||||
args[0] = llvm_chan;
|
||||
args[1] = attr_number;
|
||||
args[2] = params;
|
||||
args[2] = prim_mask;
|
||||
args[3] = interp_param;
|
||||
radeon_bld->inputs[soa_index] =
|
||||
lp_build_intrinsic(gallivm->builder, intr_name,
|
||||
result[chan] = lp_build_intrinsic(gallivm->builder, intr_name,
|
||||
input_type, args, args[3] ? 4 : 3,
|
||||
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void declare_input_fs(
|
||||
struct radeon_llvm_context *radeon_bld,
|
||||
unsigned input_index,
|
||||
const struct tgsi_full_declaration *decl)
|
||||
{
|
||||
struct si_shader_context *si_shader_ctx =
|
||||
si_shader_context(&radeon_bld->soa.bld_base);
|
||||
struct si_shader *shader = si_shader_ctx->shader;
|
||||
LLVMValueRef main_fn = radeon_bld->main_fn;
|
||||
LLVMValueRef interp_param = NULL;
|
||||
int interp_param_idx;
|
||||
|
||||
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
|
||||
decl->Interp.Location);
|
||||
if (interp_param_idx == -1)
|
||||
return;
|
||||
else if (interp_param_idx) {
|
||||
interp_param_idx = select_interp_param(si_shader_ctx,
|
||||
interp_param_idx);
|
||||
interp_param = LLVMGetParam(main_fn, interp_param_idx);
|
||||
}
|
||||
|
||||
interp_fs_input(si_shader_ctx, input_index, decl->Semantic.Name,
|
||||
decl->Semantic.Index, shader->selector->info.num_inputs,
|
||||
shader->selector->info.colors_read, interp_param,
|
||||
LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
|
||||
LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
|
||||
&radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
|
||||
{
|
||||
return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
|
||||
|
|
@ -1060,7 +1057,6 @@ static void declare_system_value(
|
|||
struct si_shader_context *si_shader_ctx =
|
||||
si_shader_context(&radeon_bld->soa.bld_base);
|
||||
struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
|
||||
struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
|
||||
struct gallivm_state *gallivm = &radeon_bld->gallivm;
|
||||
LLVMValueRef value = 0;
|
||||
|
||||
|
|
@ -1136,12 +1132,10 @@ static void declare_system_value(
|
|||
}
|
||||
|
||||
case TGSI_SEMANTIC_SAMPLEMASK:
|
||||
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
|
||||
* Therefore, force gl_SampleMaskIn to 1 for GL. */
|
||||
if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
|
||||
value = uint_bld->one;
|
||||
else
|
||||
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
|
||||
/* This can only occur with the OpenGL Core profile, which
|
||||
* doesn't support smoothing.
|
||||
*/
|
||||
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
|
||||
break;
|
||||
|
||||
case TGSI_SEMANTIC_TESSCOORD:
|
||||
|
|
@ -1965,21 +1959,20 @@ handle_semantic:
|
|||
}
|
||||
}
|
||||
|
||||
/* This only writes the tessellation factor levels. */
|
||||
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
|
||||
LLVMValueRef rel_patch_id,
|
||||
LLVMValueRef invocation_id,
|
||||
LLVMValueRef tcs_out_current_patch_data_offset)
|
||||
{
|
||||
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
struct si_shader *shader = si_shader_ctx->shader;
|
||||
unsigned tess_inner_index, tess_outer_index;
|
||||
LLVMValueRef lds_base, lds_inner, lds_outer;
|
||||
LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
|
||||
LLVMValueRef out[6], vec0, vec1, invocation_id;
|
||||
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
|
||||
LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base;
|
||||
unsigned stride, outer_comps, inner_comps, i;
|
||||
struct lp_build_if_state if_ctx;
|
||||
|
||||
invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
|
||||
|
||||
/* Do this only for invocation 0, because the tess levels are per-patch,
|
||||
* not per-vertex.
|
||||
*
|
||||
|
|
@ -2018,7 +2011,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
|||
tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
|
||||
tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
|
||||
|
||||
lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
|
||||
lds_base = tcs_out_current_patch_data_offset;
|
||||
lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
|
||||
lp_build_const_int32(gallivm,
|
||||
tess_inner_index * 4), "");
|
||||
|
|
@ -2047,7 +2040,6 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
|||
/* Get the offset. */
|
||||
tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
|
||||
SI_PARAM_TESS_FACTOR_OFFSET);
|
||||
rel_patch_id = get_rel_patch_id(si_shader_ctx);
|
||||
byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
|
||||
lp_build_const_int32(gallivm, 4 * stride), "");
|
||||
|
||||
|
|
@ -2060,6 +2052,20 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
|||
lp_build_endif(&if_ctx);
|
||||
}
|
||||
|
||||
/* This only writes the tessellation factor levels. */
|
||||
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
{
|
||||
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
|
||||
LLVMValueRef invocation_id;
|
||||
|
||||
invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
|
||||
|
||||
si_write_tess_factors(bld_base,
|
||||
get_rel_patch_id(si_shader_ctx),
|
||||
invocation_id,
|
||||
get_tcs_out_current_patch_data_offset(si_shader_ctx));
|
||||
}
|
||||
|
||||
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
|
||||
{
|
||||
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
|
||||
|
|
@ -3253,17 +3259,17 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
|
|||
LLVMValueRef interp_param;
|
||||
const struct tgsi_full_instruction *inst = emit_data->inst;
|
||||
const char *intr_name;
|
||||
int input_index;
|
||||
int input_index = inst->Src[0].Register.Index;
|
||||
int chan;
|
||||
int i;
|
||||
LLVMValueRef attr_number;
|
||||
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
|
||||
LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
|
||||
int interp_param_idx;
|
||||
unsigned interp = shader->selector->info.input_interpolate[input_index];
|
||||
unsigned location;
|
||||
|
||||
assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
|
||||
input_index = inst->Src[0].Register.Index;
|
||||
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
|
||||
inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
|
||||
|
|
@ -3271,8 +3277,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
|
|||
else
|
||||
location = TGSI_INTERPOLATE_LOC_CENTROID;
|
||||
|
||||
interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
|
||||
location);
|
||||
interp_param_idx = lookup_interp_param_index(interp, location);
|
||||
if (interp_param_idx == -1)
|
||||
return;
|
||||
else if (interp_param_idx)
|
||||
|
|
@ -3280,8 +3285,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
|
|||
else
|
||||
interp_param = NULL;
|
||||
|
||||
attr_number = lp_build_const_int32(gallivm,
|
||||
shader->ps_input_param_offset[input_index]);
|
||||
attr_number = lp_build_const_int32(gallivm, input_index);
|
||||
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
|
||||
inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
|
||||
|
|
@ -3632,7 +3636,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
|
|||
|
||||
case TGSI_PROCESSOR_FRAGMENT:
|
||||
params[SI_PARAM_ALPHA_REF] = f32;
|
||||
params[SI_PARAM_PS_STATE_BITS] = i32;
|
||||
params[SI_PARAM_PRIM_MASK] = i32;
|
||||
last_sgpr = SI_PARAM_PRIM_MASK;
|
||||
params[SI_PARAM_PERSP_SAMPLE] = v2i32;
|
||||
|
|
@ -3663,10 +3666,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
|
|||
radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
|
||||
radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
|
||||
|
||||
if (shader->dx10_clamp_mode)
|
||||
LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
|
||||
"enable-no-nans-fp-math", "true");
|
||||
|
||||
for (i = 0; i <= last_sgpr; ++i) {
|
||||
LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
|
||||
|
||||
|
|
@ -3884,7 +3883,7 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
|
|||
conf->spi_ps_input_ena = value;
|
||||
break;
|
||||
case R_0286D0_SPI_PS_INPUT_ADDR:
|
||||
/* Not used yet, but will be in the future */
|
||||
conf->spi_ps_input_addr = value;
|
||||
break;
|
||||
case R_0286E8_SPI_TMPRING_SIZE:
|
||||
case R_00B860_COMPUTE_TMPRING_SIZE:
|
||||
|
|
@ -3904,6 +3903,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!conf->spi_ps_input_addr)
|
||||
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4045,6 +4047,13 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
|
|||
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
|
||||
|
||||
if (r600_can_dump_shader(&sscreen->b, processor)) {
|
||||
if (processor == TGSI_PROCESSOR_FRAGMENT) {
|
||||
fprintf(stderr, "*** SHADER CONFIG ***\n"
|
||||
"SPI_PS_INPUT_ADDR = 0x%04x\n"
|
||||
"SPI_PS_INPUT_ENA = 0x%04x\n",
|
||||
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
|
||||
}
|
||||
|
||||
fprintf(stderr, "*** SHADER STATS ***\n"
|
||||
"SGPRS: %d\n"
|
||||
"VGPRS: %d\n"
|
||||
|
|
@ -4084,7 +4093,8 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
LLVMTargetMachineRef tm,
|
||||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
unsigned processor)
|
||||
unsigned processor,
|
||||
const char *name)
|
||||
{
|
||||
int r = 0;
|
||||
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
|
||||
|
|
@ -4092,8 +4102,11 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
if (r600_can_dump_shader(&sscreen->b, processor)) {
|
||||
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
|
||||
|
||||
if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
|
||||
if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
|
||||
fprintf(stderr, "%s LLVM IR:\n\n", name);
|
||||
LLVMDumpModule(mod);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (!si_replace_shader(count, binary)) {
|
||||
|
|
@ -4106,6 +4119,20 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
|
||||
si_shader_binary_read_config(binary, conf, 0);
|
||||
|
||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||
* cost.
|
||||
*
|
||||
* If denormals are enabled, all floating-point output modifiers are
|
||||
* ignored.
|
||||
*
|
||||
* Don't enable denormals for 32-bit floats, because:
|
||||
* - Floating-point output modifiers would be ignored by the hw.
|
||||
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
||||
* have to stop using those.
|
||||
* - SI & CI would be very slow.
|
||||
*/
|
||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||
|
||||
FREE(binary->config);
|
||||
FREE(binary->global_symbol_offsets);
|
||||
binary->config = NULL;
|
||||
|
|
@ -4116,7 +4143,7 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
/* Generate code for the hardware VS shader stage to go with a geometry shader */
|
||||
static int si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct si_shader_context *si_shader_ctx,
|
||||
struct si_shader *gs, bool dump,
|
||||
struct si_shader *gs,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
|
||||
|
|
@ -4186,14 +4213,14 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
|
||||
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
|
||||
|
||||
if (dump)
|
||||
fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
|
||||
|
||||
r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary,
|
||||
&si_shader_ctx->shader->config, si_shader_ctx->tm,
|
||||
bld_base->base.gallivm->module,
|
||||
debug, TGSI_PROCESSOR_GEOMETRY);
|
||||
debug, TGSI_PROCESSOR_GEOMETRY,
|
||||
"GS Copy Shader");
|
||||
if (!r) {
|
||||
if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
|
||||
fprintf(stderr, "GS Copy Shader:\n");
|
||||
si_shader_dump(sscreen, si_shader_ctx->shader, debug,
|
||||
TGSI_PROCESSOR_GEOMETRY);
|
||||
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
|
||||
|
|
@ -4250,47 +4277,26 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
|
|||
}
|
||||
}
|
||||
|
||||
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
||||
struct si_shader *shader,
|
||||
struct pipe_debug_callback *debug)
|
||||
static void si_init_shader_ctx(struct si_shader_context *ctx,
|
||||
struct si_screen *sscreen,
|
||||
struct si_shader *shader,
|
||||
LLVMTargetMachineRef tm,
|
||||
struct tgsi_shader_info *info)
|
||||
{
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct tgsi_token *tokens = sel->tokens;
|
||||
struct si_shader_context si_shader_ctx;
|
||||
struct lp_build_tgsi_context * bld_base;
|
||||
struct tgsi_shader_info stipple_shader_info;
|
||||
LLVMModuleRef mod;
|
||||
int r = 0;
|
||||
bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
|
||||
shader->key.ps.poly_stipple;
|
||||
bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
|
||||
struct lp_build_tgsi_context *bld_base;
|
||||
|
||||
if (poly_stipple) {
|
||||
tokens = util_pstipple_create_fragment_shader(tokens, NULL,
|
||||
SI_POLY_STIPPLE_SAMPLER,
|
||||
TGSI_FILE_SYSTEM_VALUE);
|
||||
tgsi_scan_shader(tokens, &stipple_shader_info);
|
||||
}
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
radeon_llvm_context_init(&ctx->radeon_bld);
|
||||
ctx->tm = tm;
|
||||
ctx->screen = sscreen;
|
||||
if (shader && shader->selector)
|
||||
ctx->type = shader->selector->info.processor;
|
||||
else
|
||||
ctx->type = -1;
|
||||
ctx->shader = shader;
|
||||
|
||||
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
|
||||
* conversion fails. */
|
||||
if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
|
||||
si_dump_shader_key(sel->type, &shader->key, stderr);
|
||||
tgsi_dump(tokens, 0);
|
||||
si_dump_streamout(&sel->so);
|
||||
}
|
||||
|
||||
assert(shader->nparam == 0);
|
||||
|
||||
memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
|
||||
radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
|
||||
bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
|
||||
|
||||
if (sel->type != PIPE_SHADER_COMPUTE)
|
||||
shader->dx10_clamp_mode = true;
|
||||
|
||||
shader->uses_instanceid = sel->info.uses_instanceid;
|
||||
bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
|
||||
bld_base = &ctx->radeon_bld.soa.bld_base;
|
||||
bld_base->info = info;
|
||||
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
|
||||
|
||||
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
|
||||
|
|
@ -4326,12 +4332,45 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
|||
bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
|
||||
}
|
||||
}
|
||||
|
||||
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
||||
struct si_shader *shader,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct tgsi_token *tokens = sel->tokens;
|
||||
struct si_shader_context si_shader_ctx;
|
||||
struct lp_build_tgsi_context * bld_base;
|
||||
struct tgsi_shader_info stipple_shader_info;
|
||||
LLVMModuleRef mod;
|
||||
int r = 0;
|
||||
bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
|
||||
shader->key.ps.poly_stipple;
|
||||
|
||||
if (poly_stipple) {
|
||||
tokens = util_pstipple_create_fragment_shader(tokens, NULL,
|
||||
SI_POLY_STIPPLE_SAMPLER,
|
||||
TGSI_FILE_SYSTEM_VALUE);
|
||||
tgsi_scan_shader(tokens, &stipple_shader_info);
|
||||
}
|
||||
|
||||
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
|
||||
* conversion fails. */
|
||||
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
|
||||
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
|
||||
si_dump_shader_key(sel->type, &shader->key, stderr);
|
||||
tgsi_dump(tokens, 0);
|
||||
si_dump_streamout(&sel->so);
|
||||
}
|
||||
|
||||
si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm,
|
||||
poly_stipple ? &stipple_shader_info : &sel->info);
|
||||
|
||||
shader->uses_instanceid = sel->info.uses_instanceid;
|
||||
|
||||
bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
|
||||
si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
|
||||
si_shader_ctx.shader = shader;
|
||||
si_shader_ctx.type = tgsi_get_processor_type(tokens);
|
||||
si_shader_ctx.screen = sscreen;
|
||||
si_shader_ctx.tm = tm;
|
||||
|
||||
switch (si_shader_ctx.type) {
|
||||
case TGSI_PROCESSOR_VERTEX:
|
||||
|
|
@ -4401,7 +4440,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
|||
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
|
||||
|
||||
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
|
||||
mod, debug, si_shader_ctx.type);
|
||||
mod, debug, si_shader_ctx.type, "TGSI shader");
|
||||
if (r) {
|
||||
fprintf(stderr, "LLVM failed to compile shader\n");
|
||||
goto out;
|
||||
|
|
@ -4422,7 +4461,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
|||
shader->gs_copy_shader->selector = shader->selector;
|
||||
si_shader_ctx.shader = shader->gs_copy_shader;
|
||||
if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
|
||||
shader, dump, debug))) {
|
||||
shader, debug))) {
|
||||
free(shader->gs_copy_shader);
|
||||
shader->gs_copy_shader = NULL;
|
||||
goto out;
|
||||
|
|
|
|||
|
|
@ -88,7 +88,6 @@ struct radeon_shader_reloc;
|
|||
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
|
||||
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
|
||||
#define SI_SGPR_ALPHA_REF 8 /* PS only */
|
||||
#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
|
||||
|
||||
#define SI_VS_NUM_USER_SGPR 13 /* API VS */
|
||||
#define SI_ES_NUM_USER_SGPR 12 /* API VS */
|
||||
|
|
@ -97,7 +96,7 @@ struct radeon_shader_reloc;
|
|||
#define SI_TES_NUM_USER_SGPR 10
|
||||
#define SI_GS_NUM_USER_SGPR 8
|
||||
#define SI_GSCOPY_NUM_USER_SGPR 4
|
||||
#define SI_PS_NUM_USER_SGPR 10
|
||||
#define SI_PS_NUM_USER_SGPR 9
|
||||
|
||||
/* LLVM function parameter indices */
|
||||
#define SI_PARAM_RW_BUFFERS 0
|
||||
|
|
@ -152,27 +151,23 @@ struct radeon_shader_reloc;
|
|||
|
||||
/* PS only parameters */
|
||||
#define SI_PARAM_ALPHA_REF 4
|
||||
/* Bits:
|
||||
* 0: force_persample_interp
|
||||
*/
|
||||
#define SI_PARAM_PS_STATE_BITS 5
|
||||
#define SI_PARAM_PRIM_MASK 6
|
||||
#define SI_PARAM_PERSP_SAMPLE 7
|
||||
#define SI_PARAM_PERSP_CENTER 8
|
||||
#define SI_PARAM_PERSP_CENTROID 9
|
||||
#define SI_PARAM_PERSP_PULL_MODEL 10
|
||||
#define SI_PARAM_LINEAR_SAMPLE 11
|
||||
#define SI_PARAM_LINEAR_CENTER 12
|
||||
#define SI_PARAM_LINEAR_CENTROID 13
|
||||
#define SI_PARAM_LINE_STIPPLE_TEX 14
|
||||
#define SI_PARAM_POS_X_FLOAT 15
|
||||
#define SI_PARAM_POS_Y_FLOAT 16
|
||||
#define SI_PARAM_POS_Z_FLOAT 17
|
||||
#define SI_PARAM_POS_W_FLOAT 18
|
||||
#define SI_PARAM_FRONT_FACE 19
|
||||
#define SI_PARAM_ANCILLARY 20
|
||||
#define SI_PARAM_SAMPLE_COVERAGE 21
|
||||
#define SI_PARAM_POS_FIXED_PT 22
|
||||
#define SI_PARAM_PRIM_MASK 5
|
||||
#define SI_PARAM_PERSP_SAMPLE 6
|
||||
#define SI_PARAM_PERSP_CENTER 7
|
||||
#define SI_PARAM_PERSP_CENTROID 8
|
||||
#define SI_PARAM_PERSP_PULL_MODEL 9
|
||||
#define SI_PARAM_LINEAR_SAMPLE 10
|
||||
#define SI_PARAM_LINEAR_CENTER 11
|
||||
#define SI_PARAM_LINEAR_CENTROID 12
|
||||
#define SI_PARAM_LINE_STIPPLE_TEX 13
|
||||
#define SI_PARAM_POS_X_FLOAT 14
|
||||
#define SI_PARAM_POS_Y_FLOAT 15
|
||||
#define SI_PARAM_POS_Z_FLOAT 16
|
||||
#define SI_PARAM_POS_W_FLOAT 17
|
||||
#define SI_PARAM_FRONT_FACE 18
|
||||
#define SI_PARAM_ANCILLARY 19
|
||||
#define SI_PARAM_SAMPLE_COVERAGE 20
|
||||
#define SI_PARAM_POS_FIXED_PT 21
|
||||
|
||||
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
|
||||
|
||||
|
|
@ -193,14 +188,6 @@ struct si_shader_selector {
|
|||
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
|
||||
unsigned type;
|
||||
|
||||
/* Whether the shader has to use a conditional assignment to
|
||||
* choose between weights when emulating
|
||||
* pipe_rasterizer_state::force_persample_interp.
|
||||
* If false, "si_emit_spi_ps_input" will take care of it instead.
|
||||
*/
|
||||
bool forces_persample_interp_for_persp;
|
||||
bool forces_persample_interp_for_linear;
|
||||
|
||||
/* GS parameters. */
|
||||
unsigned esgs_itemsize;
|
||||
unsigned gs_input_verts_per_prim;
|
||||
|
|
@ -245,6 +232,7 @@ union si_shader_key {
|
|||
unsigned poly_stipple:1;
|
||||
unsigned poly_line_smoothing:1;
|
||||
unsigned clamp_color:1;
|
||||
unsigned force_persample_interp:1;
|
||||
} ps;
|
||||
struct {
|
||||
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
|
||||
|
|
@ -272,6 +260,7 @@ struct si_shader_config {
|
|||
unsigned num_vgprs;
|
||||
unsigned lds_size;
|
||||
unsigned spi_ps_input_ena;
|
||||
unsigned spi_ps_input_addr;
|
||||
unsigned float_mode;
|
||||
unsigned scratch_bytes_per_wave;
|
||||
unsigned rsrc1;
|
||||
|
|
@ -290,14 +279,10 @@ struct si_shader {
|
|||
struct radeon_shader_binary binary;
|
||||
struct si_shader_config config;
|
||||
|
||||
unsigned nparam;
|
||||
unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
|
||||
unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
|
||||
unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
||||
bool uses_instanceid;
|
||||
unsigned nr_pos_exports;
|
||||
unsigned nr_param_exports;
|
||||
bool dx10_clamp_mode; /* convert NaNs to 0 */
|
||||
};
|
||||
|
||||
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
|
||||
|
|
@ -343,7 +328,8 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
LLVMTargetMachineRef tm,
|
||||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
unsigned processor);
|
||||
unsigned processor,
|
||||
const char *name);
|
||||
void si_shader_destroy(struct si_shader *shader);
|
||||
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
|
||||
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
|
||||
|
|
|
|||
|
|
@ -133,7 +133,6 @@ union si_state_atoms {
|
|||
struct r600_atom *viewports;
|
||||
struct r600_atom *stencil_ref;
|
||||
struct r600_atom *spi_map;
|
||||
struct r600_atom *spi_ps_input;
|
||||
} s;
|
||||
struct r600_atom *array[0];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
|
|||
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B528_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
|
||||
S_00B528_DX10_CLAMP(1) |
|
||||
S_00B528_FLOAT_MODE(shader->config.float_mode);
|
||||
shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||
}
|
||||
|
|
@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
|
||||
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B428_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
|
||||
S_00B428_DX10_CLAMP(1) |
|
||||
S_00B428_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
|
||||
S_00B42C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
|
|||
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B328_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
|
||||
S_00B328_DX10_CLAMP(1) |
|
||||
S_00B328_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
|
||||
S_00B32C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B228_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
|
||||
S_00B228_DX10_CLAMP(1) |
|
||||
S_00B228_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
|
||||
S_00B22C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
|||
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B128_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
|
||||
S_00B128_DX10_CLAMP(1) |
|
||||
S_00B128_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
|
||||
S_00B12C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
|
||||
|
|
@ -404,6 +409,18 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
|||
si_set_tesseval_regs(shader, pm4);
|
||||
}
|
||||
|
||||
/* Returns the number of interpolated inputs of pixel shader variant "ps":
 * the selector's TGSI input count, plus (only when the variant key has
 * color_two_side set) one extra entry per color that the shader reads.
 * The result never exceeds 32.
 */
static unsigned si_get_ps_num_interp(struct si_shader *ps)
|
||||
{
|
||||
struct tgsi_shader_info *info = &ps->selector->info;
|
||||
/* Counts how many of the two 4-bit groups of colors_read are non-zero
 * (presumably one group of component bits per color input - confirm
 * against the tgsi_shader_info definition). */
unsigned num_colors = !!(info->colors_read & 0x0f) +
|
||||
!!(info->colors_read & 0xf0);
|
||||
/* NOTE(review): the extra entries presumably account for the back-color
 * inputs used with two-sided lighting - confirm against si_emit_spi_map. */
unsigned num_interp = ps->selector->info.num_inputs +
|
||||
(ps->key.ps.color_two_side ? num_colors : 0);
|
||||
|
||||
/* More than 32 is reported by the assert when assertions are enabled
 * and clamped to 32 otherwise. */
assert(num_interp <= 32);
|
||||
return MIN2(num_interp, 32);
|
||||
}
|
||||
|
||||
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
|
||||
{
|
||||
unsigned value = shader->key.ps.spi_shader_col_format;
|
||||
|
|
@ -460,6 +477,17 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
|
||||
uint64_t va;
|
||||
bool has_centroid;
|
||||
unsigned input_ena = shader->config.spi_ps_input_ena;
|
||||
|
||||
/* we need to enable at least one of them, otherwise we hang the GPU */
|
||||
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_CENTER_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
|
||||
G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
|
||||
|
||||
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
|
|
@ -503,11 +531,15 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
|
||||
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
||||
|
||||
si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
|
||||
si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
|
||||
shader->config.spi_ps_input_addr);
|
||||
|
||||
/* Set interpolation controls. */
|
||||
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
|
||||
|
||||
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
|
||||
spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)) |
|
||||
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
|
||||
|
||||
/* Set registers. */
|
||||
|
|
@ -540,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
|
||||
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B028_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
|
||||
S_00B028_DX10_CLAMP(1) |
|
||||
S_00B028_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
|
||||
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
|
||||
S_00B02C_USER_SGPR(num_user_sgprs) |
|
||||
|
|
@ -681,7 +714,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
|||
sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
|
||||
bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
|
||||
|
||||
key->ps.color_two_side = rs->two_side;
|
||||
key->ps.color_two_side = rs->two_side && sel->info.colors_read;
|
||||
|
||||
if (sctx->queued.named.blend) {
|
||||
key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
|
||||
|
|
@ -694,6 +727,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
|||
(is_line && rs->line_smooth)) &&
|
||||
sctx->framebuffer.nr_samples <= 1;
|
||||
key->ps.clamp_color = rs->clamp_fragment_color;
|
||||
|
||||
key->ps.force_persample_interp = rs->force_persample_interp &&
|
||||
rs->multisample_enable &&
|
||||
sctx->framebuffer.nr_samples > 1 &&
|
||||
sctx->ps_iter_samples > 1 &&
|
||||
(sel->info.uses_persp_center ||
|
||||
sel->info.uses_persp_centroid ||
|
||||
sel->info.uses_linear_center ||
|
||||
sel->info.uses_linear_centroid);
|
||||
}
|
||||
|
||||
key->ps.alpha_func = si_get_alpha_test_func(sctx);
|
||||
|
|
@ -796,7 +838,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
|
||||
p_atomic_inc(&sscreen->b.num_shaders_created);
|
||||
|
||||
/* First set which opcode uses which (i,j) pair. */
|
||||
/* Set which opcode uses which (i,j) pair. */
|
||||
if (sel->info.uses_persp_opcode_interp_centroid)
|
||||
sel->info.uses_persp_centroid = true;
|
||||
|
||||
|
|
@ -811,19 +853,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->info.uses_linear_opcode_interp_sample)
|
||||
sel->info.uses_linear_center = true;
|
||||
|
||||
/* Determine if the shader has to use a conditional assignment when
|
||||
* emulating force_persample_interp.
|
||||
*/
|
||||
sel->forces_persample_interp_for_persp =
|
||||
sel->info.uses_persp_center +
|
||||
sel->info.uses_persp_centroid +
|
||||
sel->info.uses_persp_sample >= 2;
|
||||
|
||||
sel->forces_persample_interp_for_linear =
|
||||
sel->info.uses_linear_center +
|
||||
sel->info.uses_linear_centroid +
|
||||
sel->info.uses_linear_sample >= 2;
|
||||
|
||||
switch (sel->type) {
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
sel->gs_output_prim =
|
||||
|
|
@ -893,7 +922,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
}
|
||||
|
||||
/* Pre-compilation. */
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
|
||||
if (sel->type == PIPE_SHADER_GEOMETRY ||
|
||||
sscreen->b.debug_flags & DBG_PRECOMPILE) {
|
||||
struct si_shader_ctx_state state = {sel};
|
||||
union si_shader_key key;
|
||||
|
||||
|
|
@ -1030,6 +1060,41 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
|||
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
|
||||
}
|
||||
|
||||
/* Releases one shader variant.
 *
 * If a pm4 state exists for the variant, it is handed to
 * si_pm4_delete_state() for the state slot (ls/es/vs/hs/gs/ps) selected by
 * the selector's shader type and the variant key. The shader is then
 * destroyed with si_shader_destroy() and its memory is freed, so "shader"
 * must not be used after this call.
 */
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
|
||||
{
|
||||
/* Variants without a pm4 state skip straight to the destroy/free below. */
if (shader->pm4) {
|
||||
switch (shader->selector->type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
/* A vertex shader occupies the ls, es or vs slot depending on its key. */
if (shader->key.vs.as_ls)
|
||||
si_pm4_delete_state(sctx, ls, shader->pm4);
|
||||
else if (shader->key.vs.as_es)
|
||||
si_pm4_delete_state(sctx, es, shader->pm4);
|
||||
else
|
||||
si_pm4_delete_state(sctx, vs, shader->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
si_pm4_delete_state(sctx, hs, shader->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
/* A tess eval shader occupies the es or vs slot depending on its key. */
if (shader->key.tes.as_es)
|
||||
si_pm4_delete_state(sctx, es, shader->pm4);
|
||||
else
|
||||
si_pm4_delete_state(sctx, vs, shader->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
si_pm4_delete_state(sctx, gs, shader->pm4);
|
||||
/* The GS copy shader's state is deleted from the vs slot.
 * NOTE(review): gs_copy_shader is dereferenced without a NULL check;
 * presumably it is always set when the GS has a pm4 state - confirm. */
si_pm4_delete_state(sctx, vs, shader->gs_copy_shader->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
si_pm4_delete_state(sctx, ps, shader->pm4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
si_shader_destroy(shader);
|
||||
free(shader);
|
||||
}
|
||||
|
||||
static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
|
@ -1050,35 +1115,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
|
||||
while (p) {
|
||||
c = p->next_variant;
|
||||
switch (sel->type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
if (p->key.vs.as_ls)
|
||||
si_pm4_delete_state(sctx, ls, p->pm4);
|
||||
else if (p->key.vs.as_es)
|
||||
si_pm4_delete_state(sctx, es, p->pm4);
|
||||
else
|
||||
si_pm4_delete_state(sctx, vs, p->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
si_pm4_delete_state(sctx, hs, p->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
if (p->key.tes.as_es)
|
||||
si_pm4_delete_state(sctx, es, p->pm4);
|
||||
else
|
||||
si_pm4_delete_state(sctx, vs, p->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
si_pm4_delete_state(sctx, gs, p->pm4);
|
||||
si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
si_pm4_delete_state(sctx, ps, p->pm4);
|
||||
break;
|
||||
}
|
||||
|
||||
si_shader_destroy(p);
|
||||
free(p);
|
||||
si_delete_shader(sctx, p);
|
||||
p = c;
|
||||
}
|
||||
|
||||
|
|
@ -1087,132 +1124,86 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
free(sel);
|
||||
}
|
||||
|
||||
/* Computes the input control word for one pixel shader input, built from
 * the S_028644_* register fields.
 *
 * sctx:        context; its flatshade and sprite_coord_enable state is read
 * vs:          shader whose outputs feed the pixel shader; the input is
 *              matched against its output semantics
 * name:        TGSI semantic name of the input
 * index:       TGSI semantic index of the input
 * interpolate: TGSI interpolation mode of the input
 *
 * Returns the assembled control word.
 */
static unsigned si_get_ps_input_cntl(struct si_context *sctx,
|
||||
struct si_shader *vs, unsigned name,
|
||||
unsigned index, unsigned interpolate)
|
||||
{
|
||||
struct tgsi_shader_info *vsinfo = &vs->selector->info;
|
||||
unsigned j, ps_input_cntl = 0;
|
||||
|
||||
/* Constant inputs are always flat-shaded; color inputs only when the
 * context has flat shading enabled. */
if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
||||
(interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
|
||||
ps_input_cntl |= S_028644_FLAT_SHADE(1);
|
||||
|
||||
/* PCOORD always sets the point-sprite bit; TEXCOORD only if its bit is
 * set in sprite_coord_enable.
 * NOTE(review): "1 << index" assumes index < 32 - confirm the range of
 * TEXCOORD semantic indices. */
if (name == TGSI_SEMANTIC_PCOORD ||
|
||||
(name == TGSI_SEMANTIC_TEXCOORD &&
|
||||
sctx->sprite_coord_enable & (1 << index))) {
|
||||
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
|
||||
}
|
||||
|
||||
/* Find the output of "vs" with the same semantic name and index and use
 * its parameter offset. If none matches, j ends up equal to num_outputs,
 * which the fallback below tests for. */
for (j = 0; j < vsinfo->num_outputs; j++) {
|
||||
if (name == vsinfo->output_semantic_name[j] &&
|
||||
index == vsinfo->output_semantic_index[j]) {
|
||||
ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (name == TGSI_SEMANTIC_PRIMID)
|
||||
/* PrimID is written after the last output. */
|
||||
ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
|
||||
else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||
/* No corresponding output found, load defaults into input.
|
||||
* Don't set any other bits.
|
||||
* (FLAT_SHADE=1 completely changes behavior) */
|
||||
/* Plain assignment, not |=, so a FLAT_SHADE bit set above is dropped. */
ps_input_cntl = S_028644_OFFSET(0x20);
|
||||
}
|
||||
return ps_input_cntl;
|
||||
}
|
||||
|
||||
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
|
||||
struct si_shader *ps = sctx->ps_shader.current;
|
||||
struct si_shader *vs = si_get_vs_state(sctx);
|
||||
struct tgsi_shader_info *psinfo;
|
||||
struct tgsi_shader_info *vsinfo = &vs->selector->info;
|
||||
unsigned i, j, tmp, num_written = 0;
|
||||
struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
|
||||
unsigned i, num_interp, num_written = 0, bcol_interp[2];
|
||||
|
||||
if (!ps || !ps->nparam)
|
||||
if (!ps || !ps->selector->info.num_inputs)
|
||||
return;
|
||||
|
||||
psinfo = &ps->selector->info;
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
|
||||
num_interp = si_get_ps_num_interp(ps);
|
||||
assert(num_interp > 0);
|
||||
radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
|
||||
|
||||
for (i = 0; i < psinfo->num_inputs; i++) {
|
||||
unsigned name = psinfo->input_semantic_name[i];
|
||||
unsigned index = psinfo->input_semantic_index[i];
|
||||
unsigned interpolate = psinfo->input_interpolate[i];
|
||||
unsigned param_offset = ps->ps_input_param_offset[i];
|
||||
bcolor:
|
||||
tmp = 0;
|
||||
|
||||
if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
||||
(interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
|
||||
tmp |= S_028644_FLAT_SHADE(1);
|
||||
|
||||
if (name == TGSI_SEMANTIC_PCOORD ||
|
||||
(name == TGSI_SEMANTIC_TEXCOORD &&
|
||||
sctx->sprite_coord_enable & (1 << index))) {
|
||||
tmp |= S_028644_PT_SPRITE_TEX(1);
|
||||
}
|
||||
|
||||
for (j = 0; j < vsinfo->num_outputs; j++) {
|
||||
if (name == vsinfo->output_semantic_name[j] &&
|
||||
index == vsinfo->output_semantic_index[j]) {
|
||||
tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (name == TGSI_SEMANTIC_PRIMID)
|
||||
/* PrimID is written after the last output. */
|
||||
tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
|
||||
else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
|
||||
/* No corresponding output found, load defaults into input.
|
||||
* Don't set any other bits.
|
||||
* (FLAT_SHADE=1 completely changes behavior) */
|
||||
tmp = S_028644_OFFSET(0x20);
|
||||
}
|
||||
|
||||
assert(param_offset == num_written);
|
||||
radeon_emit(cs, tmp);
|
||||
radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
|
||||
interpolate));
|
||||
num_written++;
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR &&
|
||||
ps->key.ps.color_two_side) {
|
||||
name = TGSI_SEMANTIC_BCOLOR;
|
||||
param_offset++;
|
||||
goto bcolor;
|
||||
if (name == TGSI_SEMANTIC_COLOR) {
|
||||
assert(index < ARRAY_SIZE(bcol_interp));
|
||||
bcol_interp[index] = interpolate;
|
||||
}
|
||||
}
|
||||
assert(ps->nparam == num_written);
|
||||
}
|
||||
|
||||
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
|
||||
struct si_shader *ps = sctx->ps_shader.current;
|
||||
unsigned input_ena;
|
||||
if (ps->key.ps.color_two_side) {
|
||||
unsigned bcol = TGSI_SEMANTIC_BCOLOR;
|
||||
|
||||
if (!ps)
|
||||
return;
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (!(psinfo->colors_read & (0xf << (i * 4))))
|
||||
continue;
|
||||
|
||||
input_ena = ps->config.spi_ps_input_ena;
|
||||
|
||||
/* we need to enable at least one of them, otherwise we hang the GPU */
|
||||
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_CENTER_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
|
||||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
|
||||
G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
|
||||
|
||||
if (sctx->force_persample_interp) {
|
||||
unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
|
||||
G_0286CC_PERSP_CENTER_ENA(input_ena) +
|
||||
G_0286CC_PERSP_CENTROID_ENA(input_ena);
|
||||
unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
|
||||
G_0286CC_LINEAR_CENTER_ENA(input_ena) +
|
||||
G_0286CC_LINEAR_CENTROID_ENA(input_ena);
|
||||
|
||||
/* If only one set of (i,j) coordinates is used, we can disable
|
||||
* CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
|
||||
* where CENTER/CENTROID are expected, effectively forcing per-sample
|
||||
* interpolation.
|
||||
*/
|
||||
if (num_persp == 1) {
|
||||
input_ena &= C_0286CC_PERSP_CENTER_ENA;
|
||||
input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
|
||||
i, bcol_interp[i]));
|
||||
num_written++;
|
||||
}
|
||||
if (num_linear == 1) {
|
||||
input_ena &= C_0286CC_LINEAR_CENTER_ENA;
|
||||
input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
|
||||
}
|
||||
|
||||
/* If at least 2 sets of coordinates are used, we can't use this
|
||||
* trick and have to select SAMPLE using a conditional assignment
|
||||
* in the shader with "force_persample_interp" being a shader constant.
|
||||
*/
|
||||
}
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
|
||||
radeon_emit(cs, input_ena);
|
||||
radeon_emit(cs, input_ena);
|
||||
|
||||
if (ps->selector->forces_persample_interp_for_persp ||
|
||||
ps->selector->forces_persample_interp_for_linear)
|
||||
radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
|
||||
SI_SGPR_PS_STATE_BITS * 4,
|
||||
sctx->force_persample_interp);
|
||||
assert(num_interp == num_written);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1746,12 +1737,6 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
si_mark_atom_dirty(sctx, &sctx->spi_map);
|
||||
}
|
||||
|
||||
if (si_pm4_state_changed(sctx, ps) ||
|
||||
sctx->force_persample_interp != rs->force_persample_interp) {
|
||||
sctx->force_persample_interp = rs->force_persample_interp;
|
||||
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
|
||||
}
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
|
||||
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
|
||||
|
||||
|
|
@ -1784,7 +1769,6 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
void si_init_shader_functions(struct si_context *sctx)
|
||||
{
|
||||
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
|
||||
si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
|
||||
|
||||
sctx->b.b.create_vs_state = si_create_shader_selector;
|
||||
sctx->b.b.create_tcs_state = si_create_shader_selector;
|
||||
|
|
|
|||
|
|
@ -2845,6 +2845,9 @@
|
|||
#define S_00B028_FLOAT_MODE(x) (((x) & 0xFF) << 12)
|
||||
#define G_00B028_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
|
||||
#define C_00B028_FLOAT_MODE 0xFFF00FFF
|
||||
#define V_00B028_FP_32_DENORMS 0x30
|
||||
#define V_00B028_FP_64_DENORMS 0xc0
|
||||
#define V_00B028_FP_ALL_DENORMS 0xf0
|
||||
#define S_00B028_PRIV(x) (((x) & 0x1) << 20)
|
||||
#define G_00B028_PRIV(x) (((x) >> 20) & 0x1)
|
||||
#define C_00B028_PRIV 0xFFEFFFFF
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "sp_state.h"
|
||||
#include "sp_tile_cache.h"
|
||||
#include "sp_tex_tile_cache.h"
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
**********************************************************/
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_string.h"
|
||||
#include "svga_screen.h"
|
||||
#include "svga_surface.h"
|
||||
|
|
|
|||
|
|
@ -120,18 +120,13 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
|
|||
trace_dump_trace_flush();
|
||||
|
||||
if (info->indirect) {
|
||||
struct pipe_draw_info *_info = NULL;
|
||||
struct pipe_draw_info _info;
|
||||
|
||||
_info = MALLOC(sizeof(*_info));
|
||||
if (!_info)
|
||||
return;
|
||||
|
||||
memcpy(_info, info, sizeof(*_info));
|
||||
_info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect);
|
||||
_info->indirect_params = trace_resource_unwrap(tr_ctx,
|
||||
_info->indirect_params);
|
||||
pipe->draw_vbo(pipe, _info);
|
||||
FREE(_info);
|
||||
memcpy(&_info, info, sizeof(_info));
|
||||
_info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect);
|
||||
_info.indirect_params = trace_resource_unwrap(tr_ctx,
|
||||
_info.indirect_params);
|
||||
pipe->draw_vbo(pipe, &_info);
|
||||
} else {
|
||||
pipe->draw_vbo(pipe, info);
|
||||
}
|
||||
|
|
@ -1284,6 +1279,33 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
|
|||
trace_dump_call_end();
|
||||
}
|
||||
|
||||
static inline void
|
||||
trace_context_clear_texture(struct pipe_context *_pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned level,
|
||||
const struct pipe_box *box,
|
||||
const void *data)
|
||||
{
|
||||
struct trace_context *tr_ctx = trace_context(_pipe);
|
||||
struct pipe_context *pipe = tr_ctx->pipe;
|
||||
|
||||
res = trace_resource_unwrap(tr_ctx, res);
|
||||
|
||||
trace_dump_call_begin("pipe_context", "clear_texture");
|
||||
|
||||
trace_dump_arg(ptr, pipe);
|
||||
trace_dump_arg(ptr, res);
|
||||
trace_dump_arg(uint, level);
|
||||
trace_dump_arg_begin("box");
|
||||
trace_dump_box(box);
|
||||
trace_dump_arg_end();
|
||||
trace_dump_arg(ptr, data);
|
||||
|
||||
pipe->clear_texture(pipe, res, level, box, data);
|
||||
|
||||
trace_dump_call_end();
|
||||
}
|
||||
|
||||
static inline void
|
||||
trace_context_flush(struct pipe_context *_pipe,
|
||||
struct pipe_fence_handle **fence,
|
||||
|
|
@ -1709,6 +1731,7 @@ trace_context_create(struct trace_screen *tr_scr,
|
|||
TR_CTX_INIT(clear);
|
||||
TR_CTX_INIT(clear_render_target);
|
||||
TR_CTX_INIT(clear_depth_stencil);
|
||||
TR_CTX_INIT(clear_texture);
|
||||
TR_CTX_INIT(flush);
|
||||
TR_CTX_INIT(generate_mipmap);
|
||||
TR_CTX_INIT(texture_barrier);
|
||||
|
|
|
|||
|
|
@ -348,6 +348,12 @@ enum pipe_flush_flags
|
|||
*/
|
||||
#define PIPE_CONTEXT_DEBUG (1 << 1)
|
||||
|
||||
/**
|
||||
* Whether out-of-bounds shader loads must return zero and out-of-bounds
|
||||
* shader stores must be dropped.
|
||||
*/
|
||||
#define PIPE_CONTEXT_ROBUST_BUFFER_ACCESS (1 << 2)
|
||||
|
||||
/**
|
||||
* Flags for pipe_context::memory_barrier.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "pipe/p_state.h"
|
||||
#include "tgsi/tgsi_text.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "state_tracker/graw.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "util/u_box.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_debug_image.h"
|
||||
#include "util/u_draw_quad.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@
|
|||
/* u_sampler_view_default_template */
|
||||
#include "util/u_sampler.h"
|
||||
/* debug_dump_surface_bmp */
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_debug_image.h"
|
||||
/* util_draw_vertex_buffer helper */
|
||||
#include "util/u_draw_quad.h"
|
||||
/* FREE & CALLOC_STRUCT */
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@
|
|||
#include "cso_cache/cso_context.h"
|
||||
|
||||
/* debug_dump_surface_bmp */
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_debug_image.h"
|
||||
/* util_draw_vertex_buffer helper */
|
||||
#include "util/u_draw_quad.h"
|
||||
/* FREE & CALLOC_STRUCT */
|
||||
|
|
|
|||
|
|
@ -405,6 +405,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
|
||||
&ws->info.num_tile_pipes);
|
||||
|
||||
/* The kernel returns 12 for some cards for an unknown reason.
|
||||
* I thought this was supposed to be a power of two.
|
||||
*/
|
||||
if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
|
||||
ws->info.num_tile_pipes = 8;
|
||||
|
||||
if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
|
||||
&ws->info.r600_gb_backend_map))
|
||||
ws->info.r600_gb_backend_map_valid = TRUE;
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ header = """/**
|
|||
#include "main/convolve.h"
|
||||
#include "main/copyimage.h"
|
||||
#include "main/depth.h"
|
||||
#include "main/debug_output.h"
|
||||
#include "main/dlist.h"
|
||||
#include "main/drawpix.h"
|
||||
#include "main/drawtex.h"
|
||||
|
|
|
|||
|
|
@ -57,6 +57,8 @@ MAIN_FILES = \
|
|||
main/dd.h \
|
||||
main/debug.c \
|
||||
main/debug.h \
|
||||
main/debug_output.c \
|
||||
main/debug_output.h \
|
||||
main/depth.c \
|
||||
main/depth.h \
|
||||
main/dlist.c \
|
||||
|
|
@ -530,8 +532,6 @@ PROGRAM_FILES = \
|
|||
program/program_parser.h \
|
||||
program/prog_statevars.c \
|
||||
program/prog_statevars.h \
|
||||
program/sampler.cpp \
|
||||
program/sampler.h \
|
||||
program/string_to_uint_map.cpp \
|
||||
program/symbol_table.c \
|
||||
program/symbol_table.h
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@
|
|||
#include "main/mtypes.h"
|
||||
#include "main/framebuffer.h"
|
||||
#include "main/version.h"
|
||||
#include "main/debug_output.h"
|
||||
#include "main/errors.h"
|
||||
#include "main/macros.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1140,7 +1140,7 @@ namespace brw {
|
|||
dims, rsize, op, pred);
|
||||
|
||||
/* An unbound surface access should give zero as result. */
|
||||
if (rsize)
|
||||
if (rsize && pred)
|
||||
set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
|
||||
|
||||
return tmp;
|
||||
|
|
|
|||
|
|
@ -919,7 +919,7 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
|
|||
* MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
|
||||
*/
|
||||
const unsigned dc_flush =
|
||||
brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;
|
||||
brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
|
||||
|
||||
if (brw->gen == 6) {
|
||||
/* Hardware workaround: SNB B-Spec says:
|
||||
|
|
|
|||
|
|
@ -490,6 +490,10 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
|
|||
|
||||
nir = nir_optimize(nir, is_scalar);
|
||||
|
||||
if (is_scalar) {
|
||||
OPT_V(nir_lower_load_const_to_scalar);
|
||||
}
|
||||
|
||||
/* Lower a bunch of stuff */
|
||||
OPT_V(nir_lower_var_copies);
|
||||
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags)
|
|||
PIPE_CONTROL_WRITE_TIMESTAMP |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD |
|
||||
PIPE_CONTROL_DEPTH_STALL |
|
||||
PIPE_CONTROL_DATA_CACHE_INVALIDATE;
|
||||
PIPE_CONTROL_DATA_CACHE_FLUSH;
|
||||
|
||||
/* If we're doing a CS stall, and don't already have one of the
|
||||
* workaround bits set, add "Stall at Pixel Scoreboard."
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ static void
|
|||
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
|
||||
unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
|
||||
PIPE_CONTROL_NO_WRITE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
assert(brw->gen >= 7 && brw->gen <= 9);
|
||||
|
|
|
|||
|
|
@ -939,8 +939,9 @@ fs_instruction_scheduler::calculate_deps()
|
|||
foreach_in_list(schedule_node, n, &instructions) {
|
||||
fs_inst *inst = (fs_inst *)n->inst;
|
||||
|
||||
if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
|
||||
inst->has_side_effects())
|
||||
if ((inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
|
||||
inst->has_side_effects()) &&
|
||||
inst->opcode != FS_OPCODE_FB_WRITE)
|
||||
add_barrier_deps(n);
|
||||
|
||||
/* read-after-write deps. */
|
||||
|
|
@ -1195,7 +1196,7 @@ vec4_instruction_scheduler::calculate_deps()
|
|||
foreach_in_list(schedule_node, n, &instructions) {
|
||||
vec4_instruction *inst = (vec4_instruction *)n->inst;
|
||||
|
||||
if (inst->has_side_effects())
|
||||
if (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE)
|
||||
add_barrier_deps(n);
|
||||
|
||||
/* read-after-write deps. */
|
||||
|
|
|
|||
|
|
@ -330,23 +330,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
|
|||
|
||||
/* According to the hardware docs, the L3 partitioning can only be changed
|
||||
* while the pipeline is completely drained and the caches are flushed,
|
||||
* which involves a first PIPE_CONTROL flush which stalls the pipeline and
|
||||
* initiates invalidation of the relevant caches...
|
||||
* which involves a first PIPE_CONTROL flush which stalls the pipeline...
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DATA_CACHE_FLUSH |
|
||||
PIPE_CONTROL_NO_WRITE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
/* ...followed by a second pipelined PIPE_CONTROL that initiates
|
||||
* invalidation of the relevant caches. Note that because RO invalidation
|
||||
* happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
|
||||
* command is processed by the CS) we cannot combine it with the previous
|
||||
* stalling flush as the hardware documentation suggests, because that
|
||||
* would cause the CS to stall on previous rendering *after* RO
|
||||
* invalidation and wouldn't prevent the RO caches from being polluted by
|
||||
* concurrent rendering before the stall completes. This intentionally
|
||||
* doesn't implement the SKL+ hardware workaround suggesting to enable CS
|
||||
* stall on PIPE_CONTROLs with the texture cache invalidation bit set for
|
||||
* GPGPU workloads because the previous and subsequent PIPE_CONTROLs
|
||||
* already guarantee that there is no concurrent GPGPU kernel execution
|
||||
* (see SKL HSD 2132585).
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
|
||||
PIPE_CONTROL_DATA_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_NO_WRITE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
PIPE_CONTROL_STATE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_NO_WRITE);
|
||||
|
||||
/* ...followed by a second stalling flush which guarantees that
|
||||
* invalidation is complete when the L3 configuration registers are
|
||||
* modified.
|
||||
/* Now send a third stalling flush to make sure that invalidation is
|
||||
* complete when the L3 configuration registers are modified.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DATA_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_DATA_CACHE_FLUSH |
|
||||
PIPE_CONTROL_NO_WRITE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@
|
|||
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
|
||||
#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
|
||||
/* GT */
|
||||
#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
|
||||
#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5)
|
||||
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
|
||||
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
|
||||
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ public:
|
|||
struct gl_context *ctx;
|
||||
struct gl_shader_program *shader_prog;
|
||||
struct brw_vertex_program *vp;
|
||||
struct brw_vue_prog_data *prog_data;
|
||||
vec4_visitor *v;
|
||||
};
|
||||
|
||||
|
|
@ -47,9 +48,13 @@ class cmod_propagation_vec4_visitor : public vec4_visitor
|
|||
{
|
||||
public:
|
||||
cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
|
||||
nir_shader *shader)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
|
||||
false, -1) {}
|
||||
nir_shader *shader,
|
||||
struct brw_vue_prog_data *prog_data)
|
||||
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
|
||||
false, -1)
|
||||
{
|
||||
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
}
|
||||
|
||||
protected:
|
||||
/* Dummy implementation for pure virtual methods */
|
||||
|
|
@ -96,13 +101,14 @@ void cmod_propagation_test::SetUp()
|
|||
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
|
||||
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
|
||||
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
|
||||
prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
|
||||
compiler->devinfo = devinfo;
|
||||
|
||||
vp = ralloc(NULL, struct brw_vertex_program);
|
||||
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
v = new cmod_propagation_vec4_visitor(compiler, shader);
|
||||
v = new cmod_propagation_vec4_visitor(compiler, shader, prog_data);
|
||||
|
||||
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ public:
|
|||
struct gl_context *ctx;
|
||||
struct gl_shader_program *shader_prog;
|
||||
struct brw_vertex_program *vp;
|
||||
struct brw_vue_prog_data *prog_data;
|
||||
vec4_visitor *v;
|
||||
};
|
||||
|
||||
|
|
@ -46,10 +47,12 @@ class copy_propagation_vec4_visitor : public vec4_visitor
|
|||
{
|
||||
public:
|
||||
copy_propagation_vec4_visitor(struct brw_compiler *compiler,
|
||||
nir_shader *shader)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
|
||||
nir_shader *shader,
|
||||
struct brw_vue_prog_data *prog_data)
|
||||
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
|
||||
false /* no_spills */, -1)
|
||||
{
|
||||
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
@ -91,13 +94,14 @@ void copy_propagation_test::SetUp()
|
|||
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
|
||||
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
|
||||
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
|
||||
prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
|
||||
compiler->devinfo = devinfo;
|
||||
|
||||
vp = ralloc(NULL, struct brw_vertex_program);
|
||||
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
v = new copy_propagation_vec4_visitor(compiler, shader);
|
||||
v = new copy_propagation_vec4_visitor(compiler, shader, prog_data);
|
||||
|
||||
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ public:
|
|||
struct gl_context *ctx;
|
||||
struct gl_shader_program *shader_prog;
|
||||
struct brw_vertex_program *vp;
|
||||
struct brw_vue_prog_data *prog_data;
|
||||
vec4_visitor *v;
|
||||
};
|
||||
|
||||
|
|
@ -49,10 +50,12 @@ class register_coalesce_vec4_visitor : public vec4_visitor
|
|||
{
|
||||
public:
|
||||
register_coalesce_vec4_visitor(struct brw_compiler *compiler,
|
||||
nir_shader *shader)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
|
||||
nir_shader *shader,
|
||||
struct brw_vue_prog_data *prog_data)
|
||||
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
|
||||
false /* no_spills */, -1)
|
||||
{
|
||||
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
@ -94,13 +97,14 @@ void register_coalesce_test::SetUp()
|
|||
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
|
||||
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
|
||||
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
|
||||
prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
|
||||
compiler->devinfo = devinfo;
|
||||
|
||||
vp = ralloc(NULL, struct brw_vertex_program);
|
||||
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
v = new register_coalesce_vec4_visitor(compiler, shader);
|
||||
v = new register_coalesce_vec4_visitor(compiler, shader, prog_data);
|
||||
|
||||
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@
|
|||
#include "context.h"
|
||||
#include "cpuinfo.h"
|
||||
#include "debug.h"
|
||||
#include "debug_output.h"
|
||||
#include "depth.h"
|
||||
#include "dlist.h"
|
||||
#include "eval.h"
|
||||
|
|
@ -814,8 +815,8 @@ init_attrib_groups(struct gl_context *ctx)
|
|||
_mesa_init_current( ctx );
|
||||
_mesa_init_depth( ctx );
|
||||
_mesa_init_debug( ctx );
|
||||
_mesa_init_debug_output( ctx );
|
||||
_mesa_init_display_list( ctx );
|
||||
_mesa_init_errors( ctx );
|
||||
_mesa_init_eval( ctx );
|
||||
_mesa_init_fbobjects( ctx );
|
||||
_mesa_init_feedback( ctx );
|
||||
|
|
|
|||
1301
src/mesa/main/debug_output.c
Normal file
1301
src/mesa/main/debug_output.c
Normal file
File diff suppressed because it is too large
Load diff
107
src/mesa/main/debug_output.h
Normal file
107
src/mesa/main/debug_output.h
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2016 Brian Paul, et al All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DEBUG_OUTPUT_H
|
||||
#define DEBUG_OUTPUT_H
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include "compiler.h"
|
||||
#include "glheader.h"
|
||||
#include "mtypes.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
_mesa_init_debug_output(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
_mesa_free_errors_data(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
_mesa_debug_get_id(GLuint *id);
|
||||
|
||||
bool
|
||||
_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val);
|
||||
|
||||
GLint
|
||||
_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname);
|
||||
|
||||
void *
|
||||
_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname);
|
||||
|
||||
void
|
||||
_mesa_log_msg(struct gl_context *ctx, enum mesa_debug_source source,
|
||||
enum mesa_debug_type type, GLuint id,
|
||||
enum mesa_debug_severity severity, GLint len, const char *buf);
|
||||
|
||||
bool
|
||||
_mesa_debug_is_message_enabled(const struct gl_debug_state *debug,
|
||||
enum mesa_debug_source source,
|
||||
enum mesa_debug_type type,
|
||||
GLuint id,
|
||||
enum mesa_debug_severity severity);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
|
||||
GLenum severity, GLint length,
|
||||
const GLchar* buf);
|
||||
|
||||
GLuint GLAPIENTRY
|
||||
_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum* sources,
|
||||
GLenum* types, GLenum* ids, GLenum* severities,
|
||||
GLsizei* lengths, GLchar* messageLog);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageControl(GLenum source, GLenum type, GLenum severity,
|
||||
GLsizei count, const GLuint *ids,
|
||||
GLboolean enabled);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageCallback(GLDEBUGPROC callback,
|
||||
const void *userParam);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
|
||||
const GLchar *message);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_PopDebugGroup(void);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* DEBUG_OUTPUT_H */
|
||||
|
|
@ -194,7 +194,7 @@ typedef enum
|
|||
OPCODE_BLEND_FUNC_SEPARATE_I,
|
||||
|
||||
OPCODE_CALL_LIST,
|
||||
OPCODE_CALL_LIST_OFFSET,
|
||||
OPCODE_CALL_LISTS,
|
||||
OPCODE_CLEAR,
|
||||
OPCODE_CLEAR_ACCUM,
|
||||
OPCODE_CLEAR_COLOR,
|
||||
|
|
@ -706,6 +706,10 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist)
|
|||
free(get_pointer(&n[10]));
|
||||
n += InstSize[n[0].opcode];
|
||||
break;
|
||||
case OPCODE_CALL_LISTS:
|
||||
free(get_pointer(&n[3]));
|
||||
n += InstSize[n[0].opcode];
|
||||
break;
|
||||
case OPCODE_DRAW_PIXELS:
|
||||
free(get_pointer(&n[5]));
|
||||
n += InstSize[n[0].opcode];
|
||||
|
|
@ -1569,37 +1573,49 @@ static void GLAPIENTRY
|
|||
save_CallLists(GLsizei num, GLenum type, const GLvoid * lists)
|
||||
{
|
||||
GET_CURRENT_CONTEXT(ctx);
|
||||
GLint i;
|
||||
GLboolean typeErrorFlag;
|
||||
unsigned type_size;
|
||||
Node *n;
|
||||
void *lists_copy;
|
||||
|
||||
SAVE_FLUSH_VERTICES(ctx);
|
||||
|
||||
switch (type) {
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
type_size = 1;
|
||||
break;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
case GL_2_BYTES:
|
||||
type_size = 2;
|
||||
break;
|
||||
case GL_3_BYTES:
|
||||
type_size = 3;
|
||||
break;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
case GL_FLOAT:
|
||||
case GL_2_BYTES:
|
||||
case GL_3_BYTES:
|
||||
case GL_4_BYTES:
|
||||
typeErrorFlag = GL_FALSE;
|
||||
type_size = 4;
|
||||
break;
|
||||
default:
|
||||
typeErrorFlag = GL_TRUE;
|
||||
type_size = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
GLint list = translate_id(i, type, lists);
|
||||
Node *n = alloc_instruction(ctx, OPCODE_CALL_LIST_OFFSET, 2);
|
||||
if (n) {
|
||||
n[1].i = list;
|
||||
n[2].b = typeErrorFlag;
|
||||
}
|
||||
if (num > 0 && type_size > 0) {
|
||||
/* create a copy of the array of list IDs to save in the display list */
|
||||
lists_copy = memdup(lists, num * type_size);
|
||||
} else {
|
||||
lists_copy = NULL;
|
||||
}
|
||||
|
||||
n = alloc_instruction(ctx, OPCODE_CALL_LISTS, 2 + POINTER_DWORDS);
|
||||
if (n) {
|
||||
n[1].i = num;
|
||||
n[2].e = type;
|
||||
save_pointer(&n[3], lists_copy);
|
||||
};
|
||||
|
||||
/* After this, we don't know what state we're in. Invalidate all
|
||||
* cached information previously gathered:
|
||||
*/
|
||||
|
|
@ -7772,15 +7788,9 @@ execute_list(struct gl_context *ctx, GLuint list)
|
|||
execute_list(ctx, n[1].ui);
|
||||
}
|
||||
break;
|
||||
case OPCODE_CALL_LIST_OFFSET:
|
||||
/* Generated by glCallLists() so we must add ListBase */
|
||||
if (n[2].b) {
|
||||
/* user specified a bad data type at compile time */
|
||||
_mesa_error(ctx, GL_INVALID_ENUM, "glCallLists(type)");
|
||||
}
|
||||
else if (ctx->ListState.CallDepth < MAX_LIST_NESTING) {
|
||||
GLuint list = (GLuint) (ctx->List.ListBase + n[1].i);
|
||||
execute_list(ctx, list);
|
||||
case OPCODE_CALL_LISTS:
|
||||
if (ctx->ListState.CallDepth < MAX_LIST_NESTING) {
|
||||
CALL_CallLists(ctx->Exec, (n[1].i, n[2].e, get_pointer(&n[3])));
|
||||
}
|
||||
break;
|
||||
case OPCODE_CLEAR:
|
||||
|
|
@ -9105,6 +9115,14 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists)
|
|||
return;
|
||||
}
|
||||
|
||||
if (n < 0) {
|
||||
_mesa_error(ctx, GL_INVALID_VALUE, "glCallLists(n < 0)");
|
||||
return;
|
||||
} else if (n == 0 || lists == NULL) {
|
||||
/* nothing to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Save the CompileFlag status, turn it off, execute display list,
|
||||
* and restore the CompileFlag.
|
||||
*/
|
||||
|
|
@ -9728,9 +9746,8 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
|
|||
case OPCODE_CALL_LIST:
|
||||
fprintf(f, "CallList %d\n", (int) n[1].ui);
|
||||
break;
|
||||
case OPCODE_CALL_LIST_OFFSET:
|
||||
fprintf(f, "CallList %d + offset %u = %u\n", (int) n[1].ui,
|
||||
ctx->List.ListBase, ctx->List.ListBase + n[1].ui);
|
||||
case OPCODE_CALL_LISTS:
|
||||
fprintf(f, "CallLists %d, %s\n", n[1].i, enum_string(n[1].e));
|
||||
break;
|
||||
case OPCODE_DISABLE:
|
||||
fprintf(f, "Disable %s\n", enum_string(n[1].e));
|
||||
|
|
|
|||
|
|
@ -38,46 +38,61 @@
|
|||
|
||||
GLboolean GLAPIENTRY
|
||||
_mesa_IsList(GLuint list);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DeleteLists(GLuint list, GLsizei range);
|
||||
|
||||
GLuint GLAPIENTRY
|
||||
_mesa_GenLists(GLsizei range);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_NewList(GLuint name, GLenum mode);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_EndList(void);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_CallList( GLuint list );
|
||||
_mesa_CallList(GLuint list);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_CallLists( GLsizei n, GLenum type, const GLvoid *lists );
|
||||
_mesa_CallLists(GLsizei n, GLenum type, const GLvoid *lists);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_ListBase(GLuint base);
|
||||
|
||||
extern struct gl_display_list *
|
||||
struct gl_display_list *
|
||||
_mesa_lookup_list(struct gl_context *ctx, GLuint list);
|
||||
|
||||
extern void _mesa_compile_error( struct gl_context *ctx, GLenum error, const char *s );
|
||||
void
|
||||
_mesa_compile_error(struct gl_context *ctx, GLenum error, const char *s);
|
||||
|
||||
extern void *_mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint sz);
|
||||
void *
|
||||
_mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint sz);
|
||||
|
||||
extern void *
|
||||
void *
|
||||
_mesa_dlist_alloc_aligned(struct gl_context *ctx, GLuint opcode, GLuint bytes);
|
||||
|
||||
extern GLint _mesa_dlist_alloc_opcode( struct gl_context *ctx, GLuint sz,
|
||||
void (*execute)( struct gl_context *, void * ),
|
||||
void (*destroy)( struct gl_context *, void * ),
|
||||
void (*print)( struct gl_context *, void *, FILE * ) );
|
||||
GLint
|
||||
_mesa_dlist_alloc_opcode(struct gl_context *ctx, GLuint sz,
|
||||
void (*execute)(struct gl_context *, void *),
|
||||
void (*destroy)(struct gl_context *, void *),
|
||||
void (*print)(struct gl_context *, void *, FILE *));
|
||||
|
||||
extern void _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist);
|
||||
void
|
||||
_mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist);
|
||||
|
||||
extern void _mesa_initialize_save_table(const struct gl_context *);
|
||||
void
|
||||
_mesa_initialize_save_table(const struct gl_context *);
|
||||
|
||||
extern void _mesa_install_dlist_vtxfmt(struct _glapi_table *disp,
|
||||
const GLvertexformat *vfmt);
|
||||
void
|
||||
_mesa_install_dlist_vtxfmt(struct _glapi_table *disp,
|
||||
const GLvertexformat *vfmt);
|
||||
|
||||
extern void _mesa_init_display_list( struct gl_context * ctx );
|
||||
void
|
||||
_mesa_init_display_list(struct gl_context * ctx);
|
||||
|
||||
extern void _mesa_free_display_list_data(struct gl_context *ctx);
|
||||
void
|
||||
_mesa_free_display_list_data(struct gl_context *ctx);
|
||||
|
||||
|
||||
#endif /* DLIST_H */
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "glheader.h"
|
||||
#include "clip.h"
|
||||
#include "context.h"
|
||||
#include "debug_output.h"
|
||||
#include "enable.h"
|
||||
#include "errors.h"
|
||||
#include "light.h"
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -47,14 +47,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct _glapi_table;
|
||||
|
||||
extern void
|
||||
_mesa_init_errors( struct gl_context *ctx );
|
||||
|
||||
extern void
|
||||
_mesa_free_errors_data( struct gl_context *ctx );
|
||||
|
||||
extern void
|
||||
_mesa_warning( struct gl_context *gc, const char *fmtString, ... ) PRINTFLIKE(2, 3);
|
||||
|
||||
|
|
@ -76,6 +68,10 @@ _mesa_log(const char *fmtString, ...) PRINTFLIKE(1, 2);
|
|||
extern FILE *
|
||||
_mesa_get_log_file(void);
|
||||
|
||||
void
|
||||
_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
|
||||
const char *msg);
|
||||
|
||||
extern void
|
||||
_mesa_gl_vdebug(struct gl_context *ctx,
|
||||
GLuint *id,
|
||||
|
|
@ -104,42 +100,6 @@ _mesa_gl_debug(struct gl_context *ctx,
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
bool
|
||||
_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val);
|
||||
|
||||
GLint
|
||||
_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname);
|
||||
|
||||
void *
|
||||
_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname);
|
||||
|
||||
extern void
|
||||
_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
|
||||
const char *msg);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
|
||||
GLenum severity, GLint length,
|
||||
const GLchar* buf);
|
||||
GLuint GLAPIENTRY
|
||||
_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum* sources,
|
||||
GLenum* types, GLenum* ids, GLenum* severities,
|
||||
GLsizei* lengths, GLchar* messageLog);
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageControl(GLenum source, GLenum type, GLenum severity,
|
||||
GLsizei count, const GLuint *ids,
|
||||
GLboolean enabled);
|
||||
void GLAPIENTRY
|
||||
_mesa_DebugMessageCallback(GLDEBUGPROC callback,
|
||||
const void *userParam);
|
||||
void GLAPIENTRY
|
||||
_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
|
||||
const GLchar *message);
|
||||
void GLAPIENTRY
|
||||
_mesa_PopDebugGroup(void);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -273,6 +273,8 @@ EXT(MESA_texture_signed_rgba , EXT_texture_snorm
|
|||
EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000)
|
||||
EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002)
|
||||
|
||||
EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
|
||||
|
||||
EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
|
||||
EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008)
|
||||
EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
|
||||
|
|
@ -293,7 +295,6 @@ EXT(NV_texture_barrier , NV_texture_barrier
|
|||
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
|
||||
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
|
||||
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
|
||||
EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
|
||||
|
||||
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
|
||||
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#include "glheader.h"
|
||||
#include "context.h"
|
||||
#include "blend.h"
|
||||
#include "debug_output.h"
|
||||
#include "enable.h"
|
||||
#include "enums.h"
|
||||
#include "errors.h"
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#include <stdbool.h>
|
||||
#include "glheader.h"
|
||||
#include "context.h"
|
||||
#include "debug_output.h"
|
||||
#include "get.h"
|
||||
#include "enums.h"
|
||||
#include "extensions.h"
|
||||
|
|
|
|||
|
|
@ -807,11 +807,6 @@ invalid_enum:
|
|||
return;
|
||||
}
|
||||
|
||||
/* TODO: Have the driver be required to handle this fixup. */
|
||||
if (q->Target == GL_ANY_SAMPLES_PASSED ||
|
||||
q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE)
|
||||
value = !!value;
|
||||
|
||||
switch (ptype) {
|
||||
case GL_INT: {
|
||||
GLint *param = (GLint *)offset;
|
||||
|
|
|
|||
|
|
@ -456,11 +456,11 @@ _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
|
|||
translate[0] = half_width + x;
|
||||
if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT) {
|
||||
scale[1] = -half_height;
|
||||
translate[1] = half_height - y;
|
||||
} else {
|
||||
scale[1] = half_height;
|
||||
translate[1] = half_height + y;
|
||||
}
|
||||
translate[1] = half_height + y;
|
||||
|
||||
if (ctx->Transform.ClipDepthMode == GL_NEGATIVE_ONE_TO_ONE) {
|
||||
scale[2] = 0.5 * (f - n);
|
||||
translate[2] = 0.5 * (n + f);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,6 @@
|
|||
#include "program/prog_print.h"
|
||||
#include "program/program.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/sampler.h"
|
||||
|
||||
|
||||
static int swizzle_for_size(int size);
|
||||
|
|
@ -1390,7 +1389,7 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
|
|||
switch (var->data.mode) {
|
||||
case ir_var_uniform:
|
||||
entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
|
||||
var->data.location);
|
||||
var->data.param_index);
|
||||
this->variables.push_tail(entry);
|
||||
break;
|
||||
case ir_var_shader_in:
|
||||
|
|
@ -1540,6 +1539,82 @@ get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
|
|||
return dst_reg(v->result);
|
||||
}
|
||||
|
||||
/* Calculate the sampler index and also calculate the base uniform location
|
||||
* for struct members.
|
||||
*/
|
||||
static void
|
||||
calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
|
||||
unsigned *offset, unsigned *array_elements,
|
||||
unsigned *location)
|
||||
{
|
||||
if (deref->ir_type == ir_type_dereference_variable)
|
||||
return;
|
||||
|
||||
switch (deref->ir_type) {
|
||||
case ir_type_dereference_array: {
|
||||
ir_dereference_array *deref_arr = deref->as_dereference_array();
|
||||
ir_constant *array_index =
|
||||
deref_arr->array_index->constant_expression_value();
|
||||
|
||||
if (!array_index) {
|
||||
/* GLSL 1.10 and 1.20 allowed variable sampler array indices,
|
||||
* while GLSL 1.30 requires that the array indices be
|
||||
* constant integer expressions. We don't expect any driver
|
||||
* to actually work with a really variable array index, so
|
||||
* all that would work would be an unrolled loop counter that ends
|
||||
* up being constant above.
|
||||
*/
|
||||
ralloc_strcat(&prog->InfoLog,
|
||||
"warning: Variable sampler array index unsupported.\n"
|
||||
"This feature of the language was removed in GLSL 1.20 "
|
||||
"and is unlikely to be supported for 1.10 in Mesa.\n");
|
||||
} else {
|
||||
*offset += array_index->value.u[0] * *array_elements;
|
||||
}
|
||||
|
||||
*array_elements *= deref_arr->array->type->length;
|
||||
|
||||
calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
|
||||
offset, array_elements, location);
|
||||
break;
|
||||
}
|
||||
|
||||
case ir_type_dereference_record: {
|
||||
ir_dereference_record *deref_record = deref->as_dereference_record();
|
||||
unsigned field_index =
|
||||
deref_record->record->type->field_index(deref_record->field);
|
||||
*location +=
|
||||
deref_record->record->type->record_location_offset(field_index);
|
||||
calc_sampler_offsets(prog, deref_record->record->as_dereference(),
|
||||
offset, array_elements, location);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Invalid deref type");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
get_sampler_uniform_value(class ir_dereference *sampler,
|
||||
struct gl_shader_program *shader_program,
|
||||
const struct gl_program *prog)
|
||||
{
|
||||
GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
|
||||
ir_variable *var = sampler->variable_referenced();
|
||||
unsigned location = var->data.location;
|
||||
unsigned array_elements = 1;
|
||||
unsigned offset = 0;
|
||||
|
||||
calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
|
||||
&location);
|
||||
|
||||
assert(shader_program->UniformStorage[location].opaque[shader].active);
|
||||
return shader_program->UniformStorage[location].opaque[shader].index +
|
||||
offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the condition of a conditional assignment
|
||||
*
|
||||
|
|
@ -1989,9 +2064,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
|
|||
if (ir->shadow_comparitor)
|
||||
inst->tex_shadow = GL_TRUE;
|
||||
|
||||
inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
|
||||
this->shader_program,
|
||||
this->prog);
|
||||
inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
|
||||
prog);
|
||||
|
||||
switch (sampler_type->sampler_dimensionality) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
|
|
@ -2269,8 +2343,7 @@ public:
|
|||
{
|
||||
this->idx = -1;
|
||||
this->program_resource_visitor::process(var);
|
||||
|
||||
var->data.location = this->idx;
|
||||
var->data.param_index = this->idx;
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -928,7 +928,7 @@ ptn_add_output_stores(struct ptn_compile *c)
|
|||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
|
||||
store->num_components = glsl_get_vector_elements(var->type);
|
||||
store->const_index[0] = (1 << store->num_components) - 1;
|
||||
nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
|
||||
store->variables[0] =
|
||||
nir_deref_var_create(store, c->output_vars[var->data.location]);
|
||||
|
||||
|
|
@ -999,7 +999,7 @@ setup_registers_and_variables(struct ptn_compile *c)
|
|||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
|
||||
store->num_components = 4;
|
||||
store->const_index[0] = WRITEMASK_XYZW;
|
||||
nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
|
||||
store->variables[0] = nir_deref_var_create(store, fullvar);
|
||||
store->src[0] = nir_src_for_ssa(f001);
|
||||
nir_builder_instr_insert(b, &store->instr);
|
||||
|
|
|
|||
|
|
@ -1,144 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
|
||||
* Copyright (C) 2008 VMware, Inc. All Rights Reserved.
|
||||
* Copyright © 2010 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/glsl/ir.h"
|
||||
#include "compiler/glsl/ir_uniform.h"
|
||||
#include "compiler/glsl/ir_visitor.h"
|
||||
#include "compiler/glsl/program.h"
|
||||
#include "program/hash_table.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/program.h"
|
||||
|
||||
|
||||
class get_sampler_name : public ir_hierarchical_visitor
|
||||
{
|
||||
public:
|
||||
get_sampler_name(ir_dereference *last,
|
||||
struct gl_shader_program *shader_program)
|
||||
{
|
||||
this->mem_ctx = ralloc_context(NULL);
|
||||
this->shader_program = shader_program;
|
||||
this->name = NULL;
|
||||
this->offset = 0;
|
||||
this->last = last;
|
||||
}
|
||||
|
||||
~get_sampler_name()
|
||||
{
|
||||
ralloc_free(this->mem_ctx);
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit(ir_dereference_variable *ir)
|
||||
{
|
||||
this->name = ir->var->name;
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
|
||||
{
|
||||
this->name = ralloc_asprintf(mem_ctx, "%s.%s", name, ir->field);
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
|
||||
{
|
||||
ir_constant *index = ir->array_index->as_constant();
|
||||
int i;
|
||||
|
||||
if (index) {
|
||||
i = index->value.i[0];
|
||||
} else {
|
||||
/* GLSL 1.10 and 1.20 allowed variable sampler array indices,
|
||||
* while GLSL 1.30 requires that the array indices be
|
||||
* constant integer expressions. We don't expect any driver
|
||||
* to actually work with a really variable array index, so
|
||||
* all that would work would be an unrolled loop counter that ends
|
||||
* up being constant above.
|
||||
*/
|
||||
ralloc_strcat(&shader_program->InfoLog,
|
||||
"warning: Variable sampler array index unsupported.\n"
|
||||
"This feature of the language was removed in GLSL 1.20 "
|
||||
"and is unlikely to be supported for 1.10 in Mesa.\n");
|
||||
i = 0;
|
||||
}
|
||||
if (ir != last) {
|
||||
this->name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
|
||||
} else {
|
||||
offset = i;
|
||||
}
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
struct gl_shader_program *shader_program;
|
||||
const char *name;
|
||||
void *mem_ctx;
|
||||
int offset;
|
||||
ir_dereference *last;
|
||||
};
|
||||
|
||||
|
||||
int
|
||||
_mesa_get_sampler_uniform_value(class ir_dereference *sampler,
|
||||
struct gl_shader_program *shader_program,
|
||||
const struct gl_program *prog)
|
||||
{
|
||||
get_sampler_name getname(sampler, shader_program);
|
||||
|
||||
GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
|
||||
|
||||
sampler->accept(&getname);
|
||||
|
||||
unsigned location;
|
||||
if (!shader_program->UniformHash->get(location, getname.name)) {
|
||||
linker_error(shader_program,
|
||||
"failed to find sampler named %s.\n", getname.name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!shader_program->UniformStorage[location].opaque[shader].active) {
|
||||
assert(0 && "cannot return a sampler");
|
||||
linker_error(shader_program,
|
||||
"cannot return a sampler named %s, because it is not "
|
||||
"used in this shader stage. This is a driver bug.\n",
|
||||
getname.name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return shader_program->UniformStorage[location].opaque[shader].index +
|
||||
getname.offset;
|
||||
}
|
||||
|
||||
|
||||
class ir_rvalue *
|
||||
_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler)
|
||||
{
|
||||
ir_dereference_array *deref_arr = sampler->as_dereference_array();
|
||||
if (!deref_arr || deref_arr->array_index->as_constant())
|
||||
return NULL;
|
||||
|
||||
return deref_arr->array_index;
|
||||
}
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
|
||||
* Copyright (C) 2008 VMware, Inc. All Rights Reserved.
|
||||
* Copyright © 2010 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SAMPLER_H
|
||||
#define SAMPLER_H
|
||||
|
||||
|
||||
int
|
||||
_mesa_get_sampler_uniform_value(class ir_dereference *sampler,
|
||||
struct gl_shader_program *shader_program,
|
||||
const struct gl_program *prog);
|
||||
|
||||
class ir_rvalue *
|
||||
_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler);
|
||||
|
||||
|
||||
#endif /* SAMPLER_H */
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue