diff --git a/.dir-locals.el b/.dir-locals.el
index d95eb4803f6..4b5393198de 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -5,6 +5,7 @@
(c-file-style . "stroustrup")
(fill-column . 78)
(eval . (progn
+ (c-set-offset 'case-label '0)
(c-set-offset 'innamespace '0)
(c-set-offset 'inline-open '0)))
)
diff --git a/appveyor.yml b/appveyor.yml
index 68cc368a3a1..bf7ac752857 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -6,7 +6,7 @@
# - Select Git and fill in the Git clone URL
# - Setup a Git hook as explained in
# https://github.com/appveyor/webhooks#installing-git-hook
-# - Check 'Settings > General > Skip branches without appveyor'
+# - Check 'Settings > General > Skip branches without appveyor.yml'
# - Check 'Settings > General > Rolling builds'
# - Setup the global or project notifications to your liking
#
@@ -24,7 +24,14 @@ branches:
except:
- /^travis.*$/
-clone_depth: 5
+# Don't download the full Mesa history to speed up cloning. However the clone
+# depth must not be too small, otherwise builds might fail when lots of patches
+# are committed in succession, because the desired commit is not found on the
+# truncated history.
+#
+# See also:
+# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
+clone_depth: 100
cache:
- win_flex_bison-2.4.5.zip
diff --git a/configure.ac b/configure.ac
index a18080d4ce5..e3d721d93aa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2161,7 +2161,12 @@ gallium_require_drm_loader() {
fi
}
+dnl This is for Glamor. Skip this if OpenGL is disabled.
require_egl_drm() {
+ if test "x$enable_opengl" = xno; then
+ return 0
+ fi
+
case "$with_egl_platforms" in
*drm*)
;;
diff --git a/docs/GL3.txt b/docs/GL3.txt
index f12e0ba8d29..257fc73225c 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -135,7 +135,7 @@ GL 4.2, GLSL 4.20:
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
- GL_ARB_shader_atomic_counters DONE (i965)
+ GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_texture_storage DONE (all drivers)
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size DONE (i965)
- GL_ARB_shader_storage_buffer_object DONE (i965)
+ GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
@@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40:
- specified transform/feedback layout in progress
- input/output block locations DONE
GL_ARB_multi_bind DONE (all drivers)
- GL_ARB_query_buffer_object not started
+ GL_ARB_query_buffer_object DONE (nvc0)
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff --git a/docs/envvars.html b/docs/envvars.html
index 5bb7b1e65bb..ba83335d0b0 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -96,6 +96,7 @@ glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
"130". Mesa will not really implement all the features of the given language version
if it's higher than what's normally reported. (for developers only)
MESA_GLSL - shading language compiler options
+MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index 616c134a768..0d92ed41ee8 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -48,7 +48,10 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_compute_shader on i965
GL_ARB_copy_image on r600
GL_ARB_indirect_parameters on nvc0
+GL_ARB_query_buffer_object on nvc0
+GL_ARB_shader_atomic_counters on nvc0
GL_ARB_shader_draw_parameters on i965, nvc0
+GL_ARB_shader_storage_buffer_object on nvc0
GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)
GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
GL_ARB_texture_buffer_range on freedreno/a4xx
@@ -58,6 +61,8 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx
GL_KHR_texture_compression_astc_ldr on freedreno/a4xx
GL_AMD_performance_monitor on radeonsi (CIK+ only)
+GL_ATI_meminfo on r600, radeonsi
+GL_NVX_gpu_memory_info on r600, radeonsi
New OSMesaCreateContextAttribs() function (for creating core profile
contexts)
diff --git a/include/D3D9/d3d9types.h b/include/D3D9/d3d9types.h
index 52fbc99dad7..d74ce80bb30 100644
--- a/include/D3D9/d3d9types.h
+++ b/include/D3D9/d3d9types.h
@@ -227,6 +227,7 @@ typedef struct _RGNDATA {
#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157)
#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160)
#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164)
+#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168)
/********************************************************
* Bitmasks *
diff --git a/include/d3dadapter/present.h b/include/d3dadapter/present.h
index 08a97297201..162f703e320 100644
--- a/include/d3dadapter/present.h
+++ b/include/d3dadapter/present.h
@@ -69,6 +69,8 @@ typedef struct ID3DPresentVtbl
HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow);
HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride);
HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This, HWND hWnd, int *width, int *height, int *depth);
+ /* Available since version 1.1 */
+ BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This);
} ID3DPresentVtbl;
struct ID3DPresent
@@ -96,6 +98,7 @@ struct ID3DPresent
#define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
#define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowSize(p,a,b,c,d)
+#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)
typedef struct ID3DPresentGroupVtbl
{
diff --git a/src/compiler/.gitignore b/src/compiler/.gitignore
new file mode 100644
index 00000000000..6fb069f0bcb
--- /dev/null
+++ b/src/compiler/.gitignore
@@ -0,0 +1 @@
+glsl_compiler
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index e3d297fe299..fe96cb3c879 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -220,9 +220,11 @@ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(MKDIR_GEN)
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(MKDIR_GEN)
$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore
index e80f8af6bfc..6db4e738f6e 100644
--- a/src/compiler/glsl/.gitignore
+++ b/src/compiler/glsl/.gitignore
@@ -1,4 +1,3 @@
-glsl_compiler
glsl_lexer.cpp
glsl_parser.cpp
glsl_parser.h
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 98d8bc5f268..7213ad8ebec 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -291,6 +291,10 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
if (!state->is_version(120, 0))
return false;
+ /* ESSL does not allow implicit conversions */
+ if (state->es_shader)
+ return false;
+
/* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
*
* "There are no implicit array or structure conversions. For
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index 95e86df1cdd..5512a33f114 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -661,7 +661,7 @@ private:
BA1(roundEven)
BA1(ceil)
BA1(fract)
- B2(mod)
+ BA2(mod)
BA1(modf)
BA2(min)
BA2(max)
@@ -1242,23 +1242,23 @@ builtin_builder::create_builtins()
FD(fract)
add_function("mod",
- _mod(glsl_type::float_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::float_type),
- _mod(glsl_type::vec3_type, glsl_type::float_type),
- _mod(glsl_type::vec4_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::float_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::vec2_type),
- _mod(glsl_type::vec3_type, glsl_type::vec3_type),
- _mod(glsl_type::vec4_type, glsl_type::vec4_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type),
- _mod(glsl_type::double_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::double_type),
- _mod(glsl_type::dvec3_type, glsl_type::double_type),
- _mod(glsl_type::dvec4_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::double_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::dvec2_type),
- _mod(glsl_type::dvec3_type, glsl_type::dvec3_type),
- _mod(glsl_type::dvec4_type, glsl_type::dvec4_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
NULL);
FD(modf)
@@ -3452,9 +3452,10 @@ UNOPA(ceil, ir_unop_ceil)
UNOPA(fract, ir_unop_fract)
ir_function_signature *
-builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
+builtin_builder::_mod(builtin_available_predicate avail,
+ const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
+ return binop(avail, ir_binop_mod, x_type, x_type, y_type);
}
ir_function_signature *
diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp
index ccc04c00cea..6db74f1c634 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -328,6 +328,11 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
+ this->fields[this->num_fields].image_read_only = 0;
+ this->fields[this->num_fields].image_write_only = 0;
+ this->fields[this->num_fields].image_coherent = 0;
+ this->fields[this->num_fields].image_volatile = 0;
+ this->fields[this->num_fields].image_restrict = 0;
this->num_fields++;
}
@@ -1201,7 +1206,12 @@ builtin_variable_generator::generate_varyings()
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
if (state->stage != MESA_SHADER_FRAGMENT) {
add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
- add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ if (!state->es_shader ||
+ state->stage == MESA_SHADER_VERTEX ||
+ (state->stage == MESA_SHADER_GEOMETRY &&
+ state->OES_geometry_point_size_enable)) {
+ add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ }
}
if (state->is_version(130, 0)) {
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index ef1a6575aaa..43a1aa94aff 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2386,6 +2386,13 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
if (extensions->ARB_blend_func_extended)
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
+
+ if (version >= 310) {
+ if (extensions->OES_geometry_shader) {
+ add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
+ add_builtin_define(parser, "GL_OES_geometry_shader", 1);
+ }
+ }
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index ecf0d7f76e5..d7a4b254aa2 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
/* OES extensions go here, sorted alphabetically.
*/
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
+ EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
@@ -1867,59 +1868,76 @@ do_common_optimization(exec_list *ir, bool linked,
const struct gl_shader_compiler_options *options,
bool native_integers)
{
+ const bool debug = false;
GLboolean progress = GL_FALSE;
- progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
+#define OPT(PASS, ...) do { \
+ if (debug) { \
+ fprintf(stderr, "START GLSL optimization %s\n", #PASS); \
+ const bool opt_progress = PASS(__VA_ARGS__); \
+ progress = opt_progress || progress; \
+ if (opt_progress) \
+ _mesa_print_ir(stderr, ir, NULL); \
+ fprintf(stderr, "GLSL optimization %s: %s progress\n", \
+ #PASS, opt_progress ? "made" : "no"); \
+ } else { \
+ progress = PASS(__VA_ARGS__) || progress; \
+ } \
+ } while (false)
+
+ OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
if (linked) {
- progress = do_function_inlining(ir) || progress;
- progress = do_dead_functions(ir) || progress;
- progress = do_structure_splitting(ir) || progress;
+ OPT(do_function_inlining, ir);
+ OPT(do_dead_functions, ir);
+ OPT(do_structure_splitting, ir);
}
- progress = do_if_simplification(ir) || progress;
- progress = opt_flatten_nested_if_blocks(ir) || progress;
- progress = opt_conditional_discard(ir) || progress;
- progress = do_copy_propagation(ir) || progress;
- progress = do_copy_propagation_elements(ir) || progress;
+ OPT(do_if_simplification, ir);
+ OPT(opt_flatten_nested_if_blocks, ir);
+ OPT(opt_conditional_discard, ir);
+ OPT(do_copy_propagation, ir);
+ OPT(do_copy_propagation_elements, ir);
if (options->OptimizeForAOS && !linked)
- progress = opt_flip_matrices(ir) || progress;
+ OPT(opt_flip_matrices, ir);
if (linked && options->OptimizeForAOS) {
- progress = do_vectorize(ir) || progress;
+ OPT(do_vectorize, ir);
}
if (linked)
- progress = do_dead_code(ir, uniform_locations_assigned) || progress;
+ OPT(do_dead_code, ir, uniform_locations_assigned);
else
- progress = do_dead_code_unlinked(ir) || progress;
- progress = do_dead_code_local(ir) || progress;
- progress = do_tree_grafting(ir) || progress;
- progress = do_constant_propagation(ir) || progress;
+ OPT(do_dead_code_unlinked, ir);
+ OPT(do_dead_code_local, ir);
+ OPT(do_tree_grafting, ir);
+ OPT(do_constant_propagation, ir);
if (linked)
- progress = do_constant_variable(ir) || progress;
+ OPT(do_constant_variable, ir);
else
- progress = do_constant_variable_unlinked(ir) || progress;
- progress = do_constant_folding(ir) || progress;
- progress = do_minmax_prune(ir) || progress;
- progress = do_rebalance_tree(ir) || progress;
- progress = do_algebraic(ir, native_integers, options) || progress;
- progress = do_lower_jumps(ir) || progress;
- progress = do_vec_index_to_swizzle(ir) || progress;
- progress = lower_vector_insert(ir, false) || progress;
- progress = do_swizzle_swizzle(ir) || progress;
- progress = do_noop_swizzle(ir) || progress;
+ OPT(do_constant_variable_unlinked, ir);
+ OPT(do_constant_folding, ir);
+ OPT(do_minmax_prune, ir);
+ OPT(do_rebalance_tree, ir);
+ OPT(do_algebraic, ir, native_integers, options);
+ OPT(do_lower_jumps, ir);
+ OPT(do_vec_index_to_swizzle, ir);
+ OPT(lower_vector_insert, ir, false);
+ OPT(do_swizzle_swizzle, ir);
+ OPT(do_noop_swizzle, ir);
- progress = optimize_split_arrays(ir, linked) || progress;
- progress = optimize_redundant_jumps(ir) || progress;
+ OPT(optimize_split_arrays, ir, linked);
+ OPT(optimize_redundant_jumps, ir);
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
- progress = set_loop_controls(ir, ls) || progress;
- progress = unroll_loops(ir, ls, options) || progress;
+ OPT(set_loop_controls, ir, ls);
+ OPT(unroll_loops, ir, ls, options);
}
delete ls;
+#undef OPT
+
return progress;
}
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index 3f88e01d599..a905b564787 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -591,6 +591,8 @@ struct _mesa_glsl_parse_state {
*/
bool OES_EGL_image_external_enable;
bool OES_EGL_image_external_warn;
+ bool OES_geometry_point_size_enable;
+ bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
bool OES_standard_derivatives_enable;
diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp
index 33b2d4c8646..7072c16cb28 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -471,10 +471,11 @@ private:
*/
class parcel_out_uniform_storage : public program_resource_visitor {
public:
- parcel_out_uniform_storage(struct string_to_uint_map *map,
+ parcel_out_uniform_storage(struct gl_shader_program *prog,
+ struct string_to_uint_map *map,
struct gl_uniform_storage *uniforms,
union gl_constant_value *values)
- : map(map), uniforms(uniforms), values(values)
+ : prog(prog), map(map), uniforms(uniforms), values(values)
{
}
@@ -492,8 +493,7 @@ public:
memset(this->targets, 0, sizeof(this->targets));
}
- void set_and_process(struct gl_shader_program *prog,
- ir_variable *var)
+ void set_and_process(ir_variable *var)
{
current_var = var;
field_counter = 0;
@@ -643,6 +643,16 @@ private:
uniform->opaque[shader_type].index = this->next_image;
uniform->opaque[shader_type].active = true;
+ /* Set image access qualifiers */
+ const GLenum access =
+ (current_var->data.image_read_only ? GL_READ_ONLY :
+ current_var->data.image_write_only ? GL_WRITE_ONLY :
+ GL_READ_WRITE);
+
+ for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
+ prog->_LinkedShaders[shader_type]->
+ ImageAccess[this->next_image + j] = access;
+
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
@@ -844,6 +854,11 @@ private:
this->values += values_for_type(type);
}
+ /**
+ * Current program being processed.
+ */
+ struct gl_shader_program *prog;
+
struct string_to_uint_map *map;
struct gl_uniform_storage *uniforms;
@@ -1007,40 +1022,6 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
}
}
-static void
-link_set_image_access_qualifiers(struct gl_shader_program *prog,
- gl_shader *sh, unsigned shader_stage,
- ir_variable *var, const glsl_type *type,
- char **name, size_t name_length)
-{
- /* Handle arrays of arrays */
- if (type->is_array() && type->fields.array->is_array()) {
- for (unsigned i = 0; i < type->length; i++) {
- size_t new_length = name_length;
-
- /* Append the subscript to the current variable name */
- ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
-
- link_set_image_access_qualifiers(prog, sh, shader_stage, var,
- type->fields.array, name,
- new_length);
- }
- } else {
- unsigned id = 0;
- bool found = prog->UniformHash->get(id, *name);
- assert(found);
- (void) found;
- const gl_uniform_storage *storage = &prog->UniformStorage[id];
- const unsigned index = storage->opaque[shader_stage].index;
- const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
- var->data.image_write_only ? GL_WRITE_ONLY :
- GL_READ_WRITE);
-
- for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
- sh->ImageAccess[index + j] = access;
- }
-}
-
/**
* Combine the hidden uniform hash map with the uniform hash map so that the
* hidden uniforms will be given indicies at the end of the uniform storage
@@ -1148,7 +1129,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
union gl_constant_value *data_end = &data[num_data_slots];
#endif
- parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
+ parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
@@ -1163,7 +1144,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
var->data.mode != ir_var_shader_storage))
continue;
- parcel.set_and_process(prog, var);
+ parcel.set_and_process(var);
}
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
@@ -1301,29 +1282,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
prog->NumHiddenUniforms = hidden_uniforms;
prog->UniformStorage = uniforms;
- /**
- * Scan the program for image uniforms and store image unit access
- * information into the gl_shader data structure.
- */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- gl_shader *sh = prog->_LinkedShaders[i];
-
- if (sh == NULL)
- continue;
-
- foreach_in_list(ir_instruction, node, sh->ir) {
- ir_variable *var = node->as_variable();
-
- if (var && var->data.mode == ir_var_uniform &&
- var->type->contains_image()) {
- char *name_copy = ralloc_strdup(NULL, var->name);
- link_set_image_access_qualifiers(prog, sh, i, var, var->type,
- &name_copy, strlen(var->name));
- ralloc_free(name_copy);
- }
- }
- }
-
link_set_uniform_initializers(prog, boolean_true);
return;
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 264b69ca619..a4c730ffdcf 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -967,11 +967,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
return;
}
- if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
+ bool needs_flat_qualifier = consumer_var == NULL &&
+ (producer_var->type->contains_integer() ||
+ producer_var->type->contains_double());
+
+ if (needs_flat_qualifier ||
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type varying cannot possibly affect rendering.
- * Also, this variable is non-flat and is (or contains) an integer.
+ * Also, this variable is non-flat and is (or contains) an integer
+ * or a double.
* If the consumer stage is unknown, don't modify the interpolation
* type as it could affect rendering later with separate shaders.
*
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 6657777d74c..4776ffa6acd 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4633,8 +4633,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
&prog->NumShaderStorageBlocks,
&prog->SsboInterfaceBlockIndex);
- /* FINISHME: Assign fragment shader output locations. */
-
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
diff --git a/src/compiler/glsl/lower_buffer_access.cpp b/src/compiler/glsl/lower_buffer_access.cpp
index f8c8d140ea8..9ad811de9f1 100644
--- a/src/compiler/glsl/lower_buffer_access.cpp
+++ b/src/compiler/glsl/lower_buffer_access.cpp
@@ -327,6 +327,7 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
unsigned *const_offset,
bool *row_major,
int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing)
{
*offset = new(mem_ctx) ir_constant(0u);
@@ -442,8 +443,11 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
if (strcmp(struct_type->fields.structure[i].name,
- deref_record->field) == 0)
+ deref_record->field) == 0) {
+ if (struct_field)
+ *struct_field = &struct_type->fields.structure[i];
break;
+ }
if (packing == GLSL_INTERFACE_PACKING_STD430)
intra_struct_offset += type->std430_size(field_row_major);
diff --git a/src/compiler/glsl/lower_buffer_access.h b/src/compiler/glsl/lower_buffer_access.h
index cc4614e9792..8772bdb76ff 100644
--- a/src/compiler/glsl/lower_buffer_access.h
+++ b/src/compiler/glsl/lower_buffer_access.h
@@ -57,6 +57,7 @@ public:
void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
ir_rvalue **offset, unsigned *const_offset,
bool *row_major, int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing);
};
diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp
index 533cd9202f4..12499695882 100644
--- a/src/compiler/glsl/lower_shared_reference.cpp
+++ b/src/compiler/glsl/lower_shared_reference.cpp
@@ -142,7 +142,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
/* Now that we've calculated the offset to the start of the
* dereference, walk over the type and emit loads into a temporary.
@@ -210,7 +210,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
deref = new(mem_ctx) ir_dereference_variable(store_var);
@@ -370,7 +370,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
assert(offset);
assert(!row_major);
diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp
index a172054bac8..d6269f7cbac 100644
--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -45,7 +45,7 @@ class lower_ubo_reference_visitor :
public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_shader *shader)
- : shader(shader)
+ : shader(shader), struct_field(NULL), variable(NULL)
{
}
@@ -60,6 +60,7 @@ public:
bool *row_major,
int *matrix_columns,
unsigned packing);
+ uint32_t ssbo_access_params();
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
@@ -104,6 +105,8 @@ public:
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
+ const struct glsl_struct_field *struct_field;
+ ir_variable *variable;
ir_rvalue *uniform_block;
bool progress;
};
@@ -288,8 +291,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
*const_offset = ubo_var->Offset;
+ this->struct_field = NULL;
setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
- matrix_columns, packing);
+ matrix_columns, &this->struct_field, packing);
}
void
@@ -317,6 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
this->buffer_access_type =
var->is_in_shader_storage_block() ?
ssbo_load_access : ubo_load_access;
+ this->variable = var;
/* Compute the offset to the start if the dereference as well as other
* information we need to configure the write
@@ -370,6 +375,24 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
return state->ARB_shader_storage_buffer_object_enable;
}
+uint32_t
+lower_ubo_reference_visitor::ssbo_access_params()
+{
+ assert(variable);
+
+ if (variable->is_interface_instance()) {
+ assert(struct_field);
+
+ return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
+ (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
+ (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
+ } else {
+ return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
+ (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
+ (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
+ }
+}
+
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_rvalue *deref,
@@ -394,6 +417,10 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
sig_params.push_tail(writemask_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig = new(mem_ctx)
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
assert(sig);
@@ -408,6 +435,7 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
call_params.push_tail(offset->clone(mem_ctx, NULL));
call_params.push_tail(deref->clone(mem_ctx, NULL));
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
@@ -426,6 +454,10 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
sig_params.push_tail(offset_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig =
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
assert(sig);
@@ -444,6 +476,7 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
exec_list call_params;
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
call_params.push_tail(offset->clone(mem_ctx, NULL));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
@@ -499,6 +532,7 @@ lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_store_access;
+ this->variable = var;
/* Compute the offset to the start if the dereference as well as other
* information we need to configure the write
@@ -678,6 +712,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
this->buffer_access_type = ssbo_unsized_array_length_access;
+ this->variable = var;
/* Compute the offset to the start if the dereference as well as other
* information we need to calculate the length.
@@ -910,6 +945,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_atomic_access;
+ this->variable = var;
setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
diff --git a/src/compiler/glsl/opt_tree_grafting.cpp b/src/compiler/glsl/opt_tree_grafting.cpp
index 83effb7424c..812f996fb81 100644
--- a/src/compiler/glsl/opt_tree_grafting.cpp
+++ b/src/compiler/glsl/opt_tree_grafting.cpp
@@ -361,11 +361,12 @@ tree_grafting_basic_block(ir_instruction *bb_first,
if (!lhs_var)
continue;
- if (lhs_var->data.mode == ir_var_function_out ||
- lhs_var->data.mode == ir_var_function_inout ||
- lhs_var->data.mode == ir_var_shader_out ||
- lhs_var->data.mode == ir_var_shader_storage)
- continue;
+ if (lhs_var->data.mode == ir_var_function_out ||
+ lhs_var->data.mode == ir_var_function_inout ||
+ lhs_var->data.mode == ir_var_shader_out ||
+ lhs_var->data.mode == ir_var_shader_storage ||
+ lhs_var->data.mode == ir_var_shader_shared)
+ continue;
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 5920c2e2611..d2eaec173b3 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -164,6 +164,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
}
@@ -1330,6 +1335,13 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
if (this == desired)
return true;
+ /* ESSL does not allow implicit conversions. If there is no state, we're
+ * doing intra-stage function linking where these checks have already been
+ * done.
+ */
+ if (state && state->es_shader)
+ return false;
+
/* There is no conversion among matrix types. */
if (this->matrix_columns > 1 || desired->matrix_columns > 1)
return false;
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index a9b5281e774..5965cb2eedb 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -885,7 +885,8 @@ struct glsl_struct_field {
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
- precision(GLSL_PRECISION_NONE)
+ precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0),
+ image_coherent(0), image_volatile(0), image_restrict(0)
{
/* empty */
}
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 37cb0221e0b..312d2f99a1c 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -139,7 +139,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_2x16);
nir_ssa_def *word =
- nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
@@ -154,7 +154,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_4x8);
nir_ssa_def *byte =
- nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 0eff89783dd..60ade4a80ae 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -238,15 +238,15 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
-unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
-dst = (src0.x & 0xffff) | (src0.y >> 16);
+unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+dst.x = (src0.x & 0xffff) | (src0.y >> 16);
""")
-unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
-dst = (src0.x << 0) |
- (src0.y << 8) |
- (src0.z << 16) |
- (src0.w << 24);
+unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+dst.x = (src0.x << 0) |
+ (src0.y << 8) |
+ (src0.z << 16) |
+ (src0.w << 24);
""")
# Lowered floating point unpacking operations.
@@ -562,12 +562,12 @@ dst.y = src1.x;
""")
# Byte extraction
-binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
-binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
+binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
+binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
# Word extraction
-binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
-binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
+binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
+binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
def triop(name, ty, const_expr):
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f4bfd3a921a..d4f4a3d903c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -248,19 +248,19 @@ optimizations = [
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
- (('extract_ibyte', a, b),
- ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8),
+ (('extract_i8', a, b),
+ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
- (('extract_ubyte', a, b),
+ (('extract_u8', a, b),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
- (('extract_iword', a, b),
+ (('extract_i16', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
- (('extract_uword', a, b),
+ (('extract_u16', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
@@ -285,30 +285,30 @@ optimizations = [
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
- ('extract_uword', 'v', 1), 0, 0)),
+ ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
+ ('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
- ('extract_ubyte', 'v', 1),
- ('extract_ubyte', 'v', 2),
- ('extract_ubyte', 'v', 3))),
+ ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
+ ('extract_u8', 'v', 1),
+ ('extract_u8', 'v', 2),
+ ('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
- ('extract_iword', 'v', 1), 0, 0)),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
+ ('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
- ('extract_ibyte', 'v', 1),
- ('extract_ibyte', 'v', 2),
- ('extract_ibyte', 'v', 3))),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
+ ('extract_i8', 'v', 1),
+ ('extract_i8', 'v', 2),
+ ('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index e3f46e3d739..d44aabf8f3c 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -544,6 +544,16 @@ enum gl_frag_depth_layout
FRAG_DEPTH_LAYOUT_UNCHANGED
};
+/**
+ * \brief Buffer access qualifiers
+ */
+enum gl_buffer_access_qualifier
+{
+ ACCESS_COHERENT = 1,
+ ACCESS_RESTRICT = 2,
+ ACCESS_VOLATILE = 4,
+};
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 749be7dfeb9..2b469b65ee4 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -85,7 +85,7 @@ endif
# virgl
ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
-SUBDIRS += winsys/virgl/drm drivers/virgl
+SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
endif
# vmwgfx
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 7854142f736..7cf0deece81 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
+ * XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
@@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
+ /*
+ * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
+ * directly, without any (x86 or generic) intrinsics.
+ * Albeit the rounding mode cannot be specified (and is undefined,
+ * though in practice on x86 seems to do nearest-even but it may
+ * be dependent on instruction set support), so is essentially
+ * useless.
+ */
+
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
@@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
- /* XXX: not really supported by backends */
+ /*
+ * XXX: not really supported by backends.
+ * Even if they would now, rounding mode cannot be specified and
+ * is undefined.
+ */
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 0b0f7f0147c..d80c997ad84 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -256,6 +256,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
}
+/**
+ * Un-interleave vector.
+ * This will return a vector consisting of every second element
+ * (depending on lo_hi, beginning at 0 or 1).
+ * The returned vector size (elems and width) will only be half
+ * that of the source vector.
+ */
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+ assert(num_elems <= LP_MAX_VECTOR_LENGTH);
+
+ for (i = 0; i < num_elems / 2; ++i)
+ elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
+
+ shuffle = LLVMConstVector(elems, num_elems / 2);
+
+ return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
+}
+
+
/**
* Interleave vector elements.
*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 7cede35bbde..367fba1fd21 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
LLVMValueRef b,
unsigned lo_hi);
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi);
void
lp_build_unpack2(struct gallivm_state *gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index c88dfbf974a..1cbe47ca91f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
/* Ignore deprecated instructions */
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 6f75bec5005..43af6b4ea0d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -45,8 +45,10 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
@@ -530,6 +532,77 @@ static struct lp_build_tgsi_action log_action = {
log_emit /* emit */
};
+/* TGSI_OPCODE_PK2H */
+
+static void
+pk2h_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ /* src0.x */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+ /* src0.y */
+ emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_Y);
+}
+
+static void
+pk2h_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_type f16i_t;
+ LLVMValueRef lo, hi, res;
+
+ f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
+ lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
+ hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
+ /* maybe some interleave doubling vector width would be useful... */
+ lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
+ hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
+ res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
+
+ emit_data->output[emit_data->chan] = res;
+}
+
+static struct lp_build_tgsi_action pk2h_action = {
+ pk2h_fetch_args, /* fetch_args */
+ pk2h_emit /* emit */
+};
+
+/* TGSI_OPCODE_UP2H */
+
+static void
+up2h_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMContextRef context = gallivm->context;
+ LLVMValueRef lo, hi, res[2], arg;
+ unsigned nr = bld_base->base.type.length;
+ LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
+
+ arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
+ lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
+ hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
+ res[0] = lp_build_half_to_float(gallivm, lo);
+ res[1] = lp_build_half_to_float(gallivm, hi);
+
+ emit_data->output[0] = emit_data->output[2] = res[0];
+ emit_data->output[1] = emit_data->output[3] = res[1];
+}
+
+static struct lp_build_tgsi_action up2h_action = {
+ scalar_unary_fetch_args, /* fetch_args */
+ up2h_emit /* emit */
+};
+
/* TGSI_OPCODE_LRP */
static void
@@ -1032,10 +1105,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
+ bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
+ bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 332b1cba984..90820d3fe91 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -226,14 +226,9 @@ pipe_freedreno_create_screen(int fd)
struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
- struct virgl_winsys *vws;
struct pipe_screen *screen;
- vws = virgl_drm_winsys_create(fd);
- if (!vws)
- return NULL;
-
- screen = virgl_create_screen(vws);
+ screen = virgl_drm_screen_create(fd);
return screen ? debug_screen_wrap(screen) : NULL;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f67c16200a9..d898fd66f48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -58,6 +58,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_half.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -3057,6 +3058,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_pk2h(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned chan;
+ union tgsi_exec_channel arg[2], dst;
+
+ fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+ dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
+ (util_float_to_half(arg[1].f[chan]) << 16);
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
+ }
+ }
+}
+
+static void
+exec_up2h(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned chan;
+ union tgsi_exec_channel arg, dst[2];
+
+ fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
+ for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+ dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
+ dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
static void
exec_scs(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
@@ -4339,7 +4379,7 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ exec_pk2h(mach, inst);
break;
case TGSI_OPCODE_PK2US:
@@ -4425,7 +4465,7 @@ exec_instruction(
break;
case TGSI_OPCODE_UP2H:
- assert (0);
+ exec_up2h(mach, inst);
break;
case TGSI_OPCODE_UP2US:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index b270dd73b67..70fc4604537 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
- { 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
+ { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
@@ -426,6 +426,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
case TGSI_OPCODE_UMUL_HI:
+ case TGSI_OPCODE_UP2H:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 7a02e27e01e..687fb54830d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -377,6 +377,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->reads_position = TRUE;
else if (semName == TGSI_SEMANTIC_FACE)
info->uses_frontface = TRUE;
+ else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
+ info->reads_samplemask = TRUE;
}
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte) semName;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index b0b423ab528..0541255764c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -81,6 +81,7 @@ struct tgsi_shader_info
ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
+ boolean reads_samplemask; /**< does fragment shader read sample mask? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_samplemask; /**< does fragment shader write sample mask? */
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 66cf989a830..00f231dc683 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -195,4 +195,16 @@ u_box_minify_2d(struct pipe_box *dst,
dst->height = MAX2(src->height >> l, 1);
}
+static inline void
+u_box_minify_3d(struct pipe_box *dst,
+ const struct pipe_box *src, unsigned l)
+{
+ dst->x = src->x >> l;
+ dst->y = src->y >> l;
+ dst->z = src->z >> l;
+ dst->width = MAX2(src->width >> l, 1);
+ dst->height = MAX2(src->height >> l, 1);
+ dst->depth = MAX2(src->depth >> l, 1);
+}
+
#endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index c719d3a77f0..a84de4fef7b 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -52,7 +52,7 @@
#include
#endif
-#if defined(PIPE_OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
#include
#include
#endif
diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py
index 929017a4486..d83603faa78 100755
--- a/src/gallium/auxiliary/util/u_format_parse.py
+++ b/src/gallium/auxiliary/util/u_format_parse.py
@@ -313,7 +313,7 @@ def _parse_channels(fields, layout, colorspace, swizzles):
return channels
def parse(filename):
- '''Parse the format descrition in CSV format in terms of the
+ '''Parse the format description in CSV format in terms of the
Channel and Format classes above.'''
stream = open(filename)
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index d28fae3c77d..966d213bdd5 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -74,7 +74,11 @@ util_float_to_half(float f)
f32.ui &= round_mask;
f32.f *= magic.f;
f32.ui -= round_mask;
-
+ /*
+ * XXX: The magic mul relies on denorms being available, otherwise
+ * all f16 denorms get flushed to zero - hence when this is used
+ * for tgsi_exec in softpipe we won't get f16 denorms.
+ */
/*
* Clamp to max finite value if overflowed.
* OpenGL has completely undefined rounding behavior for float to
@@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
/* Adjust */
f32.f *= magic.f;
+ /* XXX: The magic mul relies on denorms being available */
/* Inf / NaN */
if (f32.f >= infnan.f)
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 1c6cdd4f2c9..5241471f516 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -49,6 +49,13 @@ enum VS_OUTPUT
VS_O_VTEX = 0
};
+const int vl_zscan_normal_16[] =
+{
+ /* Zig-Zag scan pattern */
+ 0, 1, 4, 8, 5, 2, 3, 6,
+ 9,12,13,10, 7,11,14,15
+};
+
const int vl_zscan_linear[] =
{
/* Linear scan pattern */
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index eacee2db64f..268cf0a6e32 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -64,6 +64,7 @@ struct vl_zscan_buffer
struct pipe_surface *dst;
};
+extern const int vl_zscan_normal_16[];
extern const int vl_zscan_linear[];
extern const int vl_zscan_normal[];
extern const int vl_zscan_alternate[];
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 4c03e00008c..904e1ff04e7 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -325,6 +325,11 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call
will not block and the return value will be TRUE if the query has
completed or FALSE otherwise.
+``get_query_result_resource`` is used to store the result of a query into
+a resource without synchronizing with the CPU. This write will optionally
+wait for the query to complete, and will optionally write whether the value
+is available instead of the value itself.
+
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index b461810644a..3324bcca6f4 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -138,6 +138,10 @@ The integer capabilities:
* ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
alignment for pipe_sampler_view::u.buf.first_element, in bytes.
If a driver does not support first/last_element, it should return 0.
+* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
+ supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
+ When this is the case it should be assumed that the swizzle parameters
+ in the sampler view have no effect.
* ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
If true, the hardware cannot replace arbitrary shader inputs with sprite
coordinates and hence the inputs that are desired to be replaceable must
@@ -164,7 +168,7 @@ The integer capabilities:
view it is intended to be used with, or herein undefined results may occur
for permutational swizzles.
* ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
- a buffer sampler view, in bytes.
+ a buffer sampler view, in texels.
* ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
since they are linked) a driver can support. Returning 0 is equivalent
to returning 1 because every driver has to support at least a single
@@ -306,6 +310,15 @@ The integer capabilities:
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
is supported.
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
+* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
+ pipe_context::create_surface supports reinterpreting a texture as a surface
+ of a format with different block width/height (but same block size in bits).
+ For example, a compressed texture image can be interpreted as a
+ non-compressed surface whose texels are the same number of bits as the
+ compressed blocks, and vice versa. The width and height of the surface are
+ adjusted appropriately.
+* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Driver supports
+ context::get_query_result_resource callback.
.. _pipe_capf:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 7810a3eb915..489cbb0bc2f 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2372,6 +2372,23 @@ programs.
the program. Results are unspecified if any of the remaining
threads terminates or never reaches an executed BARRIER instruction.
+.. opcode:: MEMBAR - Memory barrier
+
+ ``MEMBAR type``
+
+ This opcode waits for the completion of all memory accesses based on
+ the type passed in. The type is an immediate bitfield with the following
+ meaning:
+
+ Bit 0: Shader storage buffers
+ Bit 1: Atomic buffers
+ Bit 2: Images
+ Bit 3: Shared memory
+ Bit 4: Thread group
+
+ These may be passed in, in any combination. An implementation is free to not
+ distinguish between these as it sees fit. However, these map to all the
+ possibilities made available by GLSL.
.. _atomopcodes:
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index c5ea86f9368..c54bb1091f7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -152,6 +152,9 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
struct fd_ringbuffer *ring = ctx->ring;
const uint32_t *buf = (const void *)string;
+ /* max packet size is 0x3fff dwords: */
+ len = MIN2(len, 0x3fff * 4);
+
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
while (len >= 4) {
OUT_RING(ring, *buf);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 640f50f5dcb..27f4d267438 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -165,6 +165,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_SM3:
@@ -183,6 +184,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
if (is_a3xx(screen)) return 16;
if (is_a4xx(screen)) return 32;
@@ -248,6 +251,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -296,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 1ea2dd9cbf7..6eb6a2d52ef 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -556,6 +556,10 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
}
}
+/* NOTE: this creates the "TGSI" style fragface (ie. input slot
+ * VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face
+ * we can just use the value from hw directly (since it is boolean)
+ */
static struct ir3_instruction *
create_frag_face(struct ir3_compile *ctx, unsigned comp)
{
@@ -1224,7 +1228,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
- ctx->vertex_id = create_input(ctx->block, 0);
+ ctx->vertex_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
ctx->vertex_id);
}
@@ -1232,7 +1236,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(ctx->block, 0);
+ ctx->instance_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
@@ -1244,6 +1248,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
}
break;
+ case nir_intrinsic_load_front_face:
+ if (!ctx->frag_face) {
+ ctx->so->frag_face = true;
+ ctx->frag_face = create_input(b, 0);
+ ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
+ }
+ dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
+ break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
@@ -1349,6 +1361,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
+ struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
@@ -1392,7 +1405,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
ddy = get_src(ctx, &tex->src[i].src);
break;
default:
- compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
+ compile_error(ctx, "Unhandled NIR tex src type: %d\n",
tex->src[i].src_type);
return;
}
@@ -1417,6 +1430,21 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
+ if (!has_off) {
+ /* could still have a constant offset: */
+ if (tex->const_offset[0] || tex->const_offset[1] ||
+ tex->const_offset[2] || tex->const_offset[3]) {
+ off = const_off;
+
+ off[0] = create_immed(b, tex->const_offset[0]);
+ off[1] = create_immed(b, tex->const_offset[1]);
+ off[2] = create_immed(b, tex->const_offset[2]);
+ off[3] = create_immed(b, tex->const_offset[3]);
+
+ has_off = true;
+ }
+ }
+
/* scale up integer coords for TXF based on the LOD */
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
@@ -2053,6 +2081,9 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ /* handled entirely in nir_lower_clip: */
+ return;
default:
if (slot >= VARYING_SLOT_VAR0)
break;
@@ -2135,11 +2166,17 @@ emit_instructions(struct ir3_compile *ctx)
setup_output(ctx, var);
}
- /* Setup variables (which should only be arrays): */
+ /* Setup global variables (which should only be arrays): */
nir_foreach_variable(var, &ctx->s->globals) {
declare_var(ctx, var);
}
+ /* Setup local variables (which should only be arrays): */
+ /* NOTE: need to do something more clever when we support >1 fxn */
+ nir_foreach_variable(var, &fxn->locals) {
+ declare_var(ctx, var);
+ }
+
/* And emit the body: */
ctx->impl = fxn;
emit_function(ctx, fxn);
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 6b0ab587001..8d010f9dc8c 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -262,6 +262,9 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 5171cca9ea6..44d7c11af43 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return true;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1;
case PIPE_CAP_TGSI_TEXCOORD:
@@ -486,6 +488,9 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index db45cbbb057..34008e1c01e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -308,17 +308,4 @@ void
lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
-#ifdef PIPE_ARCH_SSE
-#include
-#include "util/u_sse.h"
-
-static inline __m128i
-lp_plane_to_m128i(const struct lp_rast_plane *plane)
-{
- return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
- (int32_t)plane->dcdy, (int32_t)plane->eo);
-}
-
-#endif
-
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 0ae6ec28d35..f4a2f0268f0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -239,7 +239,7 @@ sign_bits4(const __m128i *cstep, int cdiff)
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
@@ -250,26 +250,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
unsigned nr = 0;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
- __m128i rej4;
-
- __m128i dcdx2;
- __m128i dcdx3;
+ __m128i c, dcdx, dcdy, rej4;
+ __m128i dcdx_neg_mask, dcdy_neg_mask;
+ __m128i dcdx2, dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
-
+
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &rej4);
+ &c, &unused, &dcdx, &dcdy);
+
+ /* recalc eo - easier than trying to load as scalars / shuffle... */
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+ rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
/* Adjust dcdx;
*/
@@ -349,32 +352,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
+ __m128i c, dcdx, dcdy;
+ __m128i dcdx2, dcdx3;
- __m128i dcdx2;
- __m128i dcdx3;
-
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &unused);
+ &c, &unused, &dcdx, &dcdy);
/* Adjust dcdx;
*/
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 879a2e7d2f0..2c66bf46332 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -311,6 +311,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 03bb8ce2b6f..5ab297d7e1a 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -168,6 +168,21 @@ struct lp_setup_context
const float (*v2)[4]);
};
+static inline void
+scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
+ struct u_rect *scissor)
+{
+ /* left */
+ scis_planes[0] = (bbox->x0 < scissor->x0);
+ /* right */
+ scis_planes[1] = (bbox->x1 > scissor->x1);
+ /* top */
+ scis_planes[2] = (bbox->y0 < scissor->y0);
+ /* bottom */
+ scis_planes[3] = (bbox->y1 > scissor->y1);
+}
+
+
void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index f425825fc2a..af4e7900d3c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -336,13 +336,6 @@ try_setup_line( struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 8;
- }
- else {
- nr_planes = 4;
- }
-
dx = v1[0][0] - v2[0][0];
dy = v1[0][1] - v2[0][1];
area = (dx * dx + dy * dy);
@@ -591,6 +584,18 @@ try_setup_line( struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 4;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
line = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@@ -708,30 +713,46 @@ try_setup_line( struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 8) {
- const struct u_rect *scissor =
- &setup->scissors[viewport_index];
+ if (nr_planes > 4) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[4];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
- plane[4].dcdx = -1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (1-scissor->x0) << 8;
- plane[4].eo = 1 << 8;
-
- plane[5].dcdx = 1 << 8;
- plane[5].dcdy = 0;
- plane[5].c = (scissor->x1+1) << 8;
- plane[5].eo = 0;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = 1 << 8;
- plane[6].c = (1-scissor->y0) << 8;
- plane[6].eo = 1 << 8;
-
- plane[7].dcdx = 0;
- plane[7].dcdy = -1 << 8;
- plane[7].c = (scissor->y1+1) << 8;
- plane[7].eo = 0;
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 907129dbd1b..cdb3d015dec 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -302,13 +302,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 7;
- }
- else {
- nr_planes = 3;
- }
-
/* Bounding rectangle (in pixels) */
{
/* Yes this is necessary to accurately calculate bounding boxes
@@ -347,6 +340,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 3;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@@ -367,13 +372,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Setup parameter interpolants:
*/
- setup->setup.variant->jit_function( v0,
- v1,
- v2,
- frontfacing,
- GET_A0(&tri->inputs),
- GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs) );
+ setup->setup.variant->jit_function(v0, v1, v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
@@ -383,9 +386,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
- (const float (*)[4])GET_A0(&tri->inputs),
- (const float (*)[4])GET_DADX(&tri->inputs),
- (const float (*)[4])GET_DADY(&tri->inputs));
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
@@ -672,29 +675,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 7) {
- const struct u_rect *scissor = &setup->scissors[viewport_index];
+ if (nr_planes > 3) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[3];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
- plane[3].dcdx = -1 << 8;
- plane[3].dcdy = 0;
- plane[3].c = (1-scissor->x0) << 8;
- plane[3].eo = 1 << 8;
-
- plane[4].dcdx = 1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (scissor->x1+1) << 8;
- plane[4].eo = 0;
-
- plane[5].dcdx = 0;
- plane[5].dcdy = 1 << 8;
- plane[5].c = (1-scissor->y0) << 8;
- plane[5].eo = 1 << 8;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = -1 << 8;
- plane[6].c = (scissor->y1+1) << 8;
- plane[6].eo = 0;
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
@@ -984,17 +1004,16 @@ calc_fixed_position(struct lp_setup_context *setup,
* Both should be acceptable, I think.
*/
#if defined(PIPE_ARCH_SSE)
- __m128d v0r, v1r, v2r;
+ __m128 v0r, v1r;
__m128 vxy0xy2, vxy1xy0;
__m128i vxy0xy2i, vxy1xy0i;
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
- v0r = _mm_load_sd((const double *)v0[0]);
- v1r = _mm_load_sd((const double *)v1[0]);
- v2r = _mm_load_sd((const double *)v2[0]);
- vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
- vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
+ v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+ vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+ v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+ vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 6ad9dd31681..75e5fd843c2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
+ case TYPE_S64:
+ case TYPE_U64:
+ return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast(i);
case TYPE_F64:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d1fdd75495f..9d7becf27d4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -132,6 +132,7 @@ enum operation
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
+ OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 17cb484d2ba..0c7cd1d8137 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1947,10 +1947,16 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 1fa0eb6da6d..dee26225b7e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -673,7 +673,12 @@ CodeEmitterGM107::emitMOV()
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) {
switch (insn->src(0).getFile()) {
case FILE_GPR:
- emitInsn(0x5c980000);
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitInsn(0x5b6a0000);
+ emitGPR (0x08);
+ } else {
+ emitInsn(0x5c980000);
+ }
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
@@ -684,18 +689,32 @@ CodeEmitterGM107::emitMOV()
emitInsn(0x38980000);
emitIMMD(0x14, 19, insn->src(0));
break;
+ case FILE_PREDICATE:
+ emitInsn(0x50880000);
+ emitPRED(0x0c, insn->src(0));
+ emitPRED(0x1d);
+ emitPRED(0x27);
+ break;
default:
assert(!"bad src file");
break;
}
- emitField(0x27, 4, insn->lanes);
+ if (insn->def(0).getFile() != FILE_PREDICATE &&
+ insn->src(0).getFile() != FILE_PREDICATE)
+ emitField(0x27, 4, insn->lanes);
} else {
emitInsn (0x01000000);
emitIMMD (0x14, 32, insn->src(0));
emitField(0x0c, 4, insn->lanes);
}
- emitGPR(0x00, insn->def(0));
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitPRED(0x27);
+ emitPRED(0x03, insn->def(0));
+ emitPRED(0x00);
+ } else {
+ emitGPR(0x00, insn->def(0));
+ }
}
void
@@ -2684,11 +2703,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitRAM();
break;
case OP_MOV:
- if (insn->def(0).getFile() == FILE_GPR &&
- insn->src(0).getFile() != FILE_PREDICATE)
- emitMOV();
- else
- assert(!"R2P/P2R");
+ emitMOV();
break;
case OP_RDSV:
emitS2R();
@@ -2700,7 +2715,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_CEIL:
case OP_TRUNC:
case OP_CVT:
- if (isFloatType(insn->dType)) {
+ if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)) {
+ emitMOV();
+ } else if (isFloatType(insn->dType)) {
if (isFloatType(insn->sType))
emitF2F();
else
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 0b28047e22b..8637db91521 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2021,8 +2021,10 @@ CodeEmitterNVC0::emitATOM(const Instruction *i)
code[0] |= 63 << 20;
}
- if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
- srcId(i->src(2), 32 + 17);
+ if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
+ code[1] |= (SDATA(i->src(1)).id + 1) << 17;
+ }
}
void
@@ -2433,10 +2435,16 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 9c4a38f291b..52ac198221d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
+static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
@@ -213,6 +214,12 @@ public:
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+ nv50_ir::CacheMode getCacheMode() const {
+ if (!insn->Instruction.Memory)
+ return nv50_ir::CACHE_CA;
+ return translateCacheMode(insn->Memory.Qualifier);
+ }
+
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
@@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
}
}
+static nv50_ir::CacheMode translateCacheMode(uint qualifier)
+{
+ if (qualifier & TGSI_MEMORY_VOLATILE)
+ return nv50_ir::CACHE_CV;
+ if (qualifier & TGSI_MEMORY_COHERENT)
+ return nv50_ir::CACHE_CG;
+ return nv50_ir::CACHE_CA;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
@@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ } else
+ if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= 0x2;
}
}
@@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
-/*
- if (src.getFile() == TGSI_FILE_RESOURCE) {
- if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ if (src.getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
-*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
@@ -2222,6 +2239,28 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector off, src, ldv, def;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Value *off = fetchSrc(1, c);
+ Symbol *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ } else {
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@@ -2298,6 +2337,30 @@ Converter::handleSTORE()
int c;
std::vector off, src, dummy;
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!(tgsi.getDst(0).getMask() & (1 << c)))
+ continue;
+
+ Symbol *sym;
+ Value *off;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
+ } else {
+ off = fetchSrc(0, 0);
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@@ -2359,6 +2422,37 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector defv;
LValue *dst = getScratch();
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (int c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Instruction *insn;
+ Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ else
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
+ else
+ insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
+ if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
+ insn->setIndirect(0, 0, off);
+ if (off2)
+ insn->setIndirect(0, 1, off2);
+ insn->subOp = subOp;
+ }
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ return;
+ }
+
+
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
@@ -3103,6 +3197,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
+ case TGSI_OPCODE_MEMBAR:
+ geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+ geni->fixed = 1;
+ if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
+ else
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
+ break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
@@ -3115,6 +3217,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_RESQ:
+ geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
+ makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (tgsi.getSrc(0).isIndirect(0))
+ geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index dc1ab769b98..e7cb54bc426 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1022,11 +1022,22 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
return true;
}
+bool
+NVC0LoweringPass::handleSUQ(Instruction *suq)
+{
+ suq->op = OP_MOV;
+ suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
+ suq->getSrc(0)->reg.fileIndex * 16));
+ suq->setIndirect(0, 0, NULL);
+ suq->setIndirect(0, 1, NULL);
+ return true;
+}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
+ Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@@ -1037,16 +1048,22 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
+ base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
+ assert(base->reg.size == 8);
+ if (ptr)
+ base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
+ assert(base->reg.size == 8);
+ atom->setIndirect(0, 0, base);
return true;
}
- Value *base =
+ base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
- Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
+ atom->setIndirect(0, 1, NULL);
atom->setIndirect(0, 0, base);
return true;
@@ -1069,7 +1086,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
cctl->setPredicate(cas->cc, cas->getPredicate());
}
- if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
// CAS is crazy. It's 2nd source is a double reg, and the 3rd source
// should be set to the high part of the double reg or bad things will
// happen elsewhere in the universe.
@@ -1079,6 +1096,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
bld.setPosition(cas, false);
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
cas->setSrc(1, dreg);
+ cas->setSrc(2, dreg);
}
return true;
@@ -1093,6 +1111,32 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
+inline Value *
+NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
+ return bld.
+ mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
+}
+
+inline Value *
+NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
+ return bld.
+ mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
+}
+
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
@@ -1786,6 +1830,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
+ case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
@@ -1820,6 +1865,26 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
+ } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
+ Value *ind = i->getIndirect(0, 1);
+ Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
+ // XXX come up with a way not to do this for EVERY little access but
+ // rather to batch these up somehow. Unfortunately we've lost the
+ // information about the field width by the time we get here.
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
+ Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
+ }
+ i->setIndirect(0, 1, NULL);
+ i->setIndirect(0, 0, ptr);
+ bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
+ i->setPredicate(CC_NOT_P, pred);
+ if (i->defExists(0)) {
+ bld.mkMov(i->getDef(0), bld.mkImm(0));
+ }
}
break;
case OP_ATOM:
@@ -1838,6 +1903,9 @@ NVC0LoweringPass::visit(Instruction *i)
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
break;
+ case OP_SUQ:
+ handleSUQ(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index adb400a559a..09ec7e69ddc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -101,6 +101,7 @@ protected:
bool handleTXQ(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
+ bool handleSUQ(Instruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
@@ -116,6 +117,8 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
+ Value *loadResInfo64(Value *ptr, uint32_t off);
+ Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 95e9fdfc57d..05b8db4a3d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -336,6 +336,7 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
+ if (i->srcExists(2) && i->src(2).getImmediate(src2))
+ opnd3(i, src2);
}
return true;
}
@@ -872,6 +875,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
}
}
+void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA:
+ if (imm2.isInteger(0)) {
+ i->op = OP_MUL;
+ i->setSrc(2, NULL);
+ foldCount++;
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
@@ -1202,6 +1223,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
}
break;
+ case OP_SHR:
+ if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
+ bld.setPosition(i, false);
+ i->op = OP_AND;
+ i->setSrc(0, si->getSrc(0));
+ i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
+ }
+ break;
case OP_MUL:
int muls;
if (isFloatType(si->dType))
@@ -2504,6 +2533,12 @@ MemoryOpt::runOpt(BasicBlock *bb)
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
+ if (typeSizeof(ldst->dType) == 4 &&
+ ldst->src(1).getFile() == FILE_GPR &&
+ ldst->getSrc(1)->getInsn()->op == OP_NOP) {
+ delete_Instruction(prog, ldst);
+ continue;
+ }
isLoad = false;
} else {
// TODO: maybe have all fixed ops act as barrier ?
@@ -3015,7 +3050,7 @@ Instruction::isResultEqual(const Instruction *that) const
if (that->srcExists(s))
return false;
- if (op == OP_LOAD || op == OP_VFETCH) {
+ if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
@@ -3046,6 +3081,8 @@ GlobalCSE::visit(BasicBlock *bb)
ik = phi->getSrc(0)->getInsn();
if (!ik)
continue; // probably a function input
+ if (ik->defCount(0xff) > 1)
+ continue; // too painful to check if we can really push this forward
for (s = 1; phi->srcExists(s); ++s) {
if (phi->getSrc(s)->refCount() > 1)
break;
@@ -3179,10 +3216,10 @@ DeadCodeElim::buryAll(Program *prog)
bool
DeadCodeElim::visit(BasicBlock *bb)
{
- Instruction *next;
+ Instruction *prev;
- for (Instruction *i = bb->getFirst(); i; i = next) {
- next = i->next;
+ for (Instruction *i = bb->getExit(); i; i = prev) {
+ prev = i->prev;
if (i->isDead()) {
++deadCount;
delete_Instruction(prog, i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 0b02599dbdd..47285a25c33 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -161,6 +161,7 @@ const char *operationStr[OP_LAST + 1] =
"subfm",
"suclamp",
"sueau",
+ "suq",
"madsp",
"texbar",
"dfdx",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index cd8c42ced5e..de39be872e4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1544,6 +1544,9 @@ GCRA::cleanup(const bool success)
delete[] nodes;
nodes = NULL;
+ hi.next = hi.prev = &hi;
+ lo[0].next = lo[0].prev = &lo[0];
+ lo[1].next = lo[1].prev = &lo[1];
}
Symbol *
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 4390a726d1c..ae0a8bb61d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -46,7 +46,7 @@ const uint8_t Target::operationSrcNr[] =
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
- 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
+ 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
@@ -109,8 +109,8 @@ const OpClass Target::operationClass[] =
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
- // SUBFM, SUCLAMP, SUEAU, MADSP
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
+ // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
+ OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
index a3d07deeb18..c6c287bb8bb 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
@@ -266,7 +266,9 @@ nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
int i;
for (i = 0; i < num_buffers; ++i) {
+#ifndef NDEBUG
assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
+#endif
memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
dec->bsp_ptr += num_bytes[i];
str_bsp->w0[0] += num_bytes[i];
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 61d91fd4cce..b62889119c5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -184,6 +184,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 888d62e1c52..a67ef28abf8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -369,7 +369,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
- FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
@@ -403,10 +402,13 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
- } else
+ }
+
if (prog->type == PIPE_SHADER_COMPUTE) {
prog->cp.syms = info->bin.syms;
prog->cp.num_syms = info->bin.numSyms;
+ } else {
+ FREE(info->bin.syms);
}
if (prog->pipe.stream_output.num_outputs)
@@ -507,6 +509,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->interps);
FREE(p->so);
+ if (type == PIPE_SHADER_COMPUTE)
+ FREE(p->cp.syms);
+
memset(p, 0, sizeof(*p));
p->pipe = pipe;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 32da60e0a23..14d0085975b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -227,6 +227,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 86be1b4c4ed..ec5cf376227 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -594,6 +594,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
PUSH_DATA (push, nv50->rt_array_mode);
}
+static void
+nv50_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ unsigned count = (size + 3) / 4;
+ unsigned xcoord = offset & 0xff;
+ unsigned tmp, i;
+
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ unsigned data_words = data_size / 4;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
static void
nv50_clear_buffer(struct pipe_context *pipe,
struct pipe_resource *res,
@@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 8191) / 8192;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
PUSH_DATAf(push, color.f[0]);
@@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
+ PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
PUSH_DATA (push, height);
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -694,25 +783,20 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
- if (width * height != elements) {
- offset += width * height * data_size;
- width = elements - width * height;
- height = 1;
- BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
- BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
- PUSH_DATA (push, height);
- BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
- PUSH_DATA (push, 0x3c);
- }
-
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ if (width * height != elements) {
+ offset += width * height * data_size;
+ width = elements - width * height;
+ nv50_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
+ }
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 4daa57d47bb..7f76ec66edb 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -491,3 +491,52 @@ daic_runout:
daic_runout_check:
branz annul $r7 #daic_runout
bra annul #daic_restore
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
+ *
+ * This is a combination macro for all of our query buffer object needs.
+ * It has the option to clamp results to a configurable amount, as well as
+ * to write out one or two words.
+ *
+ * We use the query engine to write out the values, and expect the query
+ * address to point to the right place.
+ *
+ * arg = clamp value (0 means unclamped); when clamped, only one value is written.
+ * parm[0] = LSB of end value
+ * parm[1] = MSB of end value
+ * parm[2] = LSB of start value
+ * parm[3] = MSB of start value
+ * parm[4] = desired sequence
+ * parm[5] = actual sequence
+ */
+.section #mme9097_query_buffer_write
+ parm $r2
+ parm $r3
+ parm $r4
+ parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */
+ parm $r6
+ parm $r7
+ mov $r6 (sub $r7 $r6) /* actual - desired */
+ mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
+ braz annul $r6 #qbw_ready
+ exit
+qbw_ready:
+ mov $r2 (sub $r2 $r4)
+ braz $r1 #qbw_postclamp
+ mov $r3 (sbb $r3 $r5)
+ branz annul $r3 #qbw_clamp
+ mov $r4 (sub $r1 $r2)
+ mov $r4 (sbb 0x0 0x0)
+ braz annul $r4 #qbw_postclamp
+qbw_clamp:
+ mov $r2 $r1
+qbw_postclamp:
+ send $r2
+ mov $r4 0x1000
+ branz annul $r1 #qbw_done
+ send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ maddr 0x16c2 /* QUERY_SEQUENCE */
+ send $r3
+qbw_done:
+ exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index bf8625e0584..ecadf7e4d29 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
0xfffef837,
0xfffdc027,
};
+
+uint32_t mme9097_query_buffer_write[] = {
+ 0x00000201,
+ 0x00000301,
+/* 0x000a: qbw_ready */
+ 0x00000401,
+ 0x05b08551,
+/* 0x0011: qbw_clamp */
+/* 0x0012: qbw_postclamp */
+ 0x00000601,
+ 0x00000701,
+/* 0x0018: qbw_done */
+ 0x0005be10,
+ 0x00060610,
+ 0x0000b027,
+ 0x00000091,
+ 0x00051210,
+ 0x0001c807,
+ 0x00075b10,
+ 0x00011837,
+ 0x00048c10,
+ 0x00060410,
+ 0x0000a027,
+ 0x00000a11,
+ 0x00001041,
+ 0x04000411,
+ 0x00010837,
+ 0x84010042,
+ 0x05b08021,
+ 0x00001841,
+ 0x840100c2,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 162661ff2a7..547b8f5d309 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -56,6 +56,7 @@ static void
nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
int i, s;
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
@@ -90,6 +91,9 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
}
+ if (flags & PIPE_BARRIER_SHADER_BUFFER) {
+ IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
+ }
}
static void
@@ -122,6 +126,10 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
}
+ for (s = 0; s < 6; ++s)
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
+ pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
+
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
@@ -180,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
- unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -194,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -204,12 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
@@ -253,6 +255,18 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
+
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
+ if (nvc0->buffers[s][i].buffer == res) {
+ nvc0->buffers_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
}
return ref;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 12195489691..4ab2ac41183 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -56,6 +56,7 @@
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
#define NVC0_NEW_TESSFACTOR (1 << 25)
+#define NVC0_NEW_BUFFERS (1 << 26)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -73,9 +74,10 @@
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
#define NVC0_BIND_SUF 245
-#define NVC0_BIND_SCREEN 246
-#define NVC0_BIND_TLS 247
-#define NVC0_BIND_3D_COUNT 248
+#define NVC0_BIND_BUF 246
+#define NVC0_BIND_SCREEN 247
+#define NVC0_BIND_TLS 249
+#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
@@ -187,10 +189,15 @@ struct nvc0_context {
struct nvc0_blitctx *blit;
+ /* NOTE: some of these surfaces may reference buffers */
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
uint16_t surfaces_dirty[2];
uint16_t surfaces_valid[2];
+ struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS];
+ uint32_t buffers_dirty[6];
+ uint32_t buffers_valid[6];
+
struct util_dynarray global_residents;
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 27c026b8b30..49e176cbd49 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -33,4 +33,6 @@
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index c3b53621630..93f211bd5fc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
+ info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
@@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
*/
if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 26;
+ if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 7497317c419..d2acce7d5be 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -73,6 +73,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
}
+static void
+nvc0_get_query_result_resource(struct pipe_context *pipe,
+ struct pipe_query *pq,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ if (!q->funcs->get_query_result_resource) {
+ assert(!"Unexpected lack of get_query_result_resource");
+ return;
+ }
+ q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
+ index, resource, offset);
+}
+
static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq,
@@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe,
}
if (wait)
- nvc0_hw_query_fifo_wait(push, q);
+ nvc0_hw_query_fifo_wait(nvc0, q);
PUSH_SPACE(push, 7);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->begin_query = nvc0_begin_query;
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
+ pipe->get_query_result_resource = nvc0_get_query_result_resource;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index c46361c31aa..a887b220557 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -14,6 +14,13 @@ struct nvc0_query_funcs {
void (*end_query)(struct nvc0_context *, struct nvc0_query *);
boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
boolean, union pipe_query_result *);
+ void (*get_query_result_resource)(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
};
struct nvc0_query {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 1bed0162baf..62385884137 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
return true;
}
+static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ struct nv04_resource *buf = nv04_resource(resource);
+ unsigned stride;
+
+ assert(!hq->funcs || !hq->funcs->get_query_result);
+
+ if (index == -1) {
+ /* TODO: Use a macro to write the availability of the query */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+ uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
+ nvc0->base.push_cb(&nvc0->base, buf, offset,
+ result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
+ ready);
+ return;
+ }
+
+ /* If the fence guarding this query has not been emitted, that makes a lot
+ * of the following logic more complicated.
+ */
+ if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_emit(hq->fence);
+
+ /* We either need to compute a 32- or 64-bit difference between 2 values,
+ * and then store the result as either a 32- or 64-bit value. As such let's
+ * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+ * ones), and have one macro that clamps result to i32, u32, or just
+ * outputs the difference (no need to worry about 64-bit clamping).
+ */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+
+ if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_fifo_wait(nvc0, q);
+
+ nouveau_pushbuf_space(push, 16, 2, 0);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+ PUSH_DATA(push, 0x00000001);
+ else if (result_type == PIPE_QUERY_TYPE_I32)
+ PUSH_DATA(push, 0x7fffffff);
+ else if (result_type == PIPE_QUERY_TYPE_U32)
+ PUSH_DATA(push, 0xffffffff);
+ else
+ PUSH_DATA(push, 0x00000000);
+
+ switch (q->type) {
+ case PIPE_QUERY_SO_STATISTICS:
+ stride = 2;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ stride = 12;
+ break;
+ default:
+ assert(index == 0);
+ stride = 1;
+ break;
+ }
+
+ if (hq->is64bit) {
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ }
+
+ if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+ } else if (hq->is64bit) {
+ PUSH_DATA(push, hq->fence->sequence);
+ nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ PUSH_DATA(push, hq->sequence);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+}
+
static const struct nvc0_query_funcs hw_query_funcs = {
.destroy_query = nvc0_hw_destroy_query,
.begin_query = nvc0_hw_begin_query,
.end_query = nvc0_hw_end_query,
.get_query_result = nvc0_hw_get_query_result,
+ .get_query_result_resource = nvc0_hw_get_query_result_resource,
};
struct nvc0_query *
@@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
}
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
+nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
unsigned offset = hq->offset;
@@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
PUSH_SPACE(push, 5);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->sequence);
+ if (hq->is64bit) {
+ PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, hq->fence->sequence);
+ } else {
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ }
PUSH_DATA (push, (1 << 12) |
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
index 3701eb7100f..8225755d85e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
@@ -51,6 +51,6 @@ void
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
unsigned);
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
+nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 84dbd69b8a5..d368fda707d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -189,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -212,10 +215,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -322,8 +327,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return NVC0_MAX_BUFFERS;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -676,8 +682,9 @@ nvc0_screen_create(struct nouveau_device *dev)
push->rsvd_kick = 5;
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_SHADER_BUFFER |
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_COMMAND_ARGS_BUFFER;
+ PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
@@ -891,9 +898,9 @@ nvc0_screen_create(struct nouveau_device *dev)
/* TIC and TSC entries for each unit (nve4+ only) */
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
@@ -913,8 +920,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -922,8 +929,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
@@ -953,8 +960,12 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
+ /* Reduce likelihood of collision with real buffers by placing the hole at
+ * the top of the 4G area. This will have to be dealt with for real
+ * eventually by blocking off that area from the VM.
+ */
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0xff << 24);
if (screen->eng3d->oclass < GM107_3D_CLASS) {
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
@@ -1039,6 +1050,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8b73102b98b..1a56177815c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -22,6 +22,8 @@
#define NVC0_MAX_VIEWPORTS 16
+#define NVC0_MAX_BUFFERS 32
+
struct nvc0_context;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index dc02b011bdf..382a18ef153 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
continue;
if (!targ->clean)
- nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 24a6c222dd5..cf3d3497c78 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1243,11 +1243,50 @@ nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start_slot, unsigned count,
struct pipe_image_view **views)
{
-#if 0
- nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
+}
- nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
-#endif
+static void
+nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *pbuffers)
+{
+ const unsigned end = start + nr;
+ const unsigned mask = ((1 << nr) - 1) << start;
+ unsigned i;
+
+ assert(t < 5);
+
+ if (pbuffers) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (pbuffers[p].buffer)
+ nvc0->buffers_valid[t] |= (1 << i);
+ else
+ nvc0->buffers_valid[t] &= ~(1 << i);
+ nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset;
+ nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size;
+ pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer);
+ }
+ } else {
+ for (i = start; i < end; ++i)
+ pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL);
+ nvc0->buffers_valid[t] &= ~mask;
+ }
+ nvc0->buffers_dirty[t] |= mask;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+}
+
+static void
+nvc0_set_shader_buffers(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *buffers)
+{
+ const unsigned s = nvc0_shader_stage(shader);
+ nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
+
+ nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
}
static inline void
@@ -1377,6 +1416,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
pipe->set_shader_images = nvc0_set_shader_images;
+ pipe->set_shader_buffers = nvc0_set_shader_buffers;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index b02a590c375..c17223a1b2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -183,9 +183,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
@@ -317,9 +317,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
- PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
@@ -470,6 +470,39 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
}
+static void
+nvc0_validate_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ int i, s;
+
+ for (s = 0; s < 5; s++) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+ PUSH_DATA (push, 512);
+ for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
+ if (nvc0->buffers[s][i].buffer) {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->buffers[s][i].buffer);
+ PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
+ PUSH_DATA (push, 0);
+ BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
+ } else {
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+ }
+ }
+
+}
+
static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
@@ -663,6 +696,7 @@ static struct state_validate {
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
+ { nvc0_validate_buffers, NVC0_NEW_BUFFERS },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 4e43c4e99fd..71726d1aa59 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
}
static void
-nvc0_clear_buffer_cpu(struct pipe_context *pipe,
- struct pipe_resource *res,
- unsigned offset, unsigned size,
- const void *data, int data_size)
+nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
- struct pipe_transfer *pt;
- struct pipe_box box;
- unsigned elements, i;
+ unsigned i;
- elements = size / data_size;
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
- u_box_1d(offset, size, &box);
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
- uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
- &box, &pt);
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
- for (i = 0; i < elements; ++i)
- memcpy(&map[i*data_size], data, data_size);
+ if (!PUSH_SPACE(push, nr + 9))
+ break;
- buf->vtbl->transfer_unmap(pipe, pt);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ unsigned i;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
+
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ if (!PUSH_SPACE(push, nr + 10))
+ break;
+
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
+ PUSH_DATA (push, 0x1001);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned tmp;
+
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
+ else
+ nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
}
static void
@@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
memcpy(&color.ui, data, 16);
break;
case 12:
- /* This doesn't work, RGB32 is not a valid RT format.
- * dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
- * memcpy(&color.ui, data, 12);
- * memset(&color.ui[3], 0, 4);
+ /* RGB32 is not a valid RT format. This will be handled by the pushbuf
+ * uploader.
*/
break;
case 8:
@@ -437,14 +540,26 @@ nvc0_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
if (data_size == 12) {
- /* TODO: Find a way to do this with the GPU! */
- nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
+ nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
return;
}
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 16383) / 16384;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
if (!PUSH_SPACE(push, 40))
return;
@@ -465,7 +580,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
+ PUSH_DATA (push, align(width * data_size, 0x100));
PUSH_DATA (push, height);
PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
@@ -480,24 +595,20 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
-
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
- PUSH_DATAh(push, buf->address + offset);
- PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
- PUSH_DATA (push, height);
-
- IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
-
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 74090ce40a5..7223f5aecfb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -515,12 +515,12 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
return;
address = nvc0->screen->uniform_bo->offset + (5 << 16);
- for (s = 0; s < 5; ++s, address += (1 << 9)) {
+ for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 1024);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
do {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index ad79d1cbb9c..44aed1adeeb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -334,7 +334,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
b = ve->pipe.vertex_buffer_index;
vb = &nvc0->vtxbuf[b];
- if (!vb->buffer) {
+ if (nvc0->vbo_user & (1 << b)) {
if (!(nvc0->constant_vbos & (1 << b))) {
if (ve->pipe.instance_divisor) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
@@ -352,13 +352,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
if (unlikely(ve->pipe.instance_divisor)) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
@@ -382,7 +382,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
unsigned b;
const uint32_t mask = nvc0->vbo_user;
- PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
for (b = 0; b < nvc0->num_vtxbufs; ++b) {
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
struct nv04_resource *buf;
@@ -395,6 +395,10 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
}
/* address/value set in nvc0_update_user_vbufs_shared */
continue;
+ } else if (!vb->buffer) {
+ /* there can be holes in the vertex buffer lists */
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+ continue;
}
buf = nv04_resource(vb->buffer);
offset = vb->buffer_offset;
@@ -410,6 +414,12 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
}
+ /* If there are more elements than buffers, we might not have unset
+ * fetching on the later elements.
+ */
+ for (; b < nvc0->vertex->num_elements; ++b)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+
if (nvc0->vbo_user)
nvc0_update_user_vbufs_shared(nvc0);
}
@@ -680,7 +690,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
if (count & 1) {
count--;
- PUSH_SPACE(push, 1);
+ PUSH_SPACE(push, 2);
BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
PUSH_DATA (push, *map++);
}
@@ -779,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 2);
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
- nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
@@ -811,6 +821,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
+ PUSH_SPACE(push, 7);
+
/* must make FIFO wait for engines idle before continuing to process */
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
(buf_count && buf_count->fence_wr &&
@@ -951,6 +963,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (info->mode == PIPE_PRIM_PATCHES &&
nvc0->state.patch_vertices != info->vertices_per_patch) {
nvc0->state.patch_vertices = info->vertices_per_patch;
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
}
@@ -958,6 +971,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_state_validate(nvc0, ~0, 8);
if (nvc0->vertprog->vp.need_draw_parameters) {
+ PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
@@ -979,6 +993,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
if (nvc0->cb_dirty) {
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
nvc0->cb_dirty = false;
}
@@ -987,6 +1002,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nvc0->textures_coherent[s])
continue;
+ PUSH_SPACE(push, nvc0->num_textures[s] * 2);
+
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
if (!(nvc0->textures_coherent[s] & (1 << i)))
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 90c4f71a945..a2b7f87855d 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -210,6 +210,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 20945ece155..2cf08897a8d 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -225,7 +225,7 @@ void *evergreen_create_compute_state(
}
}
#else
- memset(&shader->binary, 0, sizeof(shader->binary));
+ radeon_shader_binary_init(&shader->binary);
radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
@@ -245,13 +245,31 @@ void *evergreen_create_compute_state(
return shader;
}
-void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
+void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
{
- struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
+ struct r600_context *ctx = (struct r600_context *)ctx_;
+ COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
+ struct r600_pipe_compute *shader = state;
if (!shader)
return;
+#ifdef HAVE_OPENCL
+#if HAVE_LLVM < 0x0306
+ for (unsigned i = 0; i < shader->num_kernels; i++) {
+ struct r600_kernel *kernel = &shader->kernels[i];
+ LLVMDisposeModule(module);
+ }
+ FREE(shader->kernels);
+ LLVMContextDispose(shader->llvm_ctx);
+#else
+ radeon_shader_binary_clean(&shader->binary);
+ r600_destroy_shader(&shader->bc);
+
+ /* TODO destroy shader->code_bo, shader->const_bo
+ * we'll need something like r600_buffer_free */
+#endif
+#endif
FREE(shader);
}
@@ -349,7 +367,7 @@ static void evergreen_emit_direct_dispatch(
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
@@ -723,7 +741,7 @@ static void evergreen_set_global_binding(
* command stream by the start_cs_cmd atom. However, since the SET_CONTEXT_REG
* packet requires that the shader type bit be set, we must initialize all
* context registers needed for compute in this function. The registers
- * intialized by the start_cs_cmd atom can be found in evereen_state.c in the
+ * initialized by the start_cs_cmd atom can be found in evergreen_state.c in the
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
* on the GPU family.
*/
@@ -733,7 +751,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
int num_threads;
int num_stack_entries;
- /* since all required registers are initialised in the
+ /* since all required registers are initialized in the
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
r600_init_command_buffer(cb, 256);
@@ -818,7 +836,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
* R_008E28_SQ_STATIC_THREAD_MGMT3
*/
- /* XXX: We may need to adjust the thread and stack resouce
+ /* XXX: We may need to adjust the thread and stack resource
* values for 3D/compute interop */
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9dfb84965cf..61d32c06671 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -772,7 +772,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
if (util_format_get_blocksize(pipe_format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
if (state->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@@ -986,7 +986,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
unsigned block_size =
align(util_format_get_blocksize(pipe_buffer->format), 4);
unsigned pitch_alignment =
- MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size);
+ MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
unsigned pitch = align(pipe_buffer->width0, pitch_alignment);
/* XXX: This is copied from evergreen_init_color_surface(). I don't
@@ -1098,7 +1098,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
if (util_format_get_blocksize(surf->base.format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
desc = util_format_description(surf->base.format);
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
@@ -1253,7 +1253,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
bankh = eg_bank_wh(bankh);
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
offset >>= 8;
surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
@@ -3467,7 +3467,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
sub_cmd = EG_DMA_COPY_TILED;
lbpp = util_logbase2(bpp);
pitch_tile_max = ((pitch / bpp) / 8) - 1;
- nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
/* T2L */
@@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context *rctx)
unsigned id = 1;
unsigned i;
/* !!!
- * To avoid GPU lockup registers must be emited in a specific order
+ * To avoid GPU lockup registers must be emitted in a specific order
* (no kidding ...). The order below is important and have been
- * partialy infered from analyzing fglrx command stream.
+ * partially inferred from analyzing fglrx command stream.
*
* Don't reorder atom without carefully checking the effect (GPU lockup
* or piglit regression).
@@ -3793,7 +3793,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
uint32_t values[16];
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
*num_patches = 1;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 0b78290295a..1629399d8fe 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -245,8 +245,8 @@ struct r600_bytecode {
unsigned ar_chan;
unsigned ar_handling;
unsigned r6xx_nop_after_rel_dst;
- bool index_loaded[2];
- unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+ bool index_loaded[2];
+ unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
unsigned debug_id;
struct r600_isa* isa;
};
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 8b91372f3ae..0fe7c74418d 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -848,6 +848,7 @@ LLVMModuleRef r600_tgsi_llvm(
lp_build_tgsi_llvm(bld_base, tokens);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(ctx);
return ctx->gallivm.module;
@@ -910,6 +911,11 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
return 0;
}
+void r600_destroy_shader(struct r600_bytecode *bc)
+{
+ FREE(bc->bytecode);
+}
+
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
@@ -922,17 +928,14 @@ unsigned r600_llvm_compile(
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
- memset(&binary, 0, sizeof(struct radeon_shader_binary));
+ radeon_shader_binary_init(&binary);
if (dump)
LLVMDumpModule(mod);
r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
r = r600_create_shader(bc, &binary, use_kill);
- FREE(binary.code);
- FREE(binary.config);
- FREE(binary.rodata);
- FREE(binary.global_symbol_offsets);
+ radeon_shader_binary_clean(&binary);
return r;
}
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index f570b739fbe..3f7fc4bef7e 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -30,6 +30,8 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
const struct radeon_shader_binary *binary,
boolean *use_kill);
+void r600_destroy_shader(struct r600_bytecode *bc);
+
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9b0f31270df..9d378013be0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -285,6 +285,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -342,6 +344,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* kernel command checker support is also required */
return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return family >= CHIP_CEDAR ? 0 : 1;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -364,6 +369,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -415,10 +421,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return rscreen->b.info.r600_clock_crystal_freq != 0;
+ return rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_QUERY_TIMESTAMP:
return rscreen->b.info.drm_minor >= 20 &&
- rscreen->b.info.r600_clock_crystal_freq != 0;
+ rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c
index 18d2b69afb0..0c928345773 100644
--- a/src/gallium/drivers/r600/r600_uvd.c
+++ b/src/gallium/drivers/r600/r600_uvd.c
@@ -160,7 +160,7 @@ static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_
struct r600_texture *chroma = (struct r600_texture *)buf->resources[1];
msg->body.decode.dt_field_mode = buf->base.interlaced;
- msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.tiling_info.num_banks));
+ msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index c7984c47304..b384baa9237 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -181,7 +181,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
old_buf = res->buf;
res->buf = new_buf; /* should be atomic */
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
else
res->gpu_address = 0;
@@ -511,7 +511,7 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
return NULL;
}
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->buf);
else
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index caf7deef37c..ff5b055448a 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -60,7 +60,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
- bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
+ bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
if (!has_vm) {
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index fad7bdec40a..f3529a1fe0f 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -33,10 +33,6 @@
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
-static const char * const r600_pc_shader_suffix[] = {
- "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
unsigned *base_gid, unsigned *sub_index)
@@ -92,6 +88,8 @@ struct r600_pc_counter {
unsigned stride;
};
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
struct r600_query_pc {
struct r600_query_hw b;
@@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
- unsigned shader_mask;
- unsigned query_shader_mask;
+ unsigned shaders;
+ unsigned query_shaders;
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
- if (shader_id == 0)
- shader_mask = R600_PC_SHADER_ALL;
- else
- shader_mask = 1 << (shader_id - 1);
+ shaders = screen->perfcounters->shader_type_bits[shader_id];
- query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
- if (query_shader_mask && query_shader_mask != shader_mask) {
+ query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
- query->shaders |= shader_mask;
+ query->shaders = shaders;
}
- if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+ if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
- query->shaders |= R600_PC_SHADER_WINDOWING;
+ query->shaders = R600_PC_SHADERS_WINDOWING;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
}
if (query->shaders) {
- if ((query->shaders & R600_PC_SHADER_ALL) == 0)
- query->shaders |= R600_PC_SHADER_ALL;
+ if (query->shaders == R600_PC_SHADERS_WINDOWING)
+ query->shaders = 0xffffffff;
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
@@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+ groups_shader = screen->perfcounters->num_shader_types;
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
@@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
groupname = block->group_names;
for (i = 0; i < groups_shader; ++i) {
- unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+ const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+ unsigned shaderlen = strlen(shader_suffix);
for (j = 0; j < groups_se; ++j) {
for (k = 0; k < groups_instance; ++k) {
strcpy(groupname, block->basename);
p = groupname + namelen;
if (block->flags & R600_PC_BLOCK_SHADER) {
- strcpy(p, r600_pc_shader_suffix[i]);
+ strcpy(p, shader_suffix);
p += shaderlen;
}
@@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
block->num_groups *= rscreen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+ block->num_groups *= pc->num_shader_types;
++pc->num_blocks;
pc->num_groups += block->num_groups;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 4c066c14cd8..d75317b1cbe 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -48,6 +48,26 @@ struct r600_multi_fence {
struct pipe_fence_handle *sdma;
};
+/*
+ * shader binary helpers.
+ */
+void radeon_shader_binary_init(struct radeon_shader_binary *b)
+{
+ memset(b, 0, sizeof(*b));
+}
+
+void radeon_shader_binary_clean(struct radeon_shader_binary *b)
+{
+ if (!b)
+ return;
+ FREE(b->code);
+ FREE(b->config);
+ FREE(b->rodata);
+ FREE(b->global_symbol_offsets);
+ FREE(b->relocs);
+ FREE(b->disasm_string);
+}
+
/*
* pipe_context
*/
@@ -251,7 +271,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->chip_class = rscreen->chip_class;
if (rscreen->chip_class >= CIK)
- rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
+ rctx->max_db = MAX2(8, rscreen->info.num_render_backends);
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
@@ -295,7 +315,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (!rctx->ctx)
return false;
- if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
+ if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
@@ -373,6 +393,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+ { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -389,6 +410,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+ { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -698,7 +720,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
- *max_clock_frequency = rscreen->info.max_sclk;
+ *max_clock_frequency = rscreen->info.max_shader_clock;
}
return sizeof(uint32_t);
@@ -734,7 +756,7 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen)
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
- rscreen->info.r600_clock_crystal_freq;
+ rscreen->info.clock_crystal_freq;
}
static void r600_fence_reference(struct pipe_screen *screen,
@@ -778,116 +800,40 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
return rws->fence_wait(rws, rfence->gfx, timeout);
}
-static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
+static void r600_query_memory_info(struct pipe_screen *screen,
+ struct pipe_memory_info *info)
{
- switch ((tiling_config & 0xe) >> 1) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct radeon_winsys *ws = rscreen->ws;
+ unsigned vram_usage, gtt_usage;
- switch ((tiling_config & 0x30) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- default:
- return false;
+ info->total_device_memory = rscreen->info.vram_size / 1024;
+ info->total_staging_memory = rscreen->info.gart_size / 1024;
- }
- switch ((tiling_config & 0xc0) >> 6) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
+ /* The real TTM memory usage is somewhat random, because:
+ *
+ * 1) TTM delays freeing memory, because it can only free it after
+ * fences expire.
+ *
+ * 2) The memory usage can be really low if big VRAM evictions are
+ * taking place, but the real usage is well above the size of VRAM.
+ *
+ * Instead, return statistics of this process.
+ */
+ vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+ gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
-static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
-{
- switch (tiling_config & 0xf) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
+ info->avail_device_memory =
+ vram_usage <= info->total_device_memory ?
+ info->total_device_memory - vram_usage : 0;
+ info->avail_staging_memory =
+ gtt_usage <= info->total_staging_memory ?
+ info->total_staging_memory - gtt_usage : 0;
- switch ((tiling_config & 0xf0) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- case 2:
- rscreen->tiling_info.num_banks = 16;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0xf00) >> 8) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
-
-static bool r600_init_tiling(struct r600_common_screen *rscreen)
-{
- uint32_t tiling_config = rscreen->info.r600_tiling_config;
-
- /* set default group bytes, overridden by tiling info ioctl */
- if (rscreen->chip_class <= R700) {
- rscreen->tiling_info.group_bytes = 256;
- } else {
- rscreen->tiling_info.group_bytes = 512;
- }
-
- if (!tiling_config)
- return true;
-
- if (rscreen->chip_class <= R700) {
- return r600_interpret_tiling(rscreen, tiling_config);
- } else {
- return evergreen_interpret_tiling(rscreen, tiling_config);
- }
+ info->device_memory_evicted =
+ ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+ /* Just return the number of evicted 64KB pages. */
+ info->nr_device_memory_evictions = info->device_memory_evicted / 64;
}
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
@@ -929,6 +875,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.query_memory_info = r600_query_memory_info;
if (rscreen->info.has_uvd) {
rscreen->b.get_video_param = rvid_get_video_param;
@@ -946,9 +893,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
- if (!r600_init_tiling(rscreen)) {
- return false;
- }
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
@@ -968,27 +912,34 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
- printf("family = %i\n", rscreen->info.family);
+ printf("family = %i (%s)\n", rscreen->info.family,
+ r600_get_chip_name(rscreen));
printf("chip_class = %i\n", rscreen->info.chip_class);
- printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
- printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
- printf("max_sclk = %i\n", rscreen->info.max_sclk);
+ printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
+ printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
+ printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_sdma = %i\n", rscreen->info.has_sdma);
+ printf("has_uvd = %i\n", rscreen->info.has_uvd);
+ printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
+ printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
+ printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
+ printf("has_userptr = %i\n", rscreen->info.has_userptr);
+
+ printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
+ printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
- printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
- rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
- printf("has_uvd = %i\n", rscreen->info.has_uvd);
- printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
- printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
- printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
- printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
- printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
- printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
- printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
- printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
- printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
- printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
+
+ printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
+ printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
+ printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
+ printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
+ printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d66e74f9254..e92df876c22 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -71,6 +71,7 @@
#define DBG_NO_IR (1 << 12)
#define DBG_NO_TGSI (1 << 13)
#define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR (1 << 15)
/* Bits 21-31 are reserved for the r600g driver. */
/* features */
#define DBG_NO_ASYNC_DMA (1llu << 32)
@@ -87,6 +88,7 @@
#define DBG_NO_DCC (1llu << 43)
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
+#define DBG_SI_SCHED (1llu << 46)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -129,6 +131,9 @@ struct radeon_shader_binary {
char *disasm_string;
};
+void radeon_shader_binary_init(struct radeon_shader_binary *b);
+void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+
struct r600_resource {
struct u_resource b;
@@ -257,8 +262,6 @@ struct r600_surface {
unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
- unsigned sx_ps_downconvert; /* Stoney only */
- unsigned sx_blend_opt_epsilon; /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
@@ -278,19 +281,12 @@ struct r600_surface {
unsigned pa_su_poly_offset_db_fmt_cntl;
};
-struct r600_tiling_info {
- unsigned num_channels;
- unsigned num_banks;
- unsigned group_bytes;
-};
-
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct radeon_info info;
- struct r600_tiling_info tiling_info;
uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 0aa19cd54fe..f8b62411722 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -100,6 +100,12 @@ static boolean r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_begin: bad query type");
}
@@ -146,6 +152,12 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_end: bad query type");
}
@@ -162,7 +174,7 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* Convert from cycles per millisecond to cycles per second (Hz). */
result->timestamp_disjoint.frequency =
- (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+ (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
result->timestamp_disjoint.disjoint = FALSE;
return TRUE;
case PIPE_QUERY_GPU_FINISHED: {
@@ -171,6 +183,22 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
wait ? PIPE_TIMEOUT_INFINITE : 0);
return result->b;
}
+
+ case R600_QUERY_GPIN_ASIC_ID:
+ result->u32 = 0;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SIMD:
+ result->u32 = rctx->screen->info.num_good_compute_units;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_RB:
+ result->u32 = rctx->screen->info.num_render_backends;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SPI:
+ result->u32 = 1; /* all supported chips have one SPI per SE */
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SE:
+ result->u32 = rctx->screen->info.max_se;
+ return TRUE;
}
result->u64 = query->end_result - query->begin_result;
@@ -908,7 +936,7 @@ boolean r600_query_hw_get_result(struct r600_common_context *rctx,
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
- result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
+ result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq;
}
return TRUE;
}
@@ -1021,13 +1049,13 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
- unsigned num_backends = ctx->screen->info.r600_num_backends;
+ unsigned num_backends = ctx->screen->info.num_render_backends;
unsigned i, mask = 0;
/* if backend_map query is supported by the kernel */
- if (ctx->screen->info.r600_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
- unsigned backend_map = ctx->screen->info.r600_backend_map;
+ if (ctx->screen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
+ unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
unsigned item_width, item_mask;
if (ctx->chip_class >= EVERGREEN) {
@@ -1096,15 +1124,21 @@ err:
return;
}
-#define X(name_, query_type_, type_, result_type_) \
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
.query_type = R600_QUERY_##query_type_, \
.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
- .group_id = ~(unsigned)0 \
+ .group_id = group_id_ \
}
+#define X(name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+#define XG(group_, name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
+
static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
@@ -1116,6 +1150,20 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use it as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
@@ -1123,6 +1171,8 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
};
#undef X
+#undef XG
+#undef XFULL
static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
@@ -1167,16 +1217,40 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
break;
}
+ if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
+ info->group_id += rscreen->perfcounters->num_groups;
+
return 1;
}
+/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related
+ * functions.
+ */
static int r600_get_driver_query_group_info(struct pipe_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ unsigned num_pc_groups = 0;
- return r600_get_perfcounter_group_info(rscreen, index, info);
+ if (rscreen->perfcounters)
+ num_pc_groups = rscreen->perfcounters->num_groups;
+
+ if (!info)
+ return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+ if (index < num_pc_groups)
+ return r600_get_perfcounter_group_info(rscreen, index, info);
+
+ index -= num_pc_groups;
+ if (index >= R600_NUM_SW_QUERY_GROUPS)
+ return 0;
+
+ info->name = "GPIN";
+ info->max_active_queries = 5;
+ info->num_queries = 5;
+ return 1;
}
void r600_query_init(struct r600_common_context *rctx)
@@ -1189,7 +1263,7 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.get_query_result = r600_get_query_result;
rctx->render_cond_atom.emit = r600_emit_query_predication;
- if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
+ if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
LIST_INITHEAD(&rctx->active_nontimer_queries);
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index e5a98bfe5bd..8b2c4e3fe93 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -54,8 +54,18 @@ struct r600_resource;
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_GPIN_ASIC_ID (PIPE_QUERY_DRIVER_SPECIFIC + 14)
+#define R600_QUERY_GPIN_NUM_SIMD (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define R600_QUERY_GPIN_NUM_RB (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define R600_QUERY_GPIN_NUM_SPI (PIPE_QUERY_DRIVER_SPECIFIC + 17)
+#define R600_QUERY_GPIN_NUM_SE (PIPE_QUERY_DRIVER_SPECIFIC + 18)
#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100)
+enum {
+ R600_QUERY_GROUP_GPIN = 0,
+ R600_NUM_SW_QUERY_GROUPS
+};
+
struct r600_query_ops {
void (*destroy)(struct r600_common_context *, struct r600_query *);
boolean (*begin)(struct r600_common_context *, struct r600_query *);
@@ -156,24 +166,6 @@ enum {
R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
-/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */
-enum {
- R600_PC_SHADER_PS = (1 << 0),
- R600_PC_SHADER_VS = (1 << 1),
- R600_PC_SHADER_GS = (1 << 2),
- R600_PC_SHADER_ES = (1 << 3),
- R600_PC_SHADER_HS = (1 << 4),
- R600_PC_SHADER_LS = (1 << 5),
- R600_PC_SHADER_CS = (1 << 6),
-
- R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
- R600_PC_SHADER_GS | R600_PC_SHADER_ES |
- R600_PC_SHADER_HS | R600_PC_SHADER_LS |
- R600_PC_SHADER_CS,
-
- R600_PC_SHADER_WINDOWING = (1 << 31),
-};
-
/* Describes a hardware block with performance counters. Multiple instances of
* each block, possibly per-SE, may exist on the chip. Depending on the block
* and on the user's configuration, we either
@@ -210,6 +202,10 @@ struct r600_perfcounters {
unsigned num_instance_cs_dwords;
unsigned num_shaders_cs_dwords;
+ unsigned num_shader_types;
+ const char * const *shader_type_suffixes;
+ const unsigned *shader_type_bits;
+
void (*get_size)(struct r600_perfcounter_block *,
unsigned count, unsigned *selectors,
unsigned *num_select_dw, unsigned *num_read_dw);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 7c4717d29fa..af206e43860 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -361,8 +361,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
@@ -394,8 +394,8 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
switch (num_pipes) {
@@ -515,7 +515,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
@@ -533,6 +533,10 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
+ /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
+ if (rscreen->family == CHIP_STONEY)
+ num_pipes = 4;
+
switch (num_pipes) {
case 1:
cl_width = 32;
@@ -565,7 +569,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
- pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
rtex->htile.pitch = width;
@@ -1212,10 +1216,30 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
+ unsigned width = u_minify(tex->width0, level);
+ unsigned height = u_minify(tex->height0, level);
- return r600_create_surface_custom(pipe, tex, templ,
- u_minify(tex->width0, level),
- u_minify(tex->height0, level));
+ if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
+ const struct util_format_description *tex_desc
+ = util_format_description(tex->format);
+ const struct util_format_description *templ_desc
+ = util_format_description(templ->format);
+
+ assert(tex_desc->block.bits == templ_desc->block.bits);
+
+ /* Adjust size of surface if and only if the block width or
+ * height is changed. */
+ if (tex_desc->block.width != templ_desc->block.width ||
+ tex_desc->block.height != templ_desc->block.height) {
+ unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
+ unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
+
+ width = nblks_x * templ_desc->block.width;
+ height = nblks_y * templ_desc->block.height;
+ }
+ }
+
+ return r600_create_surface_custom(pipe, tex, templ, width, height);
}
static void r600_surface_destroy(struct pipe_context *pipe,
@@ -1388,7 +1412,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
return;
for (i = 0; i < fb->nr_cbufs; i++) {
- struct r600_surface *surf;
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
@@ -1399,7 +1422,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (!(*buffers & clear_bit))
continue;
- surf = (struct r600_surface *)fb->cbufs[i];
tex = (struct r600_texture *)fb->cbufs[i]->texture;
/* 128-bit formats are unusupported */
@@ -1446,8 +1468,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (clear_words_needed)
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
} else {
- /* RB+ doesn't work with CMASK fast clear. */
- if (surf->sx_ps_downconvert)
+ /* Stoney/RB+ doesn't work with CMASK fast clear. */
+ if (rctx->family == CHIP_STONEY)
continue;
/* ensure CMASK is enabled */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 76be37625f3..f5e3f6af1a0 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1452,6 +1452,74 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
emit_data->args[1], "");
}
+static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+ emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_Y);
+}
+
+static void emit_pk2h(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMTypeRef fp16, i16;
+ LLVMValueRef const16, comp[2];
+ unsigned i;
+
+ fp16 = LLVMHalfTypeInContext(context);
+ i16 = LLVMInt16TypeInContext(context);
+ const16 = lp_build_const_int32(uint_bld->gallivm, 16);
+
+ for (i = 0; i < 2; i++) {
+ comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
+ comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
+ comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
+ }
+
+ comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
+ comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
+
+ emit_data->output[emit_data->chan] = comp[0];
+}
+
+static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+}
+
+static void emit_up2h(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMTypeRef fp16, i16;
+ LLVMValueRef const16, input, val;
+ unsigned i;
+
+ fp16 = LLVMHalfTypeInContext(context);
+ i16 = LLVMInt16TypeInContext(context);
+ const16 = lp_build_const_int32(uint_bld->gallivm, 16);
+ input = emit_data->args[0];
+
+ for (i = 0; i < 2; i++) {
+ val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
+ val = LLVMBuildTrunc(builder, val, i16, "");
+ val = LLVMBuildBitCast(builder, val, fp16, "");
+ emit_data->output[i] =
+ LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
+ }
+}
+
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
@@ -1581,6 +1649,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
@@ -1618,6 +1688,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
@@ -1638,11 +1710,9 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
- /* End the main function with Return*/
- LLVMBuildRetVoid(gallivm->builder);
/* Create the pass manager */
- ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
+ gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
/* This pass should eliminate all the load and store instructions */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 2e5caa67d10..7329ceedf04 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -245,46 +245,49 @@ struct radeon_winsys_cs {
};
struct radeon_info {
+ /* Device info. */
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
- uint32_t max_sclk;
- uint32_t num_good_compute_units;
- uint32_t max_se;
- uint32_t max_sh_per_se;
+ boolean has_virtual_memory;
+ bool gfx_ib_pad_with_type2;
+ boolean has_sdma;
+ boolean has_uvd;
+ uint32_t vce_fw_version;
+ uint32_t vce_harvest_config;
+ uint32_t clock_crystal_freq;
+ /* Kernel info. */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
-
- boolean has_uvd;
- uint32_t vce_fw_version;
boolean has_userptr;
- bool gfx_ib_pad_with_type2;
+ /* Shader cores. */
+ uint32_t r600_max_quad_pipes; /* wave size / 16 */
+ uint32_t max_shader_clock;
+ uint32_t num_good_compute_units;
+ uint32_t max_se; /* shader engines */
+ uint32_t max_sh_per_se; /* shader arrays per shader engine */
+
+ /* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
+ uint32_t r600_gb_backend_map; /* R600 harvest config */
+ boolean r600_gb_backend_map_valid;
+ uint32_t r600_num_banks;
+ uint32_t num_render_backends;
+ uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
+ uint32_t pipe_interleave_bytes;
+ uint32_t enabled_rb_mask; /* GCN harvest config */
- uint32_t r600_num_backends;
- uint32_t r600_clock_crystal_freq;
- uint32_t r600_tiling_config;
- uint32_t r600_num_tile_pipes;
- uint32_t r600_max_pipes;
- boolean r600_virtual_address;
- boolean r600_has_dma;
-
- uint32_t r600_backend_map;
- boolean r600_backend_map_valid;
-
+ /* Tile modes. */
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
- uint32_t si_backend_enabled_mask;
-
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
- uint32_t vce_harvest_config;
};
enum radeon_feature_id {
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 105a1b2a878..76913914b38 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -308,7 +308,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
}
mtilew = (8 * rsrc->surface.bankw *
- sctx->screen->b.tiling_info.num_channels) *
+ sctx->screen->b.info.num_tile_pipes) *
rsrc->surface.mtilea;
assert(!(mtilew & (mtilew - 1)));
mtileh = (8 * rsrc->surface.bankh * num_banks) /
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 6ef6eeec178..825fbb181ba 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -461,9 +461,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
LLVMContextDispose(program->llvm_ctx);
}
#else
- FREE(program->shader.binary.config);
- FREE(program->shader.binary.rodata);
- FREE(program->shader.binary.global_symbol_offsets);
si_shader_destroy(&program->shader);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index baa02293c41..d60c4515625 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -177,7 +177,7 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
- si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
+ si_mark_atom_dirty(ctx, &ctx->cb_render_state);
si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 7ee1daee7bf..24855e4e6f2 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -56,6 +56,8 @@ enum si_pc_reg_layout {
/* Registers are laid out in decreasing rather than increasing order. */
SI_PC_REG_REVERSE = 4,
+
+ SI_PC_FAKE = 8,
};
struct si_pc_block_base {
@@ -79,6 +81,23 @@ struct si_pc_block {
unsigned instances;
};
+/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of
+ * performance counter group IDs.
+ */
+static const char * const si_pc_shader_type_suffixes[] = {
+ "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS"
+};
+
+static const unsigned si_pc_shader_type_bits[] = {
+ 0x7f,
+ S_036780_ES_EN(1),
+ S_036780_GS_EN(1),
+ S_036780_VS_EN(1),
+ S_036780_PS_EN(1),
+ S_036780_LS_EN(1),
+ S_036780_HS_EN(1),
+ S_036780_CS_EN(1),
+};
static struct si_pc_block_base cik_CB = {
.name = "CB",
@@ -308,56 +327,80 @@ static struct si_pc_block_base cik_WD = {
.counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
};
+static struct si_pc_block_base cik_MC = {
+ .name = "MC",
+ .num_counters = 4,
+
+ .layout = SI_PC_FAKE,
+};
+
+static struct si_pc_block_base cik_SRBM = {
+ .name = "SRBM",
+ .num_counters = 2,
+
+ .layout = SI_PC_FAKE,
+};
+
/* Both the number of instances and selectors varies between chips of the same
* class. We only differentiate by class here and simply expose the maximum
* number over all chips in a class.
+ *
+ * Unfortunately, GPUPerfStudio uses the order of performance counter groups
+ * blindly once it believes it has identified the hardware, so the order of
+ * blocks here matters.
*/
static struct si_pc_block groups_CIK[] = {
{ &cik_CB, 226, 4 },
- { &cik_CPC, 22 },
{ &cik_CPF, 17 },
- { &cik_CPG, 46 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 22 },
- { &cik_PA_SC, 395 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 395 },
{ &cik_SPI, 186 },
{ &cik_SQ, 252 },
{ &cik_SX, 32 },
{ &cik_TA, 111, 11 },
{ &cik_TCA, 39, 2 },
{ &cik_TCC, 160, 16 },
- { &cik_TCP, 154, 11 },
{ &cik_TD, 55, 11 },
+ { &cik_TCP, 154, 11 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 140 },
+ { &cik_IA, 22 },
+ { &cik_MC, 22 },
+ { &cik_SRBM, 19 },
{ &cik_WD, 22 },
+ { &cik_CPG, 46 },
+ { &cik_CPC, 22 },
+
};
static struct si_pc_block groups_VI[] = {
{ &cik_CB, 396, 4 },
- { &cik_CPC, 24 },
{ &cik_CPF, 19 },
- { &cik_CPG, 48 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 24 },
- { &cik_PA_SC, 397 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 397 },
{ &cik_SPI, 197 },
{ &cik_SQ, 273 },
{ &cik_SX, 34 },
{ &cik_TA, 119, 16 },
{ &cik_TCA, 35, 2 },
{ &cik_TCC, 192, 16 },
- { &cik_TCP, 180, 16 },
{ &cik_TD, 55, 16 },
+ { &cik_TCP, 180, 16 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 147 },
+ { &cik_IA, 24 },
+ { &cik_MC, 22 },
+ { &cik_SRBM, 27 },
{ &cik_WD, 37 },
+ { &cik_CPG, 48 },
+ { &cik_CPC, 24 },
+
};
static void si_pc_get_size(struct r600_perfcounter_block *group,
@@ -368,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block *group,
struct si_pc_block_base *regs = sigroup->b;
unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
- if (layout_multi == SI_PC_MULTI_BLOCK) {
+ if (regs->layout & SI_PC_FAKE) {
+ *num_select_dw = 0;
+ } else if (layout_multi == SI_PC_MULTI_BLOCK) {
if (count < regs->num_multi)
*num_select_dw = 2 * (count + 2) + regs->num_prelude;
else
@@ -431,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
assert(count <= regs->num_counters);
+ if (regs->layout & SI_PC_FAKE)
+ return;
+
if (layout_multi == SI_PC_MULTI_BLOCK) {
assert(!(regs->layout & SI_PC_REG_REVERSE));
@@ -590,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
- if (regs->layout & SI_PC_REG_REVERSE)
- reg_delta = -reg_delta;
+ if (!(regs->layout & SI_PC_FAKE)) {
+ if (regs->layout & SI_PC_REG_REVERSE)
+ reg_delta = -reg_delta;
- for (idx = 0; idx < count; ++idx) {
- if (regs->counters)
- reg = regs->counters[idx];
+ for (idx = 0; idx < count; ++idx) {
+ if (regs->counters)
+ reg = regs->counters[idx];
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM));
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- va += 4;
- reg += reg_delta;
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ reg += reg_delta;
+ }
+ } else {
+ for (idx = 0; idx < count; ++idx) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, 0); /* immediate */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ }
}
}
@@ -656,6 +717,10 @@ void si_init_perfcounters(struct si_screen *screen)
pc->num_stop_cs_dwords += 6;
}
+ pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
+ pc->shader_type_suffixes = si_pc_shader_type_suffixes;
+ pc->shader_type_bits = si_pc_shader_type_bits;
+
pc->get_size = si_pc_get_size;
pc->emit_instance = si_pc_emit_instance;
pc->emit_shaders = si_pc_emit_shaders;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 0c1ae90f9da..61ce976c32c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -215,7 +215,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+#if HAVE_LLVM >= 0x0308
+ sscreen->b.debug_flags & DBG_SI_SCHED ?
+ "+DumpCode,+vgpr-spilling,+si-scheduler" :
+#endif
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
@@ -304,6 +308,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -329,12 +335,18 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ return HAVE_LLVM >= 0x0306;
+
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -344,12 +356,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
- case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -399,7 +411,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return sscreen->b.info.r600_clock_crystal_freq != 0;
+ return sscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -541,57 +553,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
r600_destroy_common_screen(&sscreen->b);
}
-#define SI_TILE_MODE_COLOR_2D_8BPP 14
-
-/* Initialize pipe config. This is especially important for GPUs
- * with 16 pipes and more where it's initialized incorrectly by
- * the TILING_CONFIG ioctl. */
-static bool si_initialize_pipe_config(struct si_screen *sscreen)
-{
- unsigned mode2d;
-
- /* This is okay, because there can be no 2D tiling without
- * the tile mode array, so we won't need the pipe config.
- * Return "success".
- */
- if (!sscreen->b.info.si_tile_mode_array_valid)
- return true;
-
- /* The same index is used for the 2D mode on CIK too. */
- mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP];
-
- switch (G_009910_PIPE_CONFIG(mode2d)) {
- case V_02803C_ADDR_SURF_P2:
- sscreen->b.tiling_info.num_channels = 2;
- break;
- case V_02803C_X_ADDR_SURF_P4_8X16:
- case V_02803C_X_ADDR_SURF_P4_16X16:
- case V_02803C_X_ADDR_SURF_P4_16X32:
- case V_02803C_X_ADDR_SURF_P4_32X32:
- sscreen->b.tiling_info.num_channels = 4;
- break;
- case V_02803C_X_ADDR_SURF_P8_16X16_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X32:
- case V_02803C_X_ADDR_SURF_P8_32X64_32X32:
- sscreen->b.tiling_info.num_channels = 8;
- break;
- case V_02803C_X_ADDR_SURF_P16_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P16_32X32_16X16:
- sscreen->b.tiling_info.num_channels = 16;
- break;
- default:
- assert(0);
- fprintf(stderr, "radeonsi: Unknown pipe config %i.\n",
- G_009910_PIPE_CONFIG(mode2d));
- return false;
- }
- return true;
-}
-
static bool si_init_gs_info(struct si_screen *sscreen)
{
switch (sscreen->b.family) {
@@ -636,7 +597,6 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen) ||
!si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e2725fe3679..48947442757 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -193,7 +193,7 @@ struct si_context {
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
- struct r600_atom cb_target_mask;
+ struct r600_atom cb_render_state;
struct si_blend_color blend_color;
struct r600_atom clip_regs;
struct si_clip_state clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 94c1129c88d..d9ed6b234e0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4074,7 +4074,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
si_shader_dump_disassembly(&shader->binary, debug);
si_shader_dump_stats(sscreen, &shader->config,
- shader->selector->info.num_inputs,
+ shader->selector ? shader->selector->info.num_inputs : 0,
shader->binary.code_size, debug, processor);
}
@@ -4092,7 +4092,7 @@ int si_compile_llvm(struct si_screen *sscreen,
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & DBG_NO_IR))
+ if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
LLVMDumpModule(mod);
}
@@ -4177,6 +4177,13 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+ LLVMDumpModule(bld_base->base.gallivm->module);
+
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
if (dump)
@@ -4383,9 +4390,16 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
goto out;
}
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ mod = bld_base->base.gallivm->module;
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, si_shader_ctx.type))
+ LLVMDumpModule(mod);
+
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
- mod = bld_base->base.gallivm->module;
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, si_shader_ctx.type);
if (r) {
@@ -4423,14 +4437,6 @@ out:
return r;
}
-void si_shader_destroy_binary(struct radeon_shader_binary *binary)
-{
- FREE(binary->code);
- FREE(binary->rodata);
- FREE(binary->relocs);
- FREE(binary->disasm_string);
-}
-
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
@@ -4442,5 +4448,6 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->scratch_bo, NULL);
r600_resource_reference(&shader->bo, NULL);
- si_shader_destroy_binary(&shader->binary);
+
+ radeon_shader_binary_clean(&shader->binary);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c1512078a18..98bdb890a45 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -345,7 +345,6 @@ int si_compile_llvm(struct si_screen *sscreen,
struct pipe_debug_callback *debug,
unsigned processor);
void si_shader_destroy(struct si_shader *shader);
-void si_shader_destroy_binary(struct radeon_shader_binary *binary);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 9e0ccfc5dde..bf780777b50 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -97,7 +97,7 @@ uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
}
/* The old way. */
- switch (sscreen->b.tiling_info.num_banks) {
+ switch (sscreen->b.info.r600_num_banks) {
case 2:
return V_02803C_ADDR_SURF_2_BANK;
case 4:
@@ -189,14 +189,14 @@ unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
/* This is probably broken for a lot of chips, but it's only used
* if the kernel cannot return the tile mode array for CIK. */
- switch (sscreen->b.info.r600_num_tile_pipes) {
+ switch (sscreen->b.info.num_tile_pipes) {
case 16:
return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
case 8:
return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
case 4:
default:
- if (sscreen->b.info.r600_num_backends == 4)
+ if (sscreen->b.info.num_render_backends == 4)
return V_02803C_X_ADDR_SURF_P4_16X16;
else
return V_02803C_X_ADDR_SURF_P4_8X16;
@@ -238,7 +238,8 @@ static unsigned si_pack_float_12p4(float x)
/*
* Inferred framebuffer and blender state.
*
- * One of the reasons this must be derived from the framebuffer state is that:
+ * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
+ * is that:
* - The blend state mask is 0xf most of the time.
* - The COLOR1 format isn't INVALID because of possible dual-source blending,
* so COLOR1 is enabled pretty much all the time.
@@ -246,18 +247,18 @@ static unsigned si_pack_float_12p4(float x)
*
* Another reason is to avoid a hang with dual source blending.
*/
-static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
+static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t mask = 0, i;
+ uint32_t cb_target_mask = 0, i;
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
if (sctx->framebuffer.state.cbufs[i])
- mask |= 0xf << (4*i);
+ cb_target_mask |= 0xf << (4*i);
if (blend)
- mask &= blend->cb_target_mask;
+ cb_target_mask &= blend->cb_target_mask;
/* Avoid a hang that happens when dual source blending is enabled
* but there is not enough color outputs. This is undefined behavior,
@@ -268,9 +269,146 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
if (blend && blend->dual_src_blend &&
sctx->ps_shader.cso &&
(sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
- mask = 0;
+ cb_target_mask = 0;
- radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
+
+ /* STONEY-specific register settings. */
+ if (sctx->b.family == CHIP_STONEY) {
+ unsigned spi_shader_col_format =
+ sctx->ps_shader.cso ?
+ sctx->ps_shader.current->key.ps.spi_shader_col_format : 0;
+ unsigned sx_ps_downconvert = 0;
+ unsigned sx_blend_opt_epsilon = 0;
+ unsigned sx_blend_opt_control = 0;
+
+ for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+ struct r600_surface *surf =
+ (struct r600_surface*)sctx->framebuffer.state.cbufs[i];
+ unsigned format, swap, spi_format, colormask;
+ bool has_alpha, has_rgb;
+
+ if (!surf)
+ continue;
+
+ format = G_028C70_FORMAT(surf->cb_color_info);
+ swap = G_028C70_COMP_SWAP(surf->cb_color_info);
+ spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
+ colormask = (cb_target_mask >> (i * 4)) & 0xf;
+
+ /* Set if RGB and A are present. */
+ has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
+
+ if (format == V_028C70_COLOR_8 ||
+ format == V_028C70_COLOR_16 ||
+ format == V_028C70_COLOR_32)
+ has_rgb = !has_alpha;
+ else
+ has_rgb = true;
+
+ /* Check the colormask and export format. */
+ if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
+ has_rgb = false;
+ if (!(colormask & PIPE_MASK_A))
+ has_alpha = false;
+
+ if (spi_format == V_028714_SPI_SHADER_ZERO) {
+ has_rgb = false;
+ has_alpha = false;
+ }
+
+ /* Disable value checking for disabled channels. */
+ if (!has_rgb)
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+ if (!has_alpha)
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+ /* Enable down-conversion for 32bpp and smaller formats. */
+ switch (format) {
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_5_6_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_1_5_5_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_4_4_4_4:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_0280A0_SWAP_STD &&
+ spi_format == V_028714_SPI_SHADER_32_R)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ else if (swap == V_0280A0_SWAP_ALT_REV &&
+ spi_format == V_028714_SPI_SHADER_32_AR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (swap == V_0280A0_SWAP_STD ||
+ swap == V_0280A0_SWAP_STD_REV)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+ else
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_10_11_11:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_2_10_10_10:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+ }
+ break;
+ }
+ }
+
+ if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
+ sx_ps_downconvert = 0;
+ sx_blend_opt_epsilon = 0;
+ sx_blend_opt_control = 0;
+ }
+
+ radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
+ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
+ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */
+ }
}
/*
@@ -390,6 +528,36 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
}
}
+/**
+ * Get rid of DST in the blend factors by commuting the operands:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
+ unsigned *dst_factor, unsigned expected_dst,
+ unsigned replacement_src)
+{
+ if (*src_factor == expected_dst &&
+ *dst_factor == PIPE_BLENDFACTOR_ZERO) {
+ *src_factor = PIPE_BLENDFACTOR_ZERO;
+ *dst_factor = replacement_src;
+
+ /* Commuting the operands requires reversing subtractions. */
+ if (*func == PIPE_BLEND_SUBTRACT)
+ *func = PIPE_BLEND_REVERSE_SUBTRACT;
+ else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
+ *func = PIPE_BLEND_SUBTRACT;
+ }
+}
+
+static bool si_blend_factor_uses_dst(unsigned factor)
+{
+ return factor == PIPE_BLENDFACTOR_DST_COLOR ||
+ factor == PIPE_BLENDFACTOR_DST_ALPHA ||
+ factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
+}
+
static void *si_create_blend_state_mode(struct pipe_context *ctx,
const struct pipe_blend_state *state,
unsigned mode)
@@ -397,7 +565,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context*)ctx;
struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
struct si_pm4_state *pm4 = &blend->pm4;
-
+ uint32_t sx_mrt_blend_opt[8] = {0};
uint32_t color_control = 0;
if (!blend)
@@ -435,12 +603,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
unsigned srcA = state->rt[j].alpha_src_factor;
unsigned dstA = state->rt[j].alpha_dst_factor;
+ unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
unsigned blend_cntl = 0;
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+
if (!state->rt[j].colormask)
continue;
- /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
+ /* cb_render_state will disable unused ones */
blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
if (!state->rt[j].blend_enable) {
@@ -448,6 +621,50 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
continue;
}
+ /* Blending optimizations for Stoney.
+ * These transformations don't change the behavior.
+ *
+ * First, get rid of DST in the blend factors:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_ALPHA,
+ PIPE_BLENDFACTOR_SRC_ALPHA);
+
+ /* Look up the ideal settings from tables. */
+ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+ dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+ srcA_opt = si_translate_blend_opt_factor(srcA, true);
+ dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+ /* Handle interdependencies. */
+ if (si_blend_factor_uses_dst(srcRGB))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ if (si_blend_factor_uses_dst(srcA))
+ dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+ if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
+ (dstRGB == PIPE_BLENDFACTOR_ZERO ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+ /* Set the final value. */
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_SRC_OPT(srcRGB_opt) |
+ S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) |
+ S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+
+ /* Set blend state. */
blend_cntl |= S_028780_ENABLE(1);
blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
@@ -480,41 +697,13 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
}
if (sctx->b.family == CHIP_STONEY) {
- uint32_t sx_blend_opt_control = 0;
-
- for (int i = 0; i < 8; i++) {
- const int j = state->independent_blend_enable ? i : 0;
-
- /* TODO: We can also set this if the surface doesn't contain RGB. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
- sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
-
- /* TODO: We can also set this if the surface doesn't contain alpha. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & PIPE_MASK_A))
- sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
-
- if (!state->rt[j].blend_enable) {
- si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
- continue;
- }
-
+ for (int i = 0; i < 8; i++)
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
- S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
- S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
- S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
- }
+ sx_mrt_blend_opt[i]);
- si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
-
- /* RB+ doesn't work with dual source blending */
- if (blend->dual_src_blend)
+ /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
+ if (blend->dual_src_blend || state->logicop_enable ||
+ mode == V_028808_CB_RESOLVE)
color_control |= S_028808_DISABLE_DUAL_QUAD(1);
}
@@ -532,7 +721,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -2097,8 +2286,10 @@ static void si_initialize_color_surface(struct si_context *sctx,
color_pitch = S_028C64_TILE_MAX(pitch);
+ /* Intensity is implemented as Red, so treat it that way. */
color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
- S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
+ S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 ||
+ util_format_is_intensity(surf->base.format));
if (rtex->resource.b.b.nr_samples > 1) {
unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
@@ -2169,61 +2360,6 @@ static void si_initialize_color_surface(struct si_context *sctx,
/* Determine pixel shader export format */
si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
- if (sctx->b.family == CHIP_STONEY &&
- !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
- switch (desc->channel[0].size) {
- case 32:
- if (desc->nr_channels == 1) {
- if (swap == V_0280A0_SWAP_STD)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
- else if (swap == V_0280A0_SWAP_ALT_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
- }
- break;
- case 16:
- /* For 1-channel formats, use the superset thereof. */
- if (desc->nr_channels <= 2) {
- if (swap == V_0280A0_SWAP_STD ||
- swap == V_0280A0_SWAP_STD_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
- else
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
- }
- break;
- case 11:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
- surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
- }
- break;
- case 10:
- if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
- surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
- }
- break;
- case 8:
- /* For 1 and 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
- surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
- break;
- case 5:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
- surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
- } else if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
- surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
- }
- break;
- case 4:
- /* For 1 nad 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
- surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
- break;
- }
- }
-
surf->color_initialized = true;
}
@@ -2459,7 +2595,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
}
si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
@@ -2512,8 +2648,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
struct r600_surface *cb = NULL;
- uint32_t sx_ps_downconvert = 0;
- uint32_t sx_blend_opt_epsilon = 0;
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
@@ -2564,29 +2698,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
if (sctx->b.chip_class >= VI)
radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
-
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
i++;
}
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
- if (sctx->b.family == CHIP_STONEY) {
- radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
- radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
- radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
- }
-
/* ZS buffer. */
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
@@ -3374,7 +3497,7 @@ void si_init_state_functions(struct si_context *sctx)
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
- si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+ si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
@@ -3449,8 +3572,8 @@ si_write_harvested_raster_configs(struct si_context *sctx,
{
unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
unsigned rb_per_se = num_rb / num_se;
unsigned se_mask[4];
@@ -3579,8 +3702,8 @@ si_write_harvested_raster_configs(struct si_context *sctx,
static void si_init_config(struct si_context *sctx)
{
struct si_screen *sscreen = sctx->screen;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index be3488e6dba..507f45938ce 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -124,7 +124,7 @@ union si_state_atoms {
struct r600_atom *db_render_state;
struct r600_atom *msaa_config;
struct r600_atom *sample_mask;
- struct r600_atom *cb_target_mask;
+ struct r600_atom *cb_render_state;
struct r600_atom *blend_color;
struct r600_atom *clip_regs;
struct r600_atom *clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 36174eb5a94..bbef429edc5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -705,23 +705,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
}
/* Select the hw shader variant depending on the current state. */
-static int si_shader_select(struct pipe_context *ctx,
- struct si_shader_ctx_state *state)
+static int si_shader_select_with_key(struct pipe_context *ctx,
+ struct si_shader_ctx_state *state,
+ union si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
- union si_shader_key key;
struct si_shader *iter, *shader = NULL;
int r;
- si_shader_selector_key(ctx, sel, &key);
-
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
- if (likely(current && memcmp(¤t->key, &key, sizeof(key)) == 0))
+ if (likely(current && memcmp(¤t->key, key, sizeof(*key)) == 0))
return 0;
pipe_mutex_lock(sel->mutex);
@@ -730,7 +728,7 @@ static int si_shader_select(struct pipe_context *ctx,
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
- memcmp(&iter->key, &key, sizeof(key)) == 0) {
+ memcmp(&iter->key, key, sizeof(*key)) == 0) {
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
@@ -744,7 +742,7 @@ static int si_shader_select(struct pipe_context *ctx,
return -ENOMEM;
}
shader->selector = sel;
- shader->key = key;
+ shader->key = *key;
r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
@@ -768,6 +766,15 @@ static int si_shader_select(struct pipe_context *ctx,
return 0;
}
+static int si_shader_select(struct pipe_context *ctx,
+ struct si_shader_ctx_state *state)
+{
+ union si_shader_key key;
+
+ si_shader_selector_key(ctx, state->cso, &key);
+ return si_shader_select_with_key(ctx, state, &key);
+}
+
static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
@@ -888,8 +895,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
+ union si_shader_key key;
- if (si_shader_select(ctx, &state)) {
+ memset(&key, 0, sizeof(key));
+
+ /* Set reasonable defaults, so that the shader key doesn't
+ * cause any code to be eliminated.
+ */
+ switch (sel->type) {
+ case PIPE_SHADER_TESS_CTRL:
+ key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ key.ps.alpha_func = PIPE_FUNC_ALWAYS;
+ for (i = 0; i < 8; i++)
+ if (sel->info.colors_written & (1 << i))
+ key.ps.spi_shader_col_format |=
+ V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
+ break;
+ }
+
+ if (si_shader_select_with_key(ctx, &state, &key)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);
@@ -1001,7 +1027,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
@@ -1726,6 +1752,9 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
}
+ if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
if (sctx->ps_db_shader_control != db_shader_control) {
sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 3bc580899d4..097ffe6f920 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -179,6 +179,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return 65536;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -261,6 +263,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 8d04222a0cd..d5405f8eacf 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -358,6 +358,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
@@ -396,6 +398,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 6e703f76499..4d03fe1ee0b 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -1578,6 +1578,45 @@ static void trace_context_set_tess_state(struct pipe_context *_context,
}
+static void trace_context_set_shader_buffers(struct pipe_context *_context,
+ unsigned shader,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *buffers)
+{
+ struct trace_context *tr_context = trace_context(_context);
+ struct pipe_context *context = tr_context->pipe;
+ struct pipe_shader_buffer *_buffers = NULL;
+
+ trace_dump_call_begin("pipe_context", "set_shader_buffers");
+ trace_dump_arg(ptr, context);
+ trace_dump_arg(uint, shader);
+ trace_dump_arg(uint, start);
+ trace_dump_arg_begin("buffers");
+ trace_dump_struct_array(shader_buffer, buffers, nr);
+ trace_dump_arg_end();
+ trace_dump_call_end();
+
+ if (buffers) {
+ int i;
+
+ _buffers = MALLOC(nr * sizeof(struct pipe_shader_buffer));
+ if (!_buffers)
+ return;
+
+ for (i = 0; i < nr; i++) {
+ _buffers[i] = buffers[i];
+ _buffers[i].buffer = trace_resource_unwrap(
+ tr_context, _buffers[i].buffer);
+ }
+ }
+
+ context->set_shader_buffers(context, shader, start, nr, _buffers);
+
+ if (_buffers)
+ FREE(_buffers);
+}
+
+
static const struct debug_named_value rbug_blocker_flags[] = {
{"before", 1, NULL},
{"after", 2, NULL},
@@ -1675,6 +1714,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(texture_barrier);
TR_CTX_INIT(memory_barrier);
TR_CTX_INIT(set_tess_state);
+ TR_CTX_INIT(set_shader_buffers);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 54f022a8ab6..cfbf53cf767 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -688,6 +688,24 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
}
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_shader_buffer");
+ trace_dump_member(resource_ptr, state, buffer);
+ trace_dump_member(uint, state, buffer_offset);
+ trace_dump_member(uint, state, buffer_size);
+ trace_dump_struct_end();
+}
+
+
void trace_dump_draw_info(const struct pipe_draw_info *state)
{
if (!trace_dumping_enabled_locked())
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 117b3c75e87..4f4ade155bc 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -78,6 +78,8 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state);
void trace_dump_constant_buffer(const struct pipe_constant_buffer *state);
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *buffer);
+
void trace_dump_draw_info(const struct pipe_draw_info *state);
void trace_dump_blit_info(const struct pipe_blit_info *);
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 5d071ec862f..41660f6ac4d 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -245,10 +245,19 @@ vc4_job_submit(struct vc4_context *vc4)
fprintf(stderr, "Draw call returned %s. "
"Expect corruption.\n", strerror(errno));
warned = true;
+ } else if (!ret) {
+ vc4->last_emit_seqno = submit.seqno;
}
}
- vc4->last_emit_seqno = submit.seqno;
+ if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
+ if (!vc4_wait_seqno(vc4->screen,
+ vc4->last_emit_seqno - 5,
+ PIPE_TIMEOUT_INFINITE,
+ "job throttling")) {
+ fprintf(stderr, "Job throttling failed\n");
+ }
+ }
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 08c2dad8406..b19d31af6ac 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -127,6 +127,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Unsupported features. */
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
@@ -199,6 +200,9 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index fb2e5670ef0..18263e91e6a 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -169,6 +169,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
return vscreen->caps.caps.v1.max_tbo_size > 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 0;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_CUBE_MAP_ARRAY:
return vscreen->caps.caps.v1.bset.cube_map_array;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
@@ -228,6 +230,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;
@@ -557,6 +561,7 @@ virgl_create_screen(struct virgl_winsys *vws)
vws->get_caps(vws, &screen->caps);
+ screen->refcnt = 1;
util_format_s3tc_init();
return &screen->base;
diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h
index 52e72ca4958..8cac38d7e96 100644
--- a/src/gallium/drivers/virgl/virgl_screen.h
+++ b/src/gallium/drivers/virgl/virgl_screen.h
@@ -28,6 +28,12 @@
struct virgl_screen {
struct pipe_screen base;
+
+ int refcnt;
+
+ /* place for winsys to stash its own stuff: */
+ void *winsys_priv;
+
struct virgl_winsys *vws;
struct virgl_drm_caps caps;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f69a75be50e..6c95b7b2178 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -150,6 +150,28 @@ struct pipe_context {
struct pipe_query *q,
boolean wait,
union pipe_query_result *result);
+
+ /**
+ * Get results of a query, storing into resource. Note that this may not
+ * be used with batch queries.
+ *
+ * \param wait if true, this query will block until the result is ready
+ * \param result_type the type of the value being stored
+ * \param index for queries that return multiple pieces of data, which
+ * item of that data to store (e.g. for
+ * PIPE_QUERY_PIPELINE_STATISTICS).
+ * When the index is -1, instead of the value of the query
+ * the driver should instead write a 1/0 to the appropriate
+ * location with 1 meaning that the query result is available.
+ */
+ void (*get_query_result_resource)(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
+
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b46187bc8a1..800f16cd250 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -352,6 +352,8 @@ enum pipe_flush_flags
* Flags for pipe_context::memory_barrier.
*/
#define PIPE_BARRIER_MAPPED_BUFFER (1 << 0)
+#define PIPE_BARRIER_SHADER_BUFFER (1 << 1)
+#define PIPE_BARRIER_QUERY_BUFFER (1 << 2)
/**
* Resource binding flags -- state tracker must specify in advance all
@@ -375,6 +377,7 @@ enum pipe_flush_flags
#define PIPE_BIND_SHADER_IMAGE (1 << 15) /* set_shader_images */
#define PIPE_BIND_COMPUTE_RESOURCE (1 << 16) /* set_compute_resources */
#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 17) /* pipe_draw_info.indirect */
+#define PIPE_BIND_QUERY_BUFFER (1 << 18) /* get_query_result_resource */
/**
* The first two flags above were previously part of the amorphous
@@ -588,6 +591,7 @@ enum pipe_cap
PIPE_CAP_CUBE_MAP_ARRAY,
PIPE_CAP_TEXTURE_BUFFER_OBJECTS,
PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT,
+ PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY,
PIPE_CAP_TGSI_TEXCOORD,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER,
PIPE_CAP_QUERY_PIPELINE_STATISTICS,
@@ -645,6 +649,9 @@ enum pipe_cap
PIPE_CAP_INVALIDATE_BUFFER,
PIPE_CAP_GENERATE_MIPMAP,
PIPE_CAP_STRING_MARKER,
+ PIPE_CAP_SURFACE_REINTERPRET_BLOCKS,
+ PIPE_CAP_QUERY_BUFFER_OBJECT,
+ PIPE_CAP_QUERY_MEMORY_INFO,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@@ -837,6 +844,14 @@ union pipe_query_result
union pipe_numeric_type_union batch[1];
};
+enum pipe_query_value_type
+{
+ PIPE_QUERY_TYPE_I32,
+ PIPE_QUERY_TYPE_U32,
+ PIPE_QUERY_TYPE_I64,
+ PIPE_QUERY_TYPE_U64,
+};
+
union pipe_color_union
{
float f[4];
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index f868d71db23..211bc2440f9 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -57,6 +57,7 @@ struct pipe_resource;
struct pipe_surface;
struct pipe_transfer;
struct pipe_box;
+struct pipe_memory_info;
/**
@@ -260,6 +261,11 @@ struct pipe_screen {
unsigned index,
struct pipe_driver_query_group_info *info);
+ /**
+ * Query information about memory usage.
+ */
+ void (*query_memory_info)(struct pipe_screen *screen,
+ struct pipe_memory_info *info);
};
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index f300207d4dd..6539017b77c 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -420,7 +420,7 @@ struct tgsi_property_data {
#define TGSI_OPCODE_FSLT 110
#define TGSI_OPCODE_FSNE 111
- /* gap */
+#define TGSI_OPCODE_MEMBAR 112
#define TGSI_OPCODE_CALLNZ 113
/* gap */
#define TGSI_OPCODE_BREAKC 115
@@ -744,6 +744,11 @@ struct tgsi_instruction_memory
unsigned Padding : 29;
};
+#define TGSI_MEMBAR_SHADER_BUFFER (1 << 0)
+#define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1)
+#define TGSI_MEMBAR_SHADER_IMAGE (1 << 2)
+#define TGSI_MEMBAR_SHARED (1 << 3)
+#define TGSI_MEMBAR_THREAD_GROUP (1 << 4)
#ifdef __cplusplus
}
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 2e4d2830199..ed62a33ad72 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -720,6 +720,19 @@ struct pipe_debug_callback
void *data;
};
+/**
+ * Information about memory usage. All sizes are in kilobytes.
+ */
+struct pipe_memory_info
+{
+ unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
+ unsigned avail_device_memory; /**< free device memory at the moment */
+ unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
+ unsigned avail_staging_memory; /**< free staging memory at the moment */
+ unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources
index 99b623a5b59..8d178d4b18f 100644
--- a/src/gallium/state_trackers/nine/Makefile.sources
+++ b/src/gallium/state_trackers/nine/Makefile.sources
@@ -5,6 +5,8 @@ C_SOURCES := \
authenticatedchannel9.h \
basetexture9.c \
basetexture9.h \
+ buffer9.c \
+ buffer9.h \
cryptosession9.c \
cryptosession9.h \
cubetexture9.c \
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c
index 69e0fa25961..8428b1bd7eb 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -563,7 +563,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) |
/*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */
/*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */
- D3DPMISCCAPS_FOGANDSPECULARALPHA |
+ D3DPMISCCAPS_FOGANDSPECULARALPHA | /* Note: documentation of the flag is wrong */
D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) |
D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) |
D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING |
@@ -618,7 +618,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
pCaps->DestBlendCaps = pCaps->SrcBlendCaps;
- pCaps->AlphaCmpCaps = D3DPCMPCAPS_LESS |
+ pCaps->AlphaCmpCaps = D3DPCMPCAPS_NEVER |
+ D3DPCMPCAPS_LESS |
D3DPCMPCAPS_EQUAL |
D3DPCMPCAPS_LESSEQUAL |
D3DPCMPCAPS_GREATER |
@@ -980,7 +981,8 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This,
hr = NineDevice9_new(screen, ¶ms, &caps, pPresentationParameters,
pD3D9, pPresentationGroup, This->ctx, FALSE, NULL,
- (struct NineDevice9 **)ppReturnedDeviceInterface);
+ (struct NineDevice9 **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
@@ -1041,7 +1043,8 @@ NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This,
hr = NineDevice9Ex_new(screen, ¶ms, &caps, pPresentationParameters,
pFullscreenDisplayMode,
pD3D9Ex, pPresentationGroup, This->ctx,
- (struct NineDevice9Ex **)ppReturnedDeviceInterface);
+ (struct NineDevice9Ex **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c
index d13138b7d5c..7a0959a8f3e 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -319,7 +319,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
if (tex->dirty_box.width) {
for (l = min_level_dirty; l <= last_level; ++l) {
- u_box_minify_2d(&box, &tex->dirty_box, l);
+ u_box_minify_3d(&box, &tex->dirty_box, l);
NineVolume9_UploadSelf(tex->volumes[l], &box);
}
memset(&tex->dirty_box, 0, sizeof(tex->dirty_box));
diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c
new file mode 100644
index 00000000000..b4b91ec2a02
--- /dev/null
+++ b/src/gallium/state_trackers/nine/buffer9.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2011 Joakim Sindholt
+ * Copyright 2015 Patrick Rudolph
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "buffer9.h"
+#include "device9.h"
+#include "nine_helpers.h"
+#include "nine_pipe.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "util/u_box.h"
+
+#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
+
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+ struct NineUnknownParams *pParams,
+ D3DRESOURCETYPE Type,
+ DWORD Usage,
+ UINT Size,
+ D3DPOOL Pool )
+{
+ struct pipe_resource *info = &This->base.info;
+ HRESULT hr;
+
+ DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);
+
+ user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
+
+ This->maps = MALLOC(sizeof(struct pipe_transfer *));
+ if (!This->maps)
+ return E_OUTOFMEMORY;
+ This->nmaps = 0;
+ This->maxmaps = 1;
+ This->size = Size;
+
+ This->pipe = pParams->device->pipe;
+
+ info->screen = pParams->device->screen;
+ info->target = PIPE_BUFFER;
+ info->format = PIPE_FORMAT_R8_UNORM;
+ info->width0 = Size;
+ info->flags = 0;
+
+ info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
+ if (!(Usage & D3DUSAGE_WRITEONLY))
+ info->bind |= PIPE_BIND_TRANSFER_READ;
+
+ info->usage = PIPE_USAGE_DEFAULT;
+ if (Usage & D3DUSAGE_DYNAMIC)
+ info->usage = PIPE_USAGE_STREAM;
+ else if (Pool == D3DPOOL_SYSTEMMEM)
+ info->usage = PIPE_USAGE_STAGING;
+
+ /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
+ /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
+ /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
+ /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
+ /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
+ if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
+ DBG("Application asked for Software Vertex Processing, "
+ "but this is unimplemented\n");
+ /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
+
+ info->height0 = 1;
+ info->depth0 = 1;
+ info->array_size = 1;
+ info->last_level = 0;
+ info->nr_samples = 0;
+
+ hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
+ Type, Pool, Usage);
+ return hr;
+}
+
+void
+NineBuffer9_dtor( struct NineBuffer9 *This )
+{
+ if (This->maps) {
+ while (This->nmaps) {
+ NineBuffer9_Unlock(This);
+ }
+ FREE(This->maps);
+ }
+
+ NineResource9_dtor(&This->base);
+}
+
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This )
+{
+ return NineResource9_GetResource(&This->base);
+}
+
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags )
+{
+ struct pipe_box box;
+ void *data;
+ unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
+
+ DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
+ This, This->base.resource,
+ OffsetToLock, SizeToLock, Flags);
+
+ user_assert(ppbData, E_POINTER);
+ user_assert(!(Flags & ~(D3DLOCK_DISCARD |
+ D3DLOCK_DONOTWAIT |
+ D3DLOCK_NO_DIRTY_UPDATE |
+ D3DLOCK_NOSYSLOCK |
+ D3DLOCK_READONLY |
+ D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
+
+ if (This->nmaps == This->maxmaps) {
+ struct pipe_transfer **newmaps =
+ REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
+ sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
+ if (newmaps == NULL)
+ return E_OUTOFMEMORY;
+
+ This->maxmaps <<= 1;
+ This->maps = newmaps;
+ }
+
+ if (SizeToLock == 0) {
+ SizeToLock = This->size - OffsetToLock;
+ user_warn(OffsetToLock != 0);
+ }
+
+ u_box_1d(OffsetToLock, SizeToLock, &box);
+
+ data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
+ usage, &box, &This->maps[This->nmaps]);
+
+ if (!data) {
+ DBG("pipe::transfer_map failed\n"
+ " usage = %x\n"
+ " box.x = %u\n"
+ " box.width = %u\n",
+ usage, box.x, box.width);
+ /* not sure what to return, msdn suggests this */
+ if (Flags & D3DLOCK_DONOTWAIT)
+ return D3DERR_WASSTILLDRAWING;
+ return D3DERR_INVALIDCALL;
+ }
+
+ DBG("returning pointer %p\n", data);
+ This->nmaps++;
+ *ppbData = data;
+
+ return D3D_OK;
+}
+
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This )
+{
+ DBG("This=%p\n", This);
+
+ user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
+ This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
+ return D3D_OK;
+}
diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h
new file mode 100644
index 00000000000..1afd9a996ea
--- /dev/null
+++ b/src/gallium/state_trackers/nine/buffer9.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2011 Joakim Sindholt
+ * Copyright 2015 Patrick Rudolph
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_BUFFER9_H_
+#define _NINE_BUFFER9_H_
+
+#include "resource9.h"
+
+struct pipe_screen;
+struct pipe_context;
+struct pipe_transfer;
+
+struct NineBuffer9
+{
+ struct NineResource9 base;
+
+ /* G3D */
+ struct pipe_context *pipe;
+ struct pipe_transfer **maps;
+ int nmaps, maxmaps;
+ UINT size;
+};
+static inline struct NineBuffer9 *
+NineBuffer9( void *data )
+{
+ return (struct NineBuffer9 *)data;
+}
+
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+ struct NineUnknownParams *pParams,
+ D3DRESOURCETYPE Type,
+ DWORD Usage,
+ UINT Size,
+ D3DPOOL Pool );
+
+void
+NineBuffer9_dtor( struct NineBuffer9 *This );
+
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This );
+
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags );
+
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This );
+
+#endif /* _NINE_BUFFER9_H_ */
diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c
index abba2637946..460cc853942 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -181,7 +181,7 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This )
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 0be83658928..475ef96788e 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -38,6 +38,7 @@
#include "nine_pipe.h"
#include "nine_ff.h"
#include "nine_dump.h"
+#include "nine_limits.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -81,7 +82,7 @@ static void nine_setup_fpu(void)
#endif
-static void
+void
NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
{
struct NineSurface9 *refSurf = NULL;
@@ -112,8 +113,10 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
This->state.scissor.maxy = refSurf->desc.Height;
}
- if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil)
+ if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) {
This->state.rs[D3DRS_ZENABLE] = TRUE;
+ This->state.rs_advertised[D3DRS_ZENABLE] = TRUE;
+ }
if (This->state.rs[D3DRS_ZENABLE])
NineDevice9_SetDepthStencilSurface(
This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
@@ -131,7 +134,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode )
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum )
{
unsigned i;
HRESULT hr = NineUnknown_ctor(&This->base, pParams);
@@ -152,6 +156,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->params = *pCreationParameters;
This->ex = ex;
This->present = pPresentationGroup;
+ This->minor_version_num = minorVersionNum;
+
IDirect3D9_AddRef(This->d3d9);
ID3DPresentGroup_AddRef(This->present);
@@ -172,6 +178,19 @@ NineDevice9_ctor( struct NineDevice9 *This,
/* Create first, it messes up our state. */
This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
+ /* Available memory counter. Updated only for allocations with this device
+ * instance. This is the Win 7 behavior.
+ * Win XP shares this counter across multiple devices. */
+ This->available_texture_mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
+ if (This->available_texture_mem < 4096)
+ This->available_texture_mem <<= 20;
+ else
+ This->available_texture_mem = UINT_MAX;
+ /* We cap texture memory usage to 80% of what is reported free initially
+ * This helps get closer Win behaviour. For example VertexBuffer allocation
+ * still succeeds when texture allocation fails. */
+ This->available_texture_limit = This->available_texture_mem * 20LL / 100LL;
+
/* create implicit swapchains */
This->nswapchains = ID3DPresentGroup_GetMultiheadCount(This->present);
This->swapchains = CALLOC(This->nswapchains,
@@ -460,7 +479,8 @@ NineDevice9_dtor( struct NineDevice9 *This )
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
- NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
+ if (This->swapchains[i])
+ NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
FREE(This->swapchains);
}
@@ -523,17 +543,20 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This )
HRESULT WINAPI
NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
{
- return D3D_OK; /* TODO */
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ } else if (This->device_needs_reset) {
+ return D3DERR_DEVICENOTRESET;
+ }
+
+ return D3D_OK;
}
UINT WINAPI
NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
{
- const unsigned mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
- if (mem < 4096)
- return mem << 20;
- else
- return UINT_MAX;
+ return This->available_texture_mem;
}
HRESULT WINAPI
@@ -606,6 +629,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
"pCursorBitmap=%p\n", This, XHotSpot, YHotSpot, pCursorBitmap);
user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format == D3DFMT_A8R8G8B8, D3DERR_INVALIDCALL);
if (This->swapchains[0]->params.Windowed) {
This->cursor.w = MIN2(surf->desc.Width, 32);
@@ -709,6 +733,11 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
This, pPresentationParameters, pSwapChain);
user_assert(pPresentationParameters, D3DERR_INVALIDCALL);
+ user_assert(tmplt->params.Windowed && pPresentationParameters->Windowed, D3DERR_INVALIDCALL);
+
+ /* TODO: this deserves more tests */
+ if (!pPresentationParameters->hDeviceWindow)
+ pPresentationParameters->hDeviceWindow = This->params.hFocusWindow;
hr = ID3DPresentGroup_CreateAdditionalPresent(This->present, pPresentationParameters, &present);
@@ -757,11 +786,16 @@ NineDevice9_Reset( struct NineDevice9 *This,
DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ }
+
for (i = 0; i < This->nswapchains; ++i) {
D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
if (hr != D3D_OK)
- return hr;
+ break;
}
nine_pipe_context_clear(This);
@@ -772,6 +806,7 @@ NineDevice9_Reset( struct NineDevice9 *This,
This, 0, (IDirect3DSurface9 *)This->swapchains[0]->buffers[0]);
/* XXX: better use GetBackBuffer here ? */
+ This->device_needs_reset = (hr != D3D_OK);
return hr;
}
@@ -806,6 +841,8 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This,
IDirect3DSurface9 **ppBackBuffer )
{
user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
+ /* return NULL on error */
+ *ppBackBuffer = NULL;
user_assert(iSwapChain < This->nswapchains, D3DERR_INVALIDCALL);
return NineSwapChain9_GetBackBuffer(This->swapchains[iSwapChain],
@@ -1455,7 +1492,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
struct NineSurface9 *src = NineSurface9(pSourceSurface);
struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
struct pipe_resource *src_res = NineSurface9_GetResource(src);
- const boolean zs = util_format_is_depth_or_stencil(dst_res->format);
+ boolean zs;
struct pipe_blit_info blit;
boolean scaled, clamped, ms, flip_x = FALSE, flip_y = FALSE;
@@ -1470,6 +1507,9 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
DBG("pDestRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top,
pDestRect->right, pDestRect->bottom);
+ user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
+ src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
+ zs = util_format_is_depth_or_stencil(dst_res->format);
user_assert(!zs || !This->in_scene, D3DERR_INVALIDCALL);
user_assert(!zs || !pSourceRect ||
(pSourceRect->left == 0 &&
@@ -1493,8 +1533,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
src_res->nr_samples,
PIPE_BIND_SAMPLER_VIEW),
D3DERR_INVALIDCALL);
- user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
- src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
/* We might want to permit these, but wine thinks we shouldn't. */
user_assert(!pDestRect ||
@@ -1668,6 +1706,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
user_assert((surf->base.usage & D3DUSAGE_RENDERTARGET) ||
NineSurface9_IsOffscreenPlain(surf), D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format != D3DFMT_NULL, D3D_OK);
+
if (pRect) {
x = pRect->left;
y = pRect->top;
@@ -1884,15 +1924,18 @@ NineDevice9_Clear( struct NineDevice9 *This,
Count = 0;
#endif
+ nine_update_state_framebuffer_clear(This);
+
if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
- if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
- if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ /* Ignore Z buffer if not bound */
+ if (This->state.fb.zsbuf != NULL) {
+ if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
+ if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ }
if (!bufs)
return D3D_OK;
d3dcolor_to_pipe_color_union(&rgba, Color);
- nine_update_state_framebuffer(This);
-
rect.x1 = This->state.viewport.X;
rect.y1 = This->state.viewport.Y;
rect.x2 = This->state.viewport.Width + rect.x1;
@@ -1935,7 +1978,6 @@ NineDevice9_Clear( struct NineDevice9 *This,
/* Case we clear depth buffer (and eventually rt too).
* depth buffer size is always >= rt size. Compare to clear region */
((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
- This->state.fb.zsbuf != NULL &&
rect.x2 >= zsbuf_surf->desc.Width &&
rect.y2 >= zsbuf_surf->desc.Height))) {
DBG("Clear fast path\n");
@@ -2342,8 +2384,15 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
DBG("This=%p State=%u(%s) Value=%08x\n", This,
State, nine_d3drs_to_string(State), Value);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
+
+ if (state->rs_advertised[State] == Value && likely(!This->is_recording))
+ return D3D_OK;
+
+ state->rs_advertised[State] = Value;
+
/* Amd hacks (equivalent to GL extensions) */
- if (State == D3DRS_POINTSIZE) {
+ if (unlikely(State == D3DRS_POINTSIZE)) {
if (Value == RESZ_CODE)
return NineDevice9_ResolveZ(This);
@@ -2356,20 +2405,17 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
}
/* NV hack */
- if (State == D3DRS_ADAPTIVETESS_Y &&
- (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE]))) {
+ if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
+ if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE])) {
state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC);
state->changed.group |= NINE_STATE_BLEND;
return D3D_OK;
+ }
}
- user_assert(State < Elements(state->rs), D3DERR_INVALIDCALL);
-
- if (likely(state->rs[State] != Value) || unlikely(This->is_recording)) {
- state->rs[State] = Value;
- state->changed.rs[State / 32] |= 1 << (State % 32);
- state->changed.group |= nine_render_state_group[State];
- }
+ state->rs[State] = nine_fix_render_state_value(State, Value);
+ state->changed.rs[State / 32] |= 1 << (State % 32);
+ state->changed.group |= nine_render_state_group[State];
return D3D_OK;
}
@@ -2379,9 +2425,9 @@ NineDevice9_GetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD *pValue )
{
- user_assert(State < Elements(This->state.rs), D3DERR_INVALIDCALL);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
- *pValue = This->state.rs[State];
+ *pValue = This->state.rs_advertised[State];
return D3D_OK;
}
@@ -3122,7 +3168,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
buffer_offset = 0;
} else {
/* SO matches vertex declaration */
- resource = dst->base.resource;
+ resource = NineVertexBuffer9_GetResource(dst);
buffer_offset = DestIndex * vs->so->stride[0];
}
target = This->pipe->create_stream_output_target(This->pipe, resource,
@@ -3184,13 +3230,21 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pDecl )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pDecl=%p\n", This, pDecl);
if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl))
return D3D_OK;
+
nine_bind(&state->vdecl, pDecl);
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+ if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) {
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+ state->changed.group |= NINE_STATE_VS;
+ }
+
state->changed.group |= NINE_STATE_VDECL;
return D3D_OK;
@@ -3262,18 +3316,21 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 *pShader )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pShader=%p\n", This, pShader);
if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
return D3D_OK;
- /* ff -> non-ff: commit back non-ff constants */
- if (!state->vs && pShader)
- state->commit |= NINE_STATE_COMMIT_CONST_VS;
-
nine_bind(&state->vs, pShader);
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+
+ /* ff -> non-ff: commit back non-ff constants */
+ if (!was_programmable_vs && This->state.programmable_vs)
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+
state->changed.group |= NINE_STATE_VS;
return D3D_OK;
@@ -3499,7 +3556,8 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
state->vtxbuf[i].stride = Stride;
state->vtxbuf[i].buffer_offset = OffsetInBytes;
}
- state->vtxbuf[i].buffer = pStreamData ? pVBuf9->base.resource : NULL;
+ pipe_resource_reference(&state->vtxbuf[i].buffer,
+ pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL);
return D3D_OK;
}
@@ -3542,6 +3600,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
(Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL);
user_assert(Setting, D3DERR_INVALIDCALL);
+ if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting)
+ return D3D_OK;
+
state->stream_freq[StreamNumber] = Setting;
if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
@@ -3549,7 +3610,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
else
state->stream_instancedata_mask &= ~(1 << StreamNumber);
- state->changed.stream_freq |= 1 << StreamNumber;
+ state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */
+ if (StreamNumber != 0)
+ state->changed.group |= NINE_STATE_STREAMFREQ;
return D3D_OK;
}
@@ -4013,7 +4076,8 @@ NineDevice9_new( struct pipe_screen *pScreen,
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut )
+ struct NineDevice9 **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
@@ -4021,5 +4085,5 @@ NineDevice9_new( struct pipe_screen *pScreen,
NINE_NEW(Device9, ppOut, lock, /* args */
pScreen, pCreationParameters, pCaps,
pPresentationParameters, pD3D9, pPresentationGroup, pCTX,
- ex, pFullscreenDisplayMode);
+ ex, pFullscreenDisplayMode, minorVersionNum );
}
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index cbc1e61f5db..34edf0cfa48 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -137,6 +137,10 @@ struct NineDevice9
/* dummy vbo (containing 0 0 0 0) to bind if vertex shader input
* is not bound to anything by the vertex declaration */
struct pipe_resource *dummy_vbo;
+ BOOL device_needs_reset;
+ int minor_version_num;
+ long long available_texture_mem;
+ long long available_texture_limit;
};
static inline struct NineDevice9 *
NineDevice9( void *data )
@@ -154,7 +158,8 @@ NineDevice9_new( struct pipe_screen *pScreen,
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut );
+ struct NineDevice9 **ppOut,
+ int minorVersionNum );
HRESULT
NineDevice9_ctor( struct NineDevice9 *This,
@@ -167,12 +172,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode );
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum );
void
NineDevice9_dtor( struct NineDevice9 *This );
/*** Nine private ***/
+void
+NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset );
struct pipe_screen *
NineDevice9_GetScreen( struct NineDevice9 *This );
diff --git a/src/gallium/state_trackers/nine/device9ex.c b/src/gallium/state_trackers/nine/device9ex.c
index fe8aa9b2704..11244b1bedf 100644
--- a/src/gallium/state_trackers/nine/device9ex.c
+++ b/src/gallium/state_trackers/nine/device9ex.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "device9.h"
#include "device9ex.h"
+#include "nine_pipe.h"
#include "swapchain9ex.h"
#include "nine_helpers.h"
@@ -37,7 +39,8 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
- struct d3dadapter9_context *pCTX )
+ struct d3dadapter9_context *pCTX,
+ int minorVersionNum )
{
DBG("This=%p pParams=%p pScreen=%p pCreationParameters=%p pCaps=%p "
"pPresentationParameters=%p pFullscreenDisplayMode=%p "
@@ -50,7 +53,7 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This,
pScreen, pCreationParameters, pCaps,
pPresentationParameters,
(IDirect3D9 *)pD3D9Ex, pPresentationGroup, pCTX,
- TRUE, pFullscreenDisplayMode);
+ TRUE, pFullscreenDisplayMode, minorVersionNum);
}
static void
@@ -158,6 +161,14 @@ NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
DBG("This=%p hDestinationWindow=%p\n",
This, hDestinationWindow);
+ user_assert(!This->base.swapchains[0]->params.Windowed, D3D_OK);
+
+ if (This->base.params.hFocusWindow == hDestinationWindow) {
+ if (NineSwapChain9_GetOccluded(This->base.swapchains[0]))
+ return S_PRESENT_OCCLUDED;
+ } else if(!NineSwapChain9_GetOccluded(This->base.swapchains[0])) {
+ return S_PRESENT_OCCLUDED;
+ }
/* TODO: handle the other return values */
return D3D_OK;
}
@@ -221,12 +232,37 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
if (pFullscreenDisplayMode) mode = &(pFullscreenDisplayMode[i]);
hr = NineSwapChain9_Resize(This->base.swapchains[i], params, mode);
if (FAILED(hr))
- return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+ break;
}
NineDevice9_SetRenderTarget(
(struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
+ return hr;
+}
+
+HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+ D3DPRESENT_PARAMETERS *pPresentationParameters )
+{
+ HRESULT hr = D3D_OK;
+ unsigned i;
+
+ DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+
+ for (i = 0; i < This->base.nswapchains; ++i) {
+ D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
+ hr = NineSwapChain9_Resize(This->base.swapchains[i], params, NULL);
+ if (FAILED(hr))
+ break;
+ }
+
+ nine_pipe_context_clear((struct NineDevice9 *)This);
+ nine_state_clear(&This->base.state, TRUE);
+
+ NineDevice9_SetDefaultState((struct NineDevice9 *)This, TRUE);
+ NineDevice9_SetRenderTarget(
+ (struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
return hr;
}
@@ -248,11 +284,18 @@ NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation);
}
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This )
+{
+ return D3D_OK;
+}
+
+
IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = {
(void *)NineUnknown_QueryInterface,
(void *)NineUnknown_AddRef,
(void *)NineUnknown_Release,
- (void *)NineDevice9_TestCooperativeLevel,
+ (void *)NineDevice9Ex_TestCooperativeLevel,
(void *)NineDevice9_GetAvailableTextureMem,
(void *)NineDevice9_EvictManagedResources,
(void *)NineDevice9_GetDirect3D,
@@ -265,7 +308,7 @@ IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = {
(void *)NineDevice9_CreateAdditionalSwapChain,
(void *)NineDevice9_GetSwapChain,
(void *)NineDevice9_GetNumberOfSwapChains,
- (void *)NineDevice9_Reset,
+ (void *)NineDevice9Ex_Reset,
(void *)NineDevice9_Present,
(void *)NineDevice9_GetBackBuffer,
(void *)NineDevice9_GetRasterStatus,
@@ -401,13 +444,14 @@ NineDevice9Ex_new( struct pipe_screen *pScreen,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut )
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
NINE_NEW(Device9Ex, ppOut, lock,
pScreen, pCreationParameters, pCaps, pPresentationParameters,
- pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX);
+ pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX, minorVersionNum );
}
diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h
index 8375622d8a1..1c7e57e0974 100644
--- a/src/gallium/state_trackers/nine/device9ex.h
+++ b/src/gallium/state_trackers/nine/device9ex.h
@@ -44,7 +44,8 @@ NineDevice9Ex_new( struct pipe_screen *pScreen,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut );
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum );
HRESULT WINAPI
NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
@@ -72,6 +73,13 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
const RGNDATA *pDirtyRegion,
DWORD dwFlags );
+HRESULT WINAPI
+NineDevice9Ex_Present( struct NineDevice9Ex *This,
+ const RECT *pSourceRect,
+ const RECT *pDestRect,
+ HWND hDestWindowOverride,
+ const RGNDATA *pDirtyRegion );
+
HRESULT WINAPI
NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority );
@@ -140,10 +148,17 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
D3DDISPLAYMODEEX *pFullscreenDisplayMode );
+HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+ D3DPRESENT_PARAMETERS *pPresentationParameters );
+
HRESULT WINAPI
NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation );
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This );
+
#endif /* _NINE_DEVICE9EX_H_ */
diff --git a/src/gallium/state_trackers/nine/guid.c b/src/gallium/state_trackers/nine/guid.c
index 5034feb4d71..5e63d2f6629 100644
--- a/src/gallium/state_trackers/nine/guid.c
+++ b/src/gallium/state_trackers/nine/guid.c
@@ -20,6 +20,7 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include
#include "guid.h"
const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } };
@@ -64,3 +65,20 @@ GUID_equal( const GUID *a,
}
return TRUE;
}
+
+char* GUID_sprintf(char *guid_str, REFGUID id) {
+ sprintf( guid_str,
+ "{%08X,%04X,%04X,%02X%02X%02X%02X%02X%02X%02X%02X}",
+ id->Data1,
+ id->Data2,
+ id->Data3,
+ id->Data4[0],
+ id->Data4[1],
+ id->Data4[2],
+ id->Data4[3],
+ id->Data4[4],
+ id->Data4[5],
+ id->Data4[6],
+ id->Data4[7]);
+ return guid_str;
+}
diff --git a/src/gallium/state_trackers/nine/guid.h b/src/gallium/state_trackers/nine/guid.h
index 1f9ff009ad8..af8f081bfb5 100644
--- a/src/gallium/state_trackers/nine/guid.h
+++ b/src/gallium/state_trackers/nine/guid.h
@@ -33,4 +33,8 @@ boolean
GUID_equal( const GUID *a,
const GUID *b );
+char*
+GUID_sprintf( char *guid_str,
+ REFGUID id );
+
#endif /* _NINE_GUID_H_ */
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c
index 860313b7f7e..401fe75e95f 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.c
+++ b/src/gallium/state_trackers/nine/indexbuffer9.c
@@ -40,52 +40,17 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
struct NineUnknownParams *pParams,
D3DINDEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p pParams=%p pDesc=%p Usage=%s\n",
This, pParams, pDesc, nine_D3DUSAGE_to_str(pDesc->Usage));
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_INDEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, D3DRTYPE_INDEXBUFFER,
- pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_INDEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
- This->buffer.buffer = This->base.resource;
+ This->buffer.buffer = NineIndexBuffer9_GetResource(This);
This->buffer.offset = 0;
- This->map_count = 0;
switch (pDesc->Format) {
case D3DFMT_INDEX16: This->buffer.index_size = 2; break;
@@ -105,9 +70,7 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
void
NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This )
{
- if (This->transfer) { NineIndexBuffer9_Unlock(This); }
-
- NineResource9_dtor(&This->base);
+ NineBuffer9_dtor(&This->base);
}
const struct pipe_index_buffer *
@@ -116,6 +79,12 @@ NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This )
return &This->buffer;
}
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This )
+{
+ return NineBuffer9_GetResource(&This->base);
+}
+
HRESULT WINAPI
NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
@@ -123,59 +92,13 @@ NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
void **ppbData,
DWORD Flags )
{
- struct pipe_box box;
- void *data;
- UINT count;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p OffsetToLock=%u SizeToLock=%u ppbData=%p Flags=%i "
- "transfer=%p map_count=%u\n", This, OffsetToLock,
- SizeToLock, ppbData, Flags, This->transfer, This->map_count);
-
- count = ++This->map_count;
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- if (unlikely(count != 1)) {
- DBG("Lock has been called on already locked buffer."
- "Unmapping before mapping again.");
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- }
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->transfer);
- if (!This->transfer) {
- DBG("pipe::transfer_map failed\n"
- " usage = %u\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- }
- *ppbData = data;
- DBG("Returning memory at %p at address %p\n", *ppbData, ppbData);
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
{
- DBG("This=%p\n", This);
- if (!This->map_count) {
- DBG("Unmap called without a previous map call.\n");
- return D3D_OK;
- }
- if (--This->map_count) {
- DBG("Ignoring unmap.\n");
- return D3D_OK;
- }
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- This->transfer = NULL;
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h
index f10578f47ba..f3274b71224 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.h
+++ b/src/gallium/state_trackers/nine/indexbuffer9.h
@@ -24,7 +24,7 @@
#define _NINE_INDEXBUFFER9_H_
#include "resource9.h"
-
+#include "buffer9.h"
#include "pipe/p_state.h"
struct pipe_screen;
@@ -35,13 +35,10 @@ struct NineDevice9;
struct NineIndexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* g3d stuff */
- struct pipe_context *pipe;
struct pipe_index_buffer buffer;
- struct pipe_transfer *transfer;
- UINT map_count;
D3DINDEXBUFFER_DESC desc;
};
@@ -69,6 +66,8 @@ NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This );
const struct pipe_index_buffer *
NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This );
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This );
/*** Direct3D public ***/
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index 0feaeab7330..a5466a7bdd4 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -58,7 +58,8 @@ struct nine_ff_vs_key
uint32_t color0in_one : 1;
uint32_t color1in_one : 1;
uint32_t fog : 1;
- uint32_t pad1 : 7;
+ uint32_t specular_enable : 1;
+ uint32_t pad1 : 6;
uint32_t tc_dim_input: 16; /* 8 * 2 bits */
uint32_t pad2 : 16;
uint32_t tc_dim_output: 24; /* 8 * 3 bits */
@@ -466,6 +467,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
ureg_ARL(ureg, AR, ureg_src(tmp));
}
+
+ ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
+ ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
+
for (i = 0; i < key->vertexblend; ++i) {
for (c = 0; c < 4; ++c) {
cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
@@ -473,22 +478,27 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
}
/* multiply by WORLD(index) */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]);
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
- /* accumulate weighted position value */
- if (i)
- ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
- else
- ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0));
+ if (i < (key->vertexblend - 1)) {
+ /* accumulate weighted position value */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
+ /* subtract weighted position value for last value */
+ ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i));
+ }
}
+
+ /* the last weighted position is always 1 - sum_of_previous_weights */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2]));
+
/* multiply by VIEW_PROJ */
- ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8));
- ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8));
+ ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp));
if (need_rVtx)
vs->aVtx = ureg_src(r[2]);
@@ -515,10 +525,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MOV(ureg, oPos, ureg_src(tmp));
} else {
/* position = vertex * WORLD_VIEW_PROJ */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0));
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
}
if (need_rVtx) {
@@ -746,12 +756,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
{
/* hitDir = light.position - eyeVtx
* d = length(hitDir)
- * hitDir /= d
*/
ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
ureg_RSQ(ureg, tmp_y, _X(tmp));
- ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */
ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
@@ -765,6 +773,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
ureg_ENDIF(ureg);
+ /* normalize hitDir */
+ ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp);
+
/* if (SPOT light) */
ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
ureg_IF(ureg, _X(tmp), &label[l++]);
@@ -799,9 +810,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
/* midVec = normalize(hitDir + eyeDir) */
if (key->localviewer) {
ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
} else {
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
}
ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
@@ -849,7 +860,14 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
}
- ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+
+ if (key->specular_enable) {
+ /* add oCol[1] to oCol[0] */
+ ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
+ } else {
+ ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ }
ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
} else
/* COLOR */
@@ -1012,10 +1030,10 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
break;
case D3DTA_DIFFUSE:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_SPECULAR:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_TEMP:
reg = ps->rTmpSrc;
@@ -1222,7 +1240,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
ps.ureg = ureg;
ps.stage.index_pre_mod = -1;
- ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
/* Declare all TEMPs we might need, serious drivers have a register allocator. */
for (i = 0; i < Elements(ps.r); ++i)
@@ -1241,7 +1259,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
key->ts[s].colorarg1 == D3DTA_SPECULAR ||
key->ts[s].colorarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
key->ts[s].colorarg1 == D3DTA_TEXTURE ||
@@ -1258,7 +1276,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
key->ts[s].alphaarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
@@ -1269,7 +1287,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
}
}
if (key->specular)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
@@ -1500,6 +1518,9 @@ nine_ff_get_vs(struct NineDevice9 *device)
if (key.fog_mode)
key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
+ key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
+ key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
+
if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
@@ -1847,7 +1868,7 @@ nine_ff_update(struct NineDevice9 *device)
DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
/* NOTE: the only reference belongs to the hash table */
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
device->ff.vs = nine_ff_get_vs(device);
device->state.changed.group |= NINE_STATE_VS;
}
@@ -1856,7 +1877,7 @@ nine_ff_update(struct NineDevice9 *device)
device->state.changed.group |= NINE_STATE_PS;
}
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
nine_ff_load_vs_transforms(device);
nine_ff_load_tex_matrices(device);
nine_ff_load_lights(device);
diff --git a/src/gallium/state_trackers/nine/nine_limits.h b/src/gallium/state_trackers/nine/nine_limits.h
new file mode 100644
index 00000000000..ef1ed2566ba
--- /dev/null
+++ b/src/gallium/state_trackers/nine/nine_limits.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2015 Axel Davy
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_LIMITS_H_
+#define _NINE_LIMITS_H_
+
+#include "assert.h"
+#include "d3d9types.h"
+
+// state can be any value
+#define NINE_STATE_NO_LIMIT 0
+// value is clamped if below min or max
+#define NINE_STATE_CLAMP 1
+// boolean: 0 -> false; any other value -> true
+#define NINE_STATE_BOOL 2
+// a mask is applied on the value
+#define NINE_STATE_MASK 3
+// if outside a range, state value is changed to a default value
+#define NINE_STATE_RANGE_DEF_VAL 4
+
+struct nine_state_behaviour {
+ unsigned state_value_behaviour;
+ union {
+ struct {
+ unsigned min;
+ unsigned max;
+ } clamp;
+ unsigned mask;
+ struct {
+ unsigned min;
+ unsigned max;
+ unsigned default_val;
+ } range_def_val;
+ } u;
+};
+
+#define __NO_LIMIT_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT}
+
+#define __CLAMP_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_CLAMP, {.clamp = {m, M}}}
+
+#define __BOOLEAN_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_BOOL}
+
+#define __MASK_RS(o, m) \
+ [D3DRS_##o] = {NINE_STATE_MASK, {.mask = m}}
+
+#define __RANGE_DEF_VAL_RS(o, m, M, d) \
+ [D3DRS_##o] = {NINE_STATE_RANGE_DEF_VAL, {.range_def_val = {m, M, d}}}
+
+#define __TO_DETERMINE_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT}
+
+static const struct nine_state_behaviour
+render_state_limits_table[D3DRS_BLENDOPALPHA + 1] = {
+ __TO_DETERMINE_RS(ZENABLE, 0, 3),
+ __TO_DETERMINE_RS(FILLMODE, 1, 3),
+ __CLAMP_RS(SHADEMODE, 1, 3),
+ __BOOLEAN_RS(ZWRITEENABLE),
+ __BOOLEAN_RS(ALPHATESTENABLE),
+ __BOOLEAN_RS(LASTPIXEL),
+ __RANGE_DEF_VAL_RS(SRCBLEND, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLEND, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(CULLMODE, 1, 3),
+ __CLAMP_RS(ZFUNC, 1, 8),
+ __MASK_RS(ALPHAREF, 0x000000FF),
+ __CLAMP_RS(ALPHAFUNC, 1, 8),
+ __BOOLEAN_RS(DITHERENABLE),
+ __BOOLEAN_RS(ALPHABLENDENABLE),
+ __BOOLEAN_RS(FOGENABLE),
+ __BOOLEAN_RS(SPECULARENABLE),
+ __NO_LIMIT_RS(FOGCOLOR),
+ __MASK_RS(FOGTABLEMODE, 0x00000007),
+    __NO_LIMIT_RS(FOGSTART), /* a bit more complex than that, let's ignore */
+ __NO_LIMIT_RS(FOGEND),
+ __NO_LIMIT_RS(FOGDENSITY), /* actually should be between 0.0 and 1.0 */
+ __BOOLEAN_RS(RANGEFOGENABLE),
+ __BOOLEAN_RS(STENCILENABLE),
+ __CLAMP_RS(STENCILFAIL, 1, 8),
+ __CLAMP_RS(STENCILZFAIL, 1, 8),
+ __CLAMP_RS(STENCILPASS, 1, 8),
+ __CLAMP_RS(STENCILFUNC, 1, 8),
+ __NO_LIMIT_RS(STENCILREF),
+ __NO_LIMIT_RS(STENCILMASK),
+ __NO_LIMIT_RS(STENCILWRITEMASK),
+ __NO_LIMIT_RS(TEXTUREFACTOR),
+ __TO_DETERMINE_RS(WRAP0, 0, 15),
+ __TO_DETERMINE_RS(WRAP1, 0, 15),
+ __TO_DETERMINE_RS(WRAP2, 0, 15),
+ __TO_DETERMINE_RS(WRAP3, 0, 15),
+ __TO_DETERMINE_RS(WRAP4, 0, 15),
+ __TO_DETERMINE_RS(WRAP5, 0, 15),
+ __TO_DETERMINE_RS(WRAP6, 0, 15),
+ __TO_DETERMINE_RS(WRAP7, 0, 15),
+ __BOOLEAN_RS(CLIPPING),
+ __BOOLEAN_RS(LIGHTING),
+ __NO_LIMIT_RS(AMBIENT),
+ __MASK_RS(FOGVERTEXMODE, 0x00000007),
+ __BOOLEAN_RS(COLORVERTEX),
+ __BOOLEAN_RS(LOCALVIEWER),
+ __BOOLEAN_RS(NORMALIZENORMALS),
+ __TO_DETERMINE_RS(DIFFUSEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(SPECULARMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(AMBIENTMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(EMISSIVEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(VERTEXBLEND, 0, 256), /* values between 4 and 254 -both included- are forbidden too */
+ __NO_LIMIT_RS(CLIPPLANEENABLE), /* expected check seems complex */
+ __TO_DETERMINE_RS(POINTSIZE, 0, 0xFFFFFFFF),
+ __TO_DETERMINE_RS(POINTSIZE_MIN, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(POINTSPRITEENABLE),
+ __BOOLEAN_RS(POINTSCALEENABLE),
+ __TO_DETERMINE_RS(POINTSCALE_A, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_B, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_C, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(MULTISAMPLEANTIALIAS),
+ __NO_LIMIT_RS(MULTISAMPLEMASK),
+ __TO_DETERMINE_RS(PATCHEDGESTYLE, 0, 1),
+ __TO_DETERMINE_RS(DEBUGMONITORTOKEN, 0, 1),
+ __TO_DETERMINE_RS(POINTSIZE_MAX, 0, 0x7FFFFFFF), /* check more complex than that */
+ __BOOLEAN_RS(INDEXEDVERTEXBLENDENABLE),
+ __TO_DETERMINE_RS(COLORWRITEENABLE, 0, 15),
+ __NO_LIMIT_RS(TWEENFACTOR),
+ __CLAMP_RS(BLENDOP, 1, 5),
+ __TO_DETERMINE_RS(POSITIONDEGREE, 1, 5), /* can actually be only 1 or 5 */
+ __TO_DETERMINE_RS(NORMALDEGREE, 1, 2),
+ __BOOLEAN_RS(SCISSORTESTENABLE),
+ __NO_LIMIT_RS(SLOPESCALEDEPTHBIAS),
+ __BOOLEAN_RS(ANTIALIASEDLINEENABLE),
+ __NO_LIMIT_RS(MINTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(MAXTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(ADAPTIVETESS_X),
+ __NO_LIMIT_RS(ADAPTIVETESS_Y),
+ __NO_LIMIT_RS(ADAPTIVETESS_Z),
+ __NO_LIMIT_RS(ADAPTIVETESS_W),
+ __BOOLEAN_RS(ENABLEADAPTIVETESSELLATION),
+ __BOOLEAN_RS(TWOSIDEDSTENCILMODE),
+ __CLAMP_RS(CCW_STENCILFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILZFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILPASS, 1, 8),
+ __CLAMP_RS(CCW_STENCILFUNC, 1, 8),
+ __TO_DETERMINE_RS(COLORWRITEENABLE1, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE2, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE3, 0, 15),
+ __NO_LIMIT_RS(BLENDFACTOR),
+ __BOOLEAN_RS(SRGBWRITEENABLE),
+ __NO_LIMIT_RS(DEPTHBIAS),
+ __TO_DETERMINE_RS(WRAP8, 0, 15),
+ __TO_DETERMINE_RS(WRAP9, 0, 15),
+ __TO_DETERMINE_RS(WRAP10, 0, 15),
+ __TO_DETERMINE_RS(WRAP11, 0, 15),
+ __TO_DETERMINE_RS(WRAP12, 0, 15),
+ __TO_DETERMINE_RS(WRAP13, 0, 15),
+ __TO_DETERMINE_RS(WRAP14, 0, 15),
+ __TO_DETERMINE_RS(WRAP15, 0, 15),
+ __BOOLEAN_RS(SEPARATEALPHABLENDENABLE),
+ __RANGE_DEF_VAL_RS(SRCBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(BLENDOPALPHA, 1, 5)
+};
+
+static DWORD inline
+nine_fix_render_state_value(D3DRENDERSTATETYPE State,
+ DWORD Value)
+{
+ struct nine_state_behaviour behaviour = render_state_limits_table[State];
+
+ switch (behaviour.state_value_behaviour) {
+ case NINE_STATE_NO_LIMIT:
+ break;
+ case NINE_STATE_CLAMP:
+ if (Value < behaviour.u.clamp.min)
+ Value = behaviour.u.clamp.min;
+ else if (Value > behaviour.u.clamp.max)
+ Value = behaviour.u.clamp.max;
+ break;
+ case NINE_STATE_BOOL:
+ Value = Value ? 1 : 0;
+ break;
+ case NINE_STATE_MASK:
+ Value = Value & behaviour.u.mask;
+ break;
+ case NINE_STATE_RANGE_DEF_VAL:
+ if (Value < behaviour.u.range_def_val.min || Value > behaviour.u.range_def_val.max)
+ Value = behaviour.u.range_def_val.default_val;
+ break;
+ }
+
+ return Value;
+}
+
+#endif /* _NINE_LIMITS_H_ */
diff --git a/src/gallium/state_trackers/nine/nine_pdata.h b/src/gallium/state_trackers/nine/nine_pdata.h
index 7bdd702cfbb..0e9a2aa7160 100644
--- a/src/gallium/state_trackers/nine/nine_pdata.h
+++ b/src/gallium/state_trackers/nine/nine_pdata.h
@@ -5,6 +5,7 @@
struct pheader
{
boolean unknown;
+ GUID guid;
DWORD size;
char data[1];
};
diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c
index 2be30f7e097..27a10d64473 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.c
+++ b/src/gallium/state_trackers/nine/nine_pipe.c
@@ -181,6 +181,7 @@ nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs)
}
nine_convert_blend_state_fixup(&blend, rs); /* for BOTH[INV]SRCALPHA */
}
+
blend.rt[0].colormask = rs[D3DRS_COLORWRITEENABLE];
if (rs[D3DRS_COLORWRITEENABLE1] != rs[D3DRS_COLORWRITEENABLE] ||
@@ -222,8 +223,8 @@ nine_convert_sampler_state(struct cso_context *ctx, int idx, const DWORD *ss)
samp.wrap_s = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSU]);
samp.wrap_t = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSV]);
samp.wrap_r = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSW]);
- samp.min_img_filter = ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
- samp.mag_img_filter = ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.min_img_filter = (ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.mag_img_filter = (ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
if (ss[D3DSAMP_MINFILTER] == D3DTEXF_ANISOTROPIC ||
ss[D3DSAMP_MAGFILTER] == D3DTEXF_ANISOTROPIC)
samp.max_anisotropy = ss[D3DSAMP_MAXANISOTROPY];
@@ -265,7 +266,7 @@ nine_pipe_context_clear(struct NineDevice9 *This)
const enum pipe_format nine_d3d9_to_pipe_format_map[120] =
{
[D3DFMT_UNKNOWN] = PIPE_FORMAT_NONE,
- [D3DFMT_R8G8B8] = PIPE_FORMAT_NONE,
+ [D3DFMT_R8G8B8] = PIPE_FORMAT_R8G8B8_UNORM,
[D3DFMT_A8R8G8B8] = PIPE_FORMAT_B8G8R8A8_UNORM,
[D3DFMT_X8R8G8B8] = PIPE_FORMAT_B8G8R8X8_UNORM,
[D3DFMT_R5G6B5] = PIPE_FORMAT_B5G6R5_UNORM,
@@ -323,8 +324,8 @@ const enum pipe_format nine_d3d9_to_pipe_format_map[120] =
const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT] =
{
[PIPE_FORMAT_NONE] = D3DFMT_UNKNOWN,
-
-/* [PIPE_FORMAT_B8G8R8_UNORM] = D3DFMT_R8G8B8, */
+ /* TODO: rename PIPE_FORMAT_R8G8B8_UNORM to PIPE_FORMAT_B8G8R8_UNORM */
+ [PIPE_FORMAT_R8G8B8_UNORM] = D3DFMT_R8G8B8,
[PIPE_FORMAT_B8G8R8A8_UNORM] = D3DFMT_A8R8G8B8,
[PIPE_FORMAT_B8G8R8X8_UNORM] = D3DFMT_X8R8G8B8,
[PIPE_FORMAT_B5G6R5_UNORM] = D3DFMT_R5G6B5,
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index ed431738abc..a7a7da27903 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -852,7 +852,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
/* the address register (vs only) must be
* assigned before use */
assert(!ureg_dst_is_undef(tx->regs.a0));
- ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ /* Round to lowest for vs1.1 (contrary to the doc), else
+ * round to nearest */
+ if (tx->version.major < 2 && tx->version.minor < 2)
+ ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ else
+ ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
src = ureg_src(tx->regs.address);
} else {
if (tx->version.major < 2 && tx->version.minor < 4) {
@@ -870,9 +875,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
} else {
if (tx->version.major < 3) {
assert(!param->rel);
- src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
- param->idx,
- TGSI_INTERPOLATE_PERSPECTIVE);
+ src = ureg_DECL_fs_input_cyl_centroid(
+ ureg, TGSI_SEMANTIC_COLOR, param->idx,
+ TGSI_INTERPOLATE_COLOR, 0,
+ tx->info->force_color_in_centroid ?
+ TGSI_INTERPOLATE_LOC_CENTROID : 0,
+ 0, 1);
} else {
assert(!param->rel); /* TODO */
assert(param->idx < Elements(tx->regs.v));
@@ -1163,12 +1171,9 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
assert(!param->rel);
tx->info->rt_mask |= 1 << param->idx;
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
- /* ps < 3: oCol[0] will have fog blending afterward
- * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ /* ps < 3: oCol[0] will have fog blending afterward */
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
- } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
- tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
} else {
tx->regs.oCol[param->idx] =
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
@@ -1543,25 +1548,6 @@ DECL_SPECIAL(CALLNZ)
return D3D_OK;
}
-DECL_SPECIAL(MOV_vs1x)
-{
- if (tx->insn.dst[0].file == D3DSPR_ADDR) {
- /* Implementation note: We don't write directly
- * to the addr register, but to an intermediate
- * float register.
- * Contrary to the doc, when writing to ADDR here,
- * the rounding is not to nearest, but to lowest
- * (wine test).
- * Since we use ARR next, substract 0.5. */
- ureg_SUB(tx->ureg,
- tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]),
- ureg_imm1f(tx->ureg, 0.5f));
- return D3D_OK;
- }
- return NineTranslateInstruction_Generic(tx);
-}
-
DECL_SPECIAL(LOOP)
{
struct ureg_program *ureg = tx->ureg;
@@ -1978,6 +1964,7 @@ nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
return TGSI_INTERPOLATE_LINEAR;
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_COLOR:
+ return TGSI_INTERPOLATE_COLOR;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
@@ -2058,13 +2045,17 @@ DECL_SPECIAL(DCL)
}
} else {
if (is_input && tx->version.major >= 3) {
+ unsigned interp_location = 0;
/* SM3 only, SM2 input semantic determined by file */
assert(sem.reg.idx < Elements(tx->regs.v));
+ if (sem.reg.mod & NINED3DSPDM_CENTROID ||
+ (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
+ interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
+ interp_location, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
@@ -2736,8 +2727,7 @@ DECL_SPECIAL(COMMENT)
struct sm1_op_info inst_table[] =
{
_OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
- _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
- _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
+ _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
_OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
_OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
@@ -3426,13 +3416,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
}
- /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
- if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
- struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
- }
-
if (info->position_t)
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 41577ac572b..1fe0c4bd182 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -61,6 +61,7 @@ struct nine_shader_info
uint8_t fog_enable;
uint8_t fog_mode;
+ uint8_t force_color_in_centroid;
uint16_t projected; /* ps 1.1 to 1.3 */
unsigned const_i_base; /* in vec4 (16 byte) units */
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index aee31622088..6f94e378984 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -367,14 +367,14 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
uint32_t changed_group = 0;
int has_key_changed = 0;
- if (likely(vs))
+ if (likely(state->programmable_vs))
has_key_changed = NineVertexShader9_UpdateKey(vs, state);
if (!shader_changed && !has_key_changed)
return 0;
/* likely because we dislike FF */
- if (likely(vs)) {
+ if (likely(state->programmable_vs)) {
state->cso.vs = NineVertexShader9_GetVariant(vs);
} else {
vs = device->ff.vs;
@@ -427,8 +427,8 @@ prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
/* State preparation + State commit */
-static uint32_t
-update_framebuffer(struct NineDevice9 *device)
+static void
+update_framebuffer(struct NineDevice9 *device, bool is_clear)
{
struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
@@ -438,7 +438,8 @@ update_framebuffer(struct NineDevice9 *device)
unsigned w = rt0->desc.Width;
unsigned h = rt0->desc.Height;
D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
- unsigned mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned ps_mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned mask = is_clear ? 0xf : ps_mask;
const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
DBG("\n");
@@ -498,13 +499,13 @@ update_framebuffer(struct NineDevice9 *device)
pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
- return state->changed.group;
+ if (is_clear && state->rt_mask == ps_mask)
+ state->changed.group &= ~NINE_STATE_FB;
}
static void
update_viewport(struct NineDevice9 *device)
{
- struct pipe_context *pipe = device->pipe;
const D3DVIEWPORT9 *vport = &device->state.viewport;
struct pipe_viewport_state pvport;
@@ -543,7 +544,7 @@ update_viewport(struct NineDevice9 *device)
pvport.translate[1] -= 1.0f / 128.0f;
}
- pipe->set_viewport_states(pipe, 0, 1, &pvport);
+ cso_set_viewport(device->cso, &pvport);
}
/* Loop through VS inputs and pick the vertex elements with the declared
@@ -567,7 +568,7 @@ update_vertex_elements(struct NineDevice9 *device)
state->stream_usage_mask = 0;
memset(vdecl_index_map, -1, 16);
memset(used_streams, 0, device->caps.MaxStreams);
- vs = device->state.vs ? device->state.vs : device->ff.vs;
+ vs = state->programmable_vs ? device->state.vs : device->ff.vs;
if (vdecl) {
for (n = 0; n < vs->num_inputs; ++n) {
@@ -761,7 +762,7 @@ update_textures_and_samplers(struct NineDevice9 *device)
cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
commit_samplers = FALSE;
- sampler_mask = state->vs ? state->vs->sampler_mask : 0;
+ sampler_mask = state->programmable_vs ? state->vs->sampler_mask : 0;
state->bound_samplers_mask_vs = 0;
for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
const unsigned s = NINE_SAMPLER_VS(i);
@@ -854,7 +855,7 @@ commit_vs_constants(struct NineDevice9 *device)
{
struct pipe_context *pipe = device->pipe;
- if (unlikely(!device->state.vs))
+ if (unlikely(!device->state.programmable_vs))
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
else
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
@@ -913,7 +914,8 @@ commit_ps(struct NineDevice9 *device)
NINE_STATE_DSA | \
NINE_STATE_VIEWPORT | \
NINE_STATE_VDECL | \
- NINE_STATE_IDXBUF)
+ NINE_STATE_IDXBUF | \
+ NINE_STATE_STREAMFREQ)
#define NINE_STATE_RARE \
(NINE_STATE_SCISSOR | \
@@ -934,16 +936,14 @@ validate_textures(struct NineDevice9 *device)
}
void
-nine_update_state_framebuffer(struct NineDevice9 *device)
+nine_update_state_framebuffer_clear(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
validate_textures(device);
if (state->changed.group & NINE_STATE_FB)
- update_framebuffer(device);
-
- state->changed.group &= ~NINE_STATE_FB;
+ update_framebuffer(device, TRUE);
}
boolean
@@ -964,7 +964,7 @@ nine_update_state(struct NineDevice9 *device)
validate_textures(device); /* may clobber state */
/* ff_update may change VS/PS dirty bits */
- if (unlikely(!state->vs || !state->ps))
+ if (unlikely(!state->programmable_vs || !state->ps))
nine_ff_update(device);
group = state->changed.group;
@@ -977,15 +977,14 @@ nine_update_state(struct NineDevice9 *device)
if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
if (group & NINE_STATE_FB)
- group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */
+ update_framebuffer(device, FALSE);
if (group & NINE_STATE_BLEND)
prepare_blend(device);
if (group & NINE_STATE_DSA)
prepare_dsa(device);
if (group & NINE_STATE_VIEWPORT)
update_viewport(device);
- if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
- state->changed.stream_freq & ~1)
+ if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
update_vertex_elements(device);
if (group & NINE_STATE_IDXBUF)
commit_index_buffer(device);
@@ -997,12 +996,12 @@ nine_update_state(struct NineDevice9 *device)
if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
update_textures_and_samplers(device);
if (device->prefer_user_constbuf) {
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
prepare_vs_constants_userbuf(device);
if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
prepare_ps_constants_userbuf(device);
} else {
- if ((group & NINE_STATE_VS_CONST) && state->vs)
+ if ((group & NINE_STATE_VS_CONST) && state->programmable_vs)
upload_constants(device, PIPE_SHADER_VERTEX);
if ((group & NINE_STATE_PS_CONST) && state->ps)
upload_constants(device, PIPE_SHADER_FRAGMENT);
@@ -1262,6 +1261,8 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
*/
state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
+ memcpy(state->rs_advertised, state->rs, sizeof(state->rs));
+
/* Set changed flags to initialize driver.
*/
state->changed.group = NINE_STATE_ALL;
@@ -1314,8 +1315,10 @@ nine_state_clear(struct nine_state *state, const boolean device)
nine_bind(&state->vs, NULL);
nine_bind(&state->ps, NULL);
nine_bind(&state->vdecl, NULL);
- for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
nine_bind(&state->stream[i], NULL);
+ pipe_resource_reference(&state->vtxbuf[i].buffer, NULL);
+ }
nine_bind(&state->idxbuf, NULL);
for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
if (device &&
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index b34da70ef48..a4ec4e3b63a 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -61,23 +61,24 @@
#define NINE_STATE_SAMPLER (1 << 11)
#define NINE_STATE_VDECL (1 << 12)
#define NINE_STATE_IDXBUF (1 << 13)
-#define NINE_STATE_PRIM (1 << 14)
-#define NINE_STATE_MATERIAL (1 << 15)
-#define NINE_STATE_BLEND_COLOR (1 << 16)
-#define NINE_STATE_STENCIL_REF (1 << 17)
-#define NINE_STATE_SAMPLE_MASK (1 << 18)
-#define NINE_STATE_FF (0x1f << 19)
-#define NINE_STATE_FF_VS (0x17 << 19)
-#define NINE_STATE_FF_PS (0x18 << 19)
-#define NINE_STATE_FF_LIGHTING (1 << 19)
-#define NINE_STATE_FF_MATERIAL (1 << 20)
-#define NINE_STATE_FF_VSTRANSF (1 << 21)
-#define NINE_STATE_FF_PSSTAGES (1 << 22)
-#define NINE_STATE_FF_OTHER (1 << 23)
-#define NINE_STATE_FOG_SHADER (1 << 24)
-#define NINE_STATE_PS1X_SHADER (1 << 25)
-#define NINE_STATE_ALL 0x3ffffff
-#define NINE_STATE_UNHANDLED (1 << 26)
+#define NINE_STATE_STREAMFREQ (1 << 14)
+#define NINE_STATE_PRIM (1 << 15)
+#define NINE_STATE_MATERIAL (1 << 16)
+#define NINE_STATE_BLEND_COLOR (1 << 17)
+#define NINE_STATE_STENCIL_REF (1 << 18)
+#define NINE_STATE_SAMPLE_MASK (1 << 19)
+#define NINE_STATE_FF (0x1f << 20)
+#define NINE_STATE_FF_VS (0x17 << 20)
+#define NINE_STATE_FF_PS (0x18 << 20)
+#define NINE_STATE_FF_LIGHTING (1 << 20)
+#define NINE_STATE_FF_MATERIAL (1 << 21)
+#define NINE_STATE_FF_VSTRANSF (1 << 22)
+#define NINE_STATE_FF_PSSTAGES (1 << 23)
+#define NINE_STATE_FF_OTHER (1 << 24)
+#define NINE_STATE_FOG_SHADER (1 << 25)
+#define NINE_STATE_PS1X_SHADER (1 << 26)
+#define NINE_STATE_ALL 0x7ffffff
+#define NINE_STATE_UNHANDLED (1 << 27)
#define NINE_STATE_COMMIT_DSA (1 << 0)
#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
@@ -152,6 +153,7 @@ struct nine_state
int vs_const_i[NINE_MAX_CONST_I][4];
BOOL vs_const_b[NINE_MAX_CONST_B];
float *vs_lconstf_temp;
+ BOOL programmable_vs;
struct NinePixelShader9 *ps;
float *ps_const_f;
@@ -179,6 +181,7 @@ struct nine_state
uint8_t rt_mask;
DWORD rs[NINED3DRS_COUNT];
+ DWORD rs_advertised[NINED3DRS_COUNT]; /* the ones apps get with GetRenderState */
struct NineBaseTexture9 *texture[NINE_MAX_SAMPLERS]; /* PS, DMAP, VS */
@@ -236,7 +239,7 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32];
struct NineDevice9;
-void nine_update_state_framebuffer(struct NineDevice9 *);
+void nine_update_state_framebuffer_clear(struct NineDevice9 *);
boolean nine_update_state(struct NineDevice9 *);
void nine_state_restore_non_cso(struct NineDevice9 *device);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 42bc349c2cc..00be67f8955 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -160,6 +160,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
info.sampler_ps1xtypes = key;
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
+ info.force_color_in_centroid = key >> 34 & 1;
info.projected = (key >> 48) & 0xffff;
hr = nine_translate_shader(This->base.device, &info);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index e09009f6621..6b431813a81 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -28,6 +28,7 @@
#include "nine_state.h"
#include "basetexture9.h"
#include "nine_ff.h"
+#include "surface9.h"
struct nine_lconstf;
@@ -92,6 +93,10 @@ NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps,
key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33;
}
+ /* centroid interpolation automatically used for color ps inputs */
+ if (state->rt[0]->desc.MultiSampleType > 1)
+ key |= ((uint64_t)1) << 34;
+
if (unlikely(ps->byte_code.version < 0x14)) {
projected = nine_ff_get_projected_key(state);
key |= ((uint64_t) projected) << 48;
diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c
index 6d915338b24..b929c50a83c 100644
--- a/src/gallium/state_trackers/nine/resource9.c
+++ b/src/gallium/state_trackers/nine/resource9.c
@@ -29,12 +29,12 @@
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
+#include "util/u_resource.h"
#include "nine_pdata.h"
#define DBG_CHANNEL DBG_RESOURCE
-
HRESULT
NineResource9_ctor( struct NineResource9 *This,
struct NineUnknownParams *pParams,
@@ -62,6 +62,33 @@ NineResource9_ctor( struct NineResource9 *This,
if (Allocate) {
assert(!initResource);
+
+ /* On Windows it is possible that allocation fails when
+ * IDirect3DDevice9::GetAvailableTextureMem() still reports
+ * enough free space.
+ *
+ * Some games allocate surfaces
+ * in a loop until they receive D3DERR_OUTOFVIDEOMEMORY to measure
+ * the available texture memory size.
+ *
+ * We are not using the driver's VRAM statistics because:
+ * * This would add overhead to each resource allocation.
+ * Freeing memory is lazy and takes some time, but applications
+ * expect the memory counter to change immediately after allocating
+ * or freeing memory.
+ *
+ * Vertex buffers and index buffers are not accounted for!
+ */
+ if (This->info.target != PIPE_BUFFER) {
+ This->size = util_resource_size(&This->info);
+
+ This->base.device->available_texture_mem -= This->size;
+ if (This->base.device->available_texture_mem <=
+ This->base.device->available_texture_limit) {
+ return D3DERR_OUTOFVIDEOMEMORY;
+ }
+ }
+
DBG("(%p) Creating pipe_resource.\n", This);
This->resource = screen->resource_create(screen, &This->info);
if (!This->resource)
@@ -92,6 +119,10 @@ NineResource9_dtor( struct NineResource9 *This )
* still hold a reference. */
pipe_resource_reference(&This->resource, NULL);
+ /* NOTE: size is 0, unless something has actually been allocated */
+ if (This->base.device)
+ This->base.device->available_texture_mem += This->size;
+
NineUnknown_dtor(&This->base);
}
@@ -117,9 +148,10 @@ NineResource9_SetPrivateData( struct NineResource9 *This,
enum pipe_error err;
struct pheader *header;
const void *user_data = pData;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p SizeOfData=%u Flags=%x\n",
- This, refguid, pData, SizeOfData, Flags);
+ DBG("This=%p GUID=%s pData=%p SizeOfData=%u Flags=%x\n",
+ This, GUID_sprintf(guid_str, refguid), pData, SizeOfData, Flags);
if (Flags & D3DSPD_IUNKNOWN)
user_assert(SizeOfData == sizeof(IUnknown *), D3DERR_INVALIDCALL);
@@ -141,8 +173,9 @@ NineResource9_SetPrivateData( struct NineResource9 *This,
header->size = SizeOfData;
memcpy(header->data, user_data, header->size);
+ memcpy(&header->guid, refguid, sizeof(header->guid));
- err = util_hash_table_set(This->pdata, refguid, header);
+ err = util_hash_table_set(This->pdata, &header->guid, header);
if (err == PIPE_OK) {
if (header->unknown) { IUnknown_AddRef(*(IUnknown **)header->data); }
return D3D_OK;
@@ -162,9 +195,10 @@ NineResource9_GetPrivateData( struct NineResource9 *This,
{
struct pheader *header;
DWORD sizeofdata;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n",
- This, refguid, pData, pSizeOfData);
+ DBG("This=%p GUID=%s pData=%p pSizeOfData=%p\n",
+ This, GUID_sprintf(guid_str, refguid), pData, pSizeOfData);
header = util_hash_table_get(This->pdata, refguid);
if (!header) { return D3DERR_NOTFOUND; }
@@ -191,8 +225,9 @@ NineResource9_FreePrivateData( struct NineResource9 *This,
REFGUID refguid )
{
struct pheader *header;
+ char guid_str[64];
- DBG("This=%p refguid=%p\n", This, refguid);
+ DBG("This=%p GUID=%s\n", This, GUID_sprintf(guid_str, refguid));
header = util_hash_table_get(This->pdata, refguid);
if (!header)
diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h
index 906f90806ce..8122257b7a7 100644
--- a/src/gallium/state_trackers/nine/resource9.h
+++ b/src/gallium/state_trackers/nine/resource9.h
@@ -45,6 +45,8 @@ struct NineResource9
/* for [GS]etPrivateData/FreePrivateData */
struct util_hash_table *pdata;
+
+ long long size;
};
static inline struct NineResource9 *
NineResource9( void *data )
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 6d6e1be0b7f..0d1a04b657a 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -24,6 +24,7 @@
#include "device9.h"
#include "basetexture9.h"
#include "nine_helpers.h"
+#include "vertexdeclaration9.h"
#define DBG_CHANNEL DBG_STATEBLOCK
@@ -179,6 +180,7 @@ nine_state_copy_common(struct nine_state *dst,
const int r = ffs(m) - 1;
m &= ~(1 << r);
dst->rs[i * 32 + r] = src->rs[i * 32 + r];
+ dst->rs_advertised[i * 32 + r] = src->rs_advertised[i * 32 + r];
}
}
@@ -223,7 +225,7 @@ nine_state_copy_common(struct nine_state *dst,
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
}
@@ -269,6 +271,10 @@ nine_state_copy_common(struct nine_state *dst,
dst->ff.light = REALLOC(dst->ff.light,
dst->ff.num_lights * sizeof(D3DLIGHT9),
mask->ff.num_lights * sizeof(D3DLIGHT9));
+ for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) {
+ memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9));
+ dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ }
dst->ff.num_lights = mask->ff.num_lights;
}
for (i = 0; i < mask->ff.num_lights; ++i)
@@ -353,6 +359,7 @@ nine_state_copy_common_all(struct nine_state *dst,
/* Render states. */
memcpy(dst->rs, src->rs, sizeof(dst->rs));
+ memcpy(dst->rs_advertised, src->rs_advertised, sizeof(dst->rs_advertised));
if (apply)
memcpy(dst->changed.rs, src->changed.rs, sizeof(dst->changed.rs));
@@ -377,7 +384,7 @@ nine_state_copy_common_all(struct nine_state *dst,
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
dst->stream_freq[i] = src->stream_freq[i];
@@ -486,7 +493,10 @@ NineStateBlock9_Apply( struct NineStateBlock9 *This )
nine_state_copy_common(dst, src, src, TRUE, pool);
if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
- nine_bind(&dst->vdecl, src->vdecl);
+ NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
+
+ /* Recomputing it is needed if we changed vs but not vdecl */
+ dst->programmable_vs = dst->vs && !(dst->vdecl && dst->vdecl->position_t);
/* Textures */
if (src->changed.texture) {
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 14c1ce927ad..f88b75c3dd7 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -56,6 +56,9 @@ NineSurface9_ctor( struct NineSurface9 *This,
D3DSURFACE_DESC *pDesc )
{
HRESULT hr;
+ union pipe_color_union rgba = {0};
+ struct pipe_surface *surf;
+ struct pipe_context *pipe = pParams->device->pipe;
DBG("This=%p pDevice=%p pResource=%p Level=%u Layer=%u pDesc=%p\n",
This, pParams->device, pResource, Level, Layer, pDesc);
@@ -140,6 +143,12 @@ NineSurface9_ctor( struct NineSurface9 *This,
if (pResource && NineSurface9_IsOffscreenPlain(This))
pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
+ /* TODO: investigate what else exactly needs to be cleared */
+ if (This->base.resource && (pDesc->Usage & D3DUSAGE_RENDERTARGET)) {
+ surf = NineSurface9_GetSurface(This, 0);
+ pipe->clear_render_target(pipe, surf, &rgba, 0, 0, pDesc->Width, pDesc->Height);
+ }
+
NineSurface9_Dump(This);
return D3D_OK;
@@ -156,7 +165,7 @@ NineSurface9_dtor( struct NineSurface9 *This )
/* Release system memory when we have to manage it (no parent) */
if (!This->base.base.container && This->data)
- FREE(This->data);
+ align_free(This->data);
NineResource9_dtor(&This->base);
}
@@ -348,7 +357,7 @@ NineSurface9_LockRect( struct NineSurface9 *This,
D3DERR_INVALIDCALL);
if (pRect && This->desc.Pool == D3DPOOL_DEFAULT &&
- compressed_format (This->desc.Format)) {
+ util_format_is_compressed(This->base.info.format)) {
const unsigned w = util_format_get_blockwidth(This->base.info.format);
const unsigned h = util_format_get_blockheight(This->base.info.format);
user_assert((pRect->left == 0 && pRect->right == This->desc.Width &&
@@ -384,8 +393,8 @@ NineSurface9_LockRect( struct NineSurface9 *This,
* and bpp 8, and the app has a workaround to work with the fact
* that it is actually compressed. */
if (is_ATI1_ATI2(This->base.info.format)) {
- pLockedRect->Pitch = This->desc.Height;
- pLockedRect->pBits = This->data + box.y * This->desc.Height + box.x;
+ pLockedRect->Pitch = This->desc.Width;
+ pLockedRect->pBits = This->data + box.y * This->desc.Width + box.x;
} else {
pLockedRect->Pitch = This->stride;
pLockedRect->pBits = NineSurface9_GetSystemMemPointer(This,
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index 3b1a7a4493c..82d4173fbb2 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -118,6 +118,14 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
DBG("This=%p pParams=%p\n", This, pParams);
user_assert(pParams != NULL, E_POINTER);
+ user_assert(pParams->SwapEffect, D3DERR_INVALIDCALL);
+ user_assert((pParams->SwapEffect != D3DSWAPEFFECT_COPY) ||
+ (pParams->BackBufferCount <= 1), D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex || pParams->BackBufferCount <= 3, D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_FLIP) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_COPY) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_DISCARD), D3DERR_INVALIDCALL);
DBG("pParams(%p):\n"
"BackBufferWidth: %u\n"
@@ -145,11 +153,6 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
pParams->FullScreen_RefreshRateInHz,
pParams->PresentationInterval);
- if (pParams->SwapEffect == D3DSWAPEFFECT_COPY &&
- pParams->BackBufferCount > 1) {
- pParams->BackBufferCount = 1;
- }
-
if (pParams->BackBufferCount > 3) {
pParams->BackBufferCount = 3;
}
@@ -713,6 +716,10 @@ present( struct NineSwapChain9 *This,
This->pipe->blit(This->pipe, &blit);
}
+ /* The resource we present has to resolve fast clears
+ * if needed (and other things) */
+ This->pipe->flush_resource(This->pipe, resource);
+
if (This->params.SwapEffect != D3DSWAPEFFECT_DISCARD)
handle_draw_cursor_and_hud(This, resource);
@@ -738,12 +745,6 @@ bypass_rendering:
return D3DERR_WASSTILLDRAWING;
}
- if (This->present_buffers)
- resource = This->present_buffers[0];
- else
- resource = This->buffers[0]->base.resource;
- This->pipe->flush_resource(This->pipe, resource);
-
if (!This->enable_threadpool) {
This->tasks[0]=NULL;
fence = swap_fences_pop_front(This);
@@ -786,6 +787,19 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
if (hr == D3DERR_WASSTILLDRAWING)
return hr;
+ if (This->base.device->ex) {
+ if (NineSwapChain9_GetOccluded(This)) {
+ return S_PRESENT_OCCLUDED;
+ }
+ } else {
+ if (NineSwapChain9_GetOccluded(This)) {
+ This->base.device->device_needs_reset = TRUE;
+ }
+ if (This->base.device->device_needs_reset) {
+ return D3DERR_DEVICELOST;
+ }
+ }
+
switch (This->params.SwapEffect) {
case D3DSWAPEFFECT_FLIP:
UNTESTED(4);
@@ -840,7 +854,6 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]);
This->base.device->state.changed.group |= NINE_STATE_FB;
- nine_update_state_framebuffer(This->base.device);
return hr;
}
@@ -907,8 +920,9 @@ NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
DBG("GetBackBuffer: This=%p iBackBuffer=%d Type=%d ppBackBuffer=%p\n",
This, iBackBuffer, Type, ppBackBuffer);
(void)user_error(Type == D3DBACKBUFFER_TYPE_MONO);
+ /* don't touch ppBackBuffer on error */
+ user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
user_assert(iBackBuffer < This->params.BackBufferCount, D3DERR_INVALIDCALL);
- user_assert(ppBackBuffer != NULL, E_POINTER);
NineUnknown_AddRef(NineUnknown(This->buffers[iBackBuffer]));
*ppBackBuffer = (IDirect3DSurface9 *)This->buffers[iBackBuffer];
@@ -990,3 +1004,13 @@ NineSwapChain9_new( struct NineDevice9 *pDevice,
implicit, pPresent, pPresentationParameters,
pCTX, hFocusWindow, NULL);
}
+
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This )
+{
+ if (This->base.device->minor_version_num > 0) {
+ return ID3DPresent_GetWindowOccluded(This->present);
+ }
+
+ return FALSE;
+}
diff --git a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h
index 5e48dde5004..4bd74f7b6ec 100644
--- a/src/gallium/state_trackers/nine/swapchain9.h
+++ b/src/gallium/state_trackers/nine/swapchain9.h
@@ -139,4 +139,7 @@ HRESULT WINAPI
NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This );
+
#endif /* _NINE_SWAPCHAIN9_H_ */
diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c
index bc325c1335e..ada08cea90a 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -235,7 +235,7 @@ NineTexture9_dtor( struct NineTexture9 *This )
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c
index 8e2eaaf8ff9..10311b428fe 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.c
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.c
@@ -39,56 +39,13 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
struct NineUnknownParams *pParams,
D3DVERTEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This,
pDesc->Size, pDesc->Usage, pDesc->Pool);
- user_assert(pDesc->Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
-
- This->maps = MALLOC(sizeof(struct pipe_transfer *));
- if (!This->maps)
- return E_OUTOFMEMORY;
- This->nmaps = 0;
- This->maxmaps = 1;
-
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
- /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
- D3DRTYPE_VERTEXBUFFER, pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_VERTEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
@@ -102,85 +59,29 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This )
{
- if (This->maps) {
- while (This->nmaps) {
- NineVertexBuffer9_Unlock(This);
- }
- FREE(This->maps);
- }
+ NineBuffer9_dtor(&This->base);
+}
- NineResource9_dtor(&This->base);
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This )
+{
+ return NineBuffer9_GetResource(&This->base);
}
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
- UINT OffsetToLock,
- UINT SizeToLock,
- void **ppbData,
- DWORD Flags )
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags )
{
- struct pipe_box box;
- void *data;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
- This, This->base.resource,
- OffsetToLock, SizeToLock, Flags);
-
- user_assert(ppbData, E_POINTER);
- user_assert(!(Flags & ~(D3DLOCK_DISCARD |
- D3DLOCK_DONOTWAIT |
- D3DLOCK_NO_DIRTY_UPDATE |
- D3DLOCK_NOSYSLOCK |
- D3DLOCK_READONLY |
- D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
-
- if (This->nmaps == This->maxmaps) {
- struct pipe_transfer **newmaps =
- REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
- sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
- if (newmaps == NULL)
- return E_OUTOFMEMORY;
-
- This->maxmaps <<= 1;
- This->maps = newmaps;
- }
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->maps[This->nmaps]);
- if (!data) {
- DBG("pipe::transfer_map failed\n"
- " usage = %x\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- /* not sure what to return, msdn suggests this */
- if (Flags & D3DLOCK_DONOTWAIT)
- return D3DERR_WASSTILLDRAWING;
- return D3DERR_INVALIDCALL;
- }
-
- This->nmaps++;
- *ppbData = data;
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
{
- DBG("This=%p\n", This);
-
- user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
- This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h
index 6174de4df08..859402b925b 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.h
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.h
@@ -22,8 +22,8 @@
#ifndef _NINE_VERTEXBUFFER9_H_
#define _NINE_VERTEXBUFFER9_H_
-
#include "resource9.h"
+#include "buffer9.h"
struct pipe_screen;
struct pipe_context;
@@ -31,13 +31,10 @@ struct pipe_transfer;
struct NineVertexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* G3D */
struct pipe_context *pipe;
- struct pipe_transfer **maps;
- int nmaps, maxmaps;
-
D3DVERTEXBUFFER_DESC desc;
};
static inline struct NineVertexBuffer9 *
@@ -58,6 +55,12 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This );
+/*** Nine private ***/
+
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This );
+
+/*** Direct3D public ***/
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c
index 2047b91abc4..36c594b5be3 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.c
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c
@@ -174,24 +174,24 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
const D3DVERTEXELEMENT9 *pElements )
{
const D3DCAPS9 *caps;
- unsigned i;
-
+ unsigned i, nelems;
DBG("This=%p pParams=%p pElements=%p\n", This, pParams, pElements);
+ /* wine */
+ for (nelems = 0;
+ pElements[nelems].Stream != 0xFF;
+ ++nelems) {
+ user_assert(pElements[nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
+ user_assert(!(pElements[nelems].Offset & 3), E_FAIL);
+ }
+
+ caps = NineDevice9_GetCaps(pParams->device);
+ user_assert(nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
+
HRESULT hr = NineUnknown_ctor(&This->base, pParams);
if (FAILED(hr)) { return hr; }
- /* wine */
- for (This->nelems = 0;
- pElements[This->nelems].Stream != 0xFF;
- ++This->nelems) {
- user_assert(pElements[This->nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
- user_assert(!(pElements[This->nelems].Offset & 3), E_FAIL);
- }
-
- caps = NineDevice9_GetCaps(This->base.device);
- user_assert(This->nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
-
+ This->nelems = nelems;
This->decls = CALLOC(This->nelems+1, sizeof(D3DVERTEXELEMENT9));
This->elems = CALLOC(This->nelems, sizeof(struct pipe_vertex_element));
This->usage_map = CALLOC(This->nelems, sizeof(uint16_t));
@@ -203,6 +203,9 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
This->decls[i].UsageIndex);
This->usage_map[i] = usage;
+ if (This->decls[i].Usage == D3DDECLUSAGE_POSITIONT)
+ This->position_t = TRUE;
+
This->elems[i].src_offset = This->decls[i].Offset;
This->elems[i].instance_divisor = 0;
This->elems[i].vertex_buffer_index = This->decls[i].Stream;
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h
index 655bcfbf165..e39f259440f 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.h
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h
@@ -46,6 +46,8 @@ struct NineVertexDeclaration9
D3DVERTEXELEMENT9 *decls;
DWORD fvf;
+
+ BOOL position_t;
};
static inline struct NineVertexDeclaration9 *
NineVertexDeclaration9( void *data )
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index 0b9005685a9..f6988923caa 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -136,7 +136,7 @@ NineVolume9_dtor( struct NineVolume9 *This )
NineVolume9_UnlockBox(This);
if (This->data)
- FREE(This->data);
+ align_free(This->data);
pipe_resource_reference(&This->resource, NULL);
@@ -264,6 +264,13 @@ NineVolume9_LockBox( struct NineVolume9 *This,
usage |= PIPE_TRANSFER_DONTBLOCK;
if (pBox) {
+ user_assert(pBox->Right > pBox->Left, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom > pBox->Top, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back > pBox->Front, D3DERR_INVALIDCALL);
+ user_assert(pBox->Right <= This->desc.Width, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom <= This->desc.Height, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back <= This->desc.Depth, D3DERR_INVALIDCALL);
+
d3dbox_to_pipe_box(&box, pBox);
if (u_box_clip_2d(&box, &box, This->desc.Width, This->desc.Height) < 0) {
DBG("Locked volume intersection empty.\n");
diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
index f66ed896e62..b4536828909 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -35,6 +35,7 @@
#include "util/u_memory.h"
#include "util/u_video.h"
#include "vl/vl_rbsp.h"
+#include "vl/vl_zscan.h"
#include "entrypoint.h"
#include "vid_dec.h"
@@ -205,6 +206,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
const uint8_t *defaultList, const uint8_t *fallbackList)
{
unsigned lastScale = 8, nextScale = 8;
+ const int *list;
unsigned i;
/* (pic|seq)_scaling_list_present_flag[i] */
@@ -214,6 +216,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
return;
}
+ list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal;
for (i = 0; i < sizeOfScalingList; ++i ) {
if (nextScale != 0) {
@@ -224,8 +227,8 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
return;
}
}
- scalingList[i] = nextScale == 0 ? lastScale : nextScale;
- lastScale = scalingList[i];
+ scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale;
+ lastScale = scalingList[list[i]];
}
}
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 5cd1ba7815c..233db8ae372 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -53,22 +53,29 @@ DRI_CONF_BEGIN
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_NINE
+ DRI_CONF_NINE_OVERRIDEVENDOR(-1)
DRI_CONF_NINE_THROTTLE(-2)
DRI_CONF_NINE_THREADSUBMIT("false")
DRI_CONF_SECTION_END
DRI_CONF_END;
-/* define fallback value here: NVIDIA GeForce GTX 970 */
-#define FALLBACK_NAME "NV124"
-#define FALLBACK_DEVID 0x13C2
-#define FALLBACK_VENID 0x10de
+struct fallback_card_config {
+ const char *name;
+ unsigned vendor_id;
+ unsigned device_id;
+} fallback_cards[] = {
+ {"NV124", 0x10de, 0x13C2}, /* NVIDIA GeForce GTX 970 */
+ {"HAWAII", 0x1002, 0x67b1}, /* AMD Radeon R9 290 */
+ {"Haswell Mobile", 0x8086, 0x0416}, /* Intel Haswell Mobile GT2 */
+ {"SVGA3D", 0x15ad, 0x0405}, /* VMware SVGA 3D */
+};
/* prototypes */
void
d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
- unsigned fallback_ven,
- unsigned fallback_dev,
- const char* fallback_name );
+ unsigned fallback_ven,
+ unsigned fallback_dev,
+ const char* fallback_name );
void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
@@ -118,9 +125,9 @@ get_bus_info( int fd,
*subsysid = 0;
*revision = 0;
} else {
- DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME);
- *vendorid = FALLBACK_VENID;
- *deviceid = FALLBACK_DEVID;
+ DBG("Unable to detect card. Faking %s\n", fallback_cards[0].name);
+ *vendorid = fallback_cards[0].vendor_id;
+ *deviceid = fallback_cards[0].device_id;
*subsysid = 0;
*revision = 0;
}
@@ -128,8 +135,10 @@ get_bus_info( int fd,
static inline void
read_descriptor( struct d3dadapter9_context *ctx,
- int fd )
+ int fd, int override_vendorid )
{
+ unsigned i;
+ BOOL found;
D3DADAPTER_IDENTIFIER9 *drvid = &ctx->identifier;
memset(drvid, 0, sizeof(*drvid));
@@ -140,9 +149,30 @@ read_descriptor( struct d3dadapter9_context *ctx,
strncpy(drvid->Description, ctx->hal->get_name(ctx->hal),
sizeof(drvid->Description));
+ if (override_vendorid > 0) {
+ found = FALSE;
+ /* fill in device_id and card name for fake vendor */
+ for (i = 0; i < sizeof(fallback_cards)/sizeof(fallback_cards[0]); i++) {
+ if (fallback_cards[i].vendor_id == override_vendorid) {
+ DBG("Faking card '%s' vendor 0x%04x, device 0x%04x\n",
+ fallback_cards[i].name,
+ fallback_cards[i].vendor_id,
+ fallback_cards[i].device_id);
+ drvid->VendorId = fallback_cards[i].vendor_id;
+ drvid->DeviceId = fallback_cards[i].device_id;
+ strncpy(drvid->Description, fallback_cards[i].name,
+ sizeof(drvid->Description));
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ DBG("Unknown fake vendor 0x%04x! Using detected vendor instead.\n", override_vendorid);
+ }
+ }
/* choose fall-back vendor if necessary to allow
* the following functions to return sane results */
- d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME);
+ d3d_match_vendor_id(drvid, fallback_cards[0].vendor_id, fallback_cards[0].device_id, fallback_cards[0].name);
/* fill in driver name and version info */
d3d_fill_driver_version(drvid);
/* override Description field with Windows like names */
@@ -177,6 +207,7 @@ drm_create_adapter( int fd,
driOptionCache defaultInitOptions;
driOptionCache userInitOptions;
int throttling_value_user = -2;
+ int override_vendorid = -1;
if (!ctx) { return E_OUTOFMEMORY; }
@@ -247,6 +278,10 @@ drm_create_adapter( int fd,
"You should not expect any benefit.");
}
+ if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) {
+ override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid");
+ }
+
driDestroyOptionCache(&userInitOptions);
driDestroyOptionInfo(&defaultInitOptions);
@@ -260,7 +295,7 @@ drm_create_adapter( int fd,
}
/* read out PCI info */
- read_descriptor(&ctx->base, fd);
+ read_descriptor(&ctx->base, fd, override_vendorid);
/* create and return new ID3DAdapter9 */
hr = NineAdapter9_new(&ctx->base, (struct NineAdapter9 **)ppAdapter);
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index d4030852943..2a7738e6979 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -94,7 +94,7 @@ gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4
endif
ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DGALLIUM_VIRGL
-gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl
+gallium_DRIVERS += libmesa_winsys_virgl libmesa_winsys_virgl_vtest libmesa_pipe_virgl
endif
ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 30a1aa8d6ba..59a801b1426 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -288,16 +288,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.alloc_size = size;
request.phys_alignment = alignment;
- if (initial_domain & RADEON_DOMAIN_VRAM) {
+ if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- }
- if (initial_domain & RADEON_DOMAIN_GTT) {
+ if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- }
+
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 7393a1d1eb4..dab27dfba96 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -68,7 +68,6 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
- default:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
@@ -86,23 +85,13 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
+ default:
+ fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+ assert(!"this should never occur");
+ return 2;
}
}
-/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG
- * into GB_TILING_CONFIG register which is only present on R600-R700. */
-static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
-{
- unsigned num_pipes = info->gb_addr_cfg & 0x7;
- unsigned num_banks = info->mc_arb_ramcfg & 0x3;
- unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7;
- unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3;
-
- return num_pipes | (num_banks << 4) |
- (pipe_interleave_bytes << 8) |
- (row_size << 12);
-}
-
/* Helper function to do the ioctls needed for setup and init. */
static boolean do_winsys_init(struct amdgpu_winsys *ws)
{
@@ -251,20 +240,19 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
/* convert the shader clock from KHz to MHz */
- ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+ ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = uvd.available_rings != 0;
ws->info.vce_fw_version =
vce.available_rings ? vce_version : 0;
ws->info.has_userptr = TRUE;
- ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
- ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
- ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
- ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
- ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
- ws->info.r600_virtual_address = TRUE;
- ws->info.r600_has_dma = dma.available_rings != 0;
+ ws->info.num_render_backends = ws->amdinfo.rb_pipes;
+ ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+ ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+ ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
+ ws->info.has_virtual_memory = TRUE;
+ ws->info.has_sdma = dma.available_rings != 0;
/* Get the number of good compute units. */
ws->info.num_good_compute_units = 0;
@@ -276,7 +264,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.si_tile_mode_array_valid = TRUE;
- ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+ ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask;
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
sizeof(ws->amdinfo.gb_macro_tile_mode));
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 3ec6a065c7d..7e9ed0ca0fe 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -281,7 +281,7 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
if (rws->va_unmap_working) {
struct drm_radeon_gem_va va;
@@ -552,7 +552,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
pipe_mutex_init(bo->map_mutex);
pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(rws, size, alignment);
@@ -834,7 +834,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
pipe_mutex_unlock(ws->bo_handles_mutex);
- if (ws->info.r600_virtual_address) {
+ if (ws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
@@ -966,7 +966,7 @@ done:
if (stride)
*stride = whandle->stride;
- if (ws->info.r600_virtual_address && !bo->va) {
+ if (ws->info.has_virtual_memory && !bo->va) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 085071c381c..155a13008a4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -283,7 +283,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
* This doesn't have to be done if virtual memory is enabled,
* because there is no offset patching with virtual memory.
*/
- if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
+ if (cs->base.ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
return i;
}
}
@@ -540,7 +540,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->flags[0] = 0;
cs->cst->flags[1] = RADEON_CS_RING_DMA;
cs->cst->cs.num_chunks = 3;
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
}
break;
@@ -567,7 +567,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
cs->cst->cs.num_chunks = 3;
}
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
cs->cst->cs.num_chunks = 3;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 8a1ed3ae08c..35dc7e69dcf 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -298,10 +298,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
}
/* Check for dma */
- ws->info.r600_has_dma = FALSE;
+ ws->info.has_sdma = FALSE;
/* DMA is disabled on R700. There is IB corruption and hangs. */
if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
- ws->info.r600_has_dma = TRUE;
+ ws->info.has_sdma = TRUE;
}
/* Check for UVD and VCE */
@@ -351,11 +351,11 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
/* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
- &ws->info.max_sclk);
- ws->info.max_sclk /= 1000;
+ &ws->info.max_shader_clock);
+ ws->info.max_shader_clock /= 1000;
radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
- &ws->info.si_backend_enabled_mask);
+ &ws->info.enabled_rb_mask);
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -372,51 +372,72 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
return FALSE;
}
else if (ws->gen >= DRV_R600) {
+ uint32_t tiling_config = 0;
+
if (ws->info.drm_minor >= 9 &&
!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
"num backends",
- &ws->info.r600_num_backends))
+ &ws->info.num_render_backends))
return FALSE;
/* get the GPU counter frequency, failure is not fatal */
radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
- &ws->info.r600_clock_crystal_freq);
+ &ws->info.clock_crystal_freq);
radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
- &ws->info.r600_tiling_config);
+ &tiling_config);
+
+ ws->info.r600_num_banks =
+ ws->info.chip_class >= EVERGREEN ?
+ 4 << ((tiling_config & 0xf0) >> 4) :
+ 4 << ((tiling_config & 0x30) >> 4);
+
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ?
+ 256 << ((tiling_config & 0xf00) >> 8) :
+ 256 << ((tiling_config & 0xc0) >> 6);
+
+ if (!ws->info.pipe_interleave_bytes)
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ? 512 : 256;
if (ws->info.drm_minor >= 11) {
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
- &ws->info.r600_num_tile_pipes);
+ &ws->info.num_tile_pipes);
if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
- &ws->info.r600_backend_map))
- ws->info.r600_backend_map_valid = TRUE;
+ &ws->info.r600_gb_backend_map))
+ ws->info.r600_gb_backend_map_valid = TRUE;
+ } else {
+ ws->info.num_tile_pipes =
+ ws->info.chip_class >= EVERGREEN ?
+ 1 << (tiling_config & 0xf) :
+ 1 << ((tiling_config & 0xe) >> 1);
}
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (ws->info.drm_minor >= 13) {
uint32_t ib_vm_max_size;
- ws->info.r600_virtual_address = TRUE;
+ ws->info.has_virtual_memory = TRUE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
&ws->va_start))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
&ib_vm_max_size))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
&ws->va_unmap_working);
}
if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
}
/* Get max pipes, this is only needed for compute shaders. All evergreen+
* chips have at least 2 pipes, so we use 2 as a default. */
- ws->info.r600_max_pipes = 2;
+ ws->info.r600_max_quad_pipes = 2;
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
- &ws->info.r600_max_pipes);
+ &ws->info.r600_max_quad_pipes);
/* All GPUs have at least one compute unit */
ws->info.num_good_compute_units = 1;
@@ -742,7 +763,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->fd = dup(fd);
if (!do_winsys_init(ws))
- goto fail;
+ goto fail1;
pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size),
@@ -812,8 +833,9 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return &ws->base;
fail:
- pipe_mutex_unlock(fd_tab_mutex);
pb_cache_deinit(&ws->bo_cache);
+fail1:
+ pipe_mutex_unlock(fd_tab_mutex);
if (ws->surf_man)
radeon_surface_manager_free(ws->surf_man);
if (ws->fd >= 0)
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_public.h b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
index be01021ca9a..f70f0e50448 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_public.h
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
@@ -23,8 +23,8 @@
#ifndef VIRGL_DRM_PUBLIC_H
#define VIRGL_DRM_PUBLIC_H
-struct virgl_winsys;
+struct pipe_screen;
-struct virgl_winsys *virgl_drm_winsys_create(int drmFD);
+struct pipe_screen *virgl_drm_screen_create(int fd);
#endif
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index b5d4435e5e6..ba009882ec2 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include
#include "os/os_mman.h"
#include "os/os_time.h"
@@ -33,6 +34,8 @@
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "state_tracker/drm_driver.h"
+#include "virgl/virgl_screen.h"
+#include "virgl/virgl_public.h"
#include
#include "virtgpu_drm.h"
@@ -50,10 +53,17 @@ static void virgl_hw_res_destroy(struct virgl_drm_winsys *qdws,
{
struct drm_gem_close args;
- if (res->name) {
+ if (res->flinked) {
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+ util_hash_table_remove(qdws->bo_names,
+ (void *)(uintptr_t)res->flink);
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ }
+
+ if (res->bo_handle) {
pipe_mutex_lock(qdws->bo_handles_mutex);
util_hash_table_remove(qdws->bo_handles,
- (void *)(uintptr_t)res->name);
+ (void *)(uintptr_t)res->bo_handle);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
@@ -109,6 +119,7 @@ virgl_drm_winsys_destroy(struct virgl_winsys *qws)
virgl_cache_flush(qdws);
util_hash_table_destroy(qdws->bo_handles);
+ util_hash_table_destroy(qdws->bo_names);
pipe_mutex_destroy(qdws->bo_handles_mutex);
pipe_mutex_destroy(qdws->mutex);
@@ -367,11 +378,12 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
struct drm_gem_open open_arg = {};
struct drm_virtgpu_resource_info info_arg = {};
struct virgl_hw_res *res;
+ uint32_t handle = whandle->handle;
pipe_mutex_lock(qdws->bo_handles_mutex);
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
- res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle);
+ res = util_hash_table_get(qdws->bo_names, (void*)(uintptr_t)handle);
if (res) {
struct virgl_hw_res *r = NULL;
virgl_drm_resource_reference(qdws, &r, res);
@@ -379,21 +391,31 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
}
}
+ if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+ int r;
+ r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle);
+ if (r) {
+ res = NULL;
+ goto done;
+ }
+ }
+
+ res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle);
+ fprintf(stderr, "resource %p for handle %d, pfd=%d\n", res, handle, whandle->handle);
+ if (res) {
+ struct virgl_hw_res *r = NULL;
+ virgl_drm_resource_reference(qdws, &r, res);
+ goto done;
+ }
+
res = CALLOC_STRUCT(virgl_hw_res);
if (!res)
goto done;
if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
- int r;
- uint32_t handle;
- r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle);
- if (r) {
- FREE(res);
- res = NULL;
- goto done;
- }
res->bo_handle = handle;
} else {
+ fprintf(stderr, "gem open handle %d\n", handle);
memset(&open_arg, 0, sizeof(open_arg));
open_arg.name = whandle->handle;
if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
@@ -403,7 +425,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
}
res->bo_handle = open_arg.handle;
}
- res->name = whandle->handle;
+ res->name = handle;
memset(&info_arg, 0, sizeof(info_arg));
info_arg.bo_handle = res->bo_handle;
@@ -422,7 +444,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
pipe_reference_init(&res->reference, 1);
res->num_cs_references = 0;
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)whandle->handle, res);
+ util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res);
done:
pipe_mutex_unlock(qdws->bo_handles_mutex);
@@ -452,7 +474,7 @@ static boolean virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws,
res->flink = flink.name;
pipe_mutex_lock(qdws->bo_handles_mutex);
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res);
+ util_hash_table_set(qdws->bo_names, (void *)(uintptr_t)res->flink, res);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
whandle->handle = res->flink;
@@ -732,7 +754,7 @@ static void virgl_fence_reference(struct virgl_winsys *vws,
}
-struct virgl_winsys *
+static struct virgl_winsys *
virgl_drm_winsys_create(int drmFD)
{
struct virgl_drm_winsys *qdws;
@@ -748,6 +770,7 @@ virgl_drm_winsys_create(int drmFD)
pipe_mutex_init(qdws->mutex);
pipe_mutex_init(qdws->bo_handles_mutex);
qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+ qdws->bo_names = util_hash_table_create(handle_hash, handle_compare);
qdws->base.destroy = virgl_drm_winsys_destroy;
qdws->base.transfer_put = virgl_bo_transfer_put;
@@ -772,3 +795,87 @@ virgl_drm_winsys_create(int drmFD)
return &qdws->base;
}
+
+static struct util_hash_table *fd_tab = NULL;
+pipe_static_mutex(virgl_screen_mutex);
+
+static void
+virgl_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct virgl_screen *screen = virgl_screen(pscreen);
+ boolean destroy;
+
+ pipe_mutex_lock(virgl_screen_mutex);
+ destroy = --screen->refcnt == 0;
+ if (destroy) {
+ int fd = virgl_drm_winsys(screen->vws)->fd;
+ util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
+ }
+ pipe_mutex_unlock(virgl_screen_mutex);
+
+ if (destroy) {
+ pscreen->destroy = screen->winsys_priv;
+ pscreen->destroy(pscreen);
+ }
+}
+
+static unsigned hash_fd(void *key)
+{
+ int fd = pointer_to_intptr(key);
+ struct stat stat;
+ fstat(fd, &stat);
+
+ return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
+}
+
+static int compare_fd(void *key1, void *key2)
+{
+ int fd1 = pointer_to_intptr(key1);
+ int fd2 = pointer_to_intptr(key2);
+ struct stat stat1, stat2;
+ fstat(fd1, &stat1);
+ fstat(fd2, &stat2);
+
+ return stat1.st_dev != stat2.st_dev ||
+ stat1.st_ino != stat2.st_ino ||
+ stat1.st_rdev != stat2.st_rdev;
+}
+
+struct pipe_screen *
+virgl_drm_screen_create(int fd)
+{
+ struct pipe_screen *pscreen = NULL;
+
+ pipe_mutex_lock(virgl_screen_mutex);
+ if (!fd_tab) {
+ fd_tab = util_hash_table_create(hash_fd, compare_fd);
+ if (!fd_tab)
+ goto unlock;
+ }
+
+ pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+ if (pscreen) {
+ virgl_screen(pscreen)->refcnt++;
+ } else {
+ struct virgl_winsys *vws;
+ int dup_fd = dup(fd);
+
+ vws = virgl_drm_winsys_create(dup_fd);
+
+ pscreen = virgl_create_screen(vws);
+ if (pscreen) {
+ util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen);
+
+ /* Bit of a hack, to avoid circular linkage dependency,
+ * ie. pipe driver having to call in to winsys, we
+ * override the pipe drivers screen->destroy():
+ */
+ virgl_screen(pscreen)->winsys_priv = pscreen->destroy;
+ pscreen->destroy = virgl_drm_screen_destroy;
+ }
+ }
+
+unlock:
+ pipe_mutex_unlock(virgl_screen_mutex);
+ return pscreen;
+}
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
index da85ff87d2a..ffd7658ca81 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
@@ -62,6 +62,7 @@ struct virgl_drm_winsys
pipe_mutex mutex;
struct util_hash_table *bo_handles;
+ struct util_hash_table *bo_names;
pipe_mutex bo_handles_mutex;
};
diff --git a/src/gallium/winsys/virgl/vtest/Android.mk b/src/gallium/winsys/virgl/vtest/Android.mk
new file mode 100644
index 00000000000..3e084e44ceb
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/Android.mk
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Emil Velikov
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_virgl_vtest
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 651915aed71..77103492a4f 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -1102,9 +1102,14 @@ dri2BindExtensions(struct dri2_screen *psc, struct glx_display * priv,
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
- __glXEnableDirectExtension(&psc->base,
- "GLX_EXT_create_context_es2_profile");
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es2_profile");
+ }
}
for (i = 0; extensions[i]; i++) {
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 8bdbb9caf56..6054ffc3dc1 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -665,9 +665,14 @@ dri3_bind_extensions(struct dri3_screen *psc, struct glx_display * priv,
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
__glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
+ }
for (i = 0; extensions[i]; i++) {
/* when on a different gpu than the server, the server pixmaps
diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index 8a56385c4bd..6728d38fa0a 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -547,9 +547,18 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs,
case GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB:
*api = __DRI_API_OPENGL;
break;
- case GLX_CONTEXT_ES2_PROFILE_BIT_EXT:
- *api = __DRI_API_GLES2;
- break;
+ case GLX_CONTEXT_ES_PROFILE_BIT_EXT:
+ if (*major_ver >= 3)
+ *api = __DRI_API_GLES3;
+ else if (*major_ver == 2 && *minor_ver == 0)
+ *api = __DRI_API_GLES2;
+ else if (*major_ver == 1 && *minor_ver < 2)
+ *api = __DRI_API_GLES;
+ else {
+ *error = __DRI_CTX_ERROR_BAD_API;
+ return false;
+ }
+ break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return false;
@@ -580,19 +589,6 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs,
return false;
}
- /* The GLX_EXT_create_context_es2_profile spec says:
- *
- * "... If the version requested is 2.0, and the
- * GLX_CONTEXT_ES2_PROFILE_BIT_EXT bit is set in the
- * GLX_CONTEXT_PROFILE_MASK_ARB attribute (see below), then the context
- * returned will implement OpenGL ES 2.0. This is the only way in which
- * an implementation may request an OpenGL ES 2.0 context."
- */
- if (*api == __DRI_API_GLES2 && (*major_ver != 2 || *minor_ver != 0)) {
- *error = __DRI_CTX_ERROR_BAD_API;
- return false;
- }
-
*error = __DRI_CTX_ERROR_SUCCESS;
return true;
}
diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c
index 76cc3214b7b..241ac7f6d2c 100644
--- a/src/glx/drisw_glx.c
+++ b/src/glx/drisw_glx.c
@@ -623,8 +623,10 @@ driswBindExtensions(struct drisw_screen *psc, const __DRIextension **extensions)
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- /* DRISW version >= 2 implies support for OpenGL ES 2.0.
+ /* DRISW version >= 2 implies support for OpenGL ES.
*/
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
__glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
}
diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index 3b29aef1234..22b078ce484 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -146,6 +146,7 @@ static const struct extension_info known_glx_extensions[] = {
{ GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N },
{ GLX(EXT_framebuffer_sRGB), VER(0,0), Y, Y, N, N },
{ GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N },
+ { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N },
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 3a9bc823052..906b3fc16c0 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -45,6 +45,7 @@ enum
EXT_import_context_bit,
EXT_framebuffer_sRGB_bit,
EXT_fbconfig_packed_float_bit,
+ EXT_create_context_es_profile_bit,
EXT_create_context_es2_profile_bit,
MESA_copy_sub_buffer_bit,
MESA_depth_float_bit,
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index d7ab3bff4df..db98ac05fd9 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8247,7 +8247,14 @@
-
+
+
+
+
+
+
+
+
@@ -12661,6 +12668,12 @@
+
+
+
+
+
+
@@ -12714,6 +12727,14 @@
+
+
+
+
+
+
+
+
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 7af8becd607..ffe560faa3d 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -377,6 +377,7 @@ VBO_FILES = \
vbo/vbo_exec_eval.c \
vbo/vbo_exec.h \
vbo/vbo.h \
+ vbo/vbo_minmax_index.c \
vbo/vbo_noop.c \
vbo/vbo_noop.h \
vbo/vbo_primitive_restart.c \
@@ -393,6 +394,7 @@ VBO_FILES = \
STATETRACKER_FILES = \
state_tracker/st_atom_array.c \
+ state_tracker/st_atom_atomicbuf.c \
state_tracker/st_atom_blend.c \
state_tracker/st_atom.c \
state_tracker/st_atom_clip.c \
@@ -409,6 +411,7 @@ STATETRACKER_FILES = \
state_tracker/st_atom_shader.c \
state_tracker/st_atom_shader.h \
state_tracker/st_atom_stipple.c \
+ state_tracker/st_atom_storagebuf.c \
state_tracker/st_atom_tess.c \
state_tracker/st_atom_texture.c \
state_tracker/st_atom_viewport.c \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 55e926b239e..e5cbc465871 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -363,3 +363,8 @@ DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_B(thread_submit, def) \
DRI_CONF_DESC(en,gettext("Use an additional thread to submit buffers.")) \
DRI_CONF_OPT_END
+
+#define DRI_CONF_NINE_OVERRIDEVENDOR(def) \
+DRI_CONF_OPT_BEGIN(override_vendorid, int, def) \
+ DRI_CONF_DESC(en,"Define the vendor_id to report. This allows faking another hardware vendor.") \
+DRI_CONF_OPT_END
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index 0401e397031..00e44af2f8d 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -23,7 +23,7 @@
#include "brw_compiler.h"
#include "brw_context.h"
-#include "nir.h"
+#include "compiler/nir/nir.h"
#include "main/errors.h"
#include "util/debug.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 1032e5a8175..44d2fe4d9e4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -166,6 +166,19 @@ intel_viewport(struct gl_context *ctx)
}
}
+static void
+intel_update_framebuffer(struct gl_context *ctx,
+ struct gl_framebuffer *fb)
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ /* Quantize the derived default number of samples
+ */
+ fb->DefaultGeometry._NumSamples =
+ intel_quantize_num_samples(brw->intelScreen,
+ fb->DefaultGeometry.NumSamples);
+}
+
static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
@@ -245,6 +258,12 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
}
_mesa_lock_context_textures(ctx);
+
+ if (new_state & _NEW_BUFFERS) {
+ intel_update_framebuffer(ctx, ctx->DrawBuffer);
+ if (ctx->DrawBuffer != ctx->ReadBuffer)
+ intel_update_framebuffer(ctx, ctx->ReadBuffer);
+ }
}
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 994c699bb5a..d7a1456bce0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -268,7 +268,7 @@ fs_visitor::opt_combine_constants()
qsort(table.imm, table.len, sizeof(struct imm), compare);
/* Insert MOVs to load the constant values into GRFs. */
- fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8));
+ fs_reg reg(VGRF, alloc.allocate(1));
reg.stride = 0;
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];
@@ -284,8 +284,8 @@ fs_visitor::opt_combine_constants()
imm->subreg_offset = reg.subreg_offset;
reg.subreg_offset += sizeof(float);
- if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) {
- reg.nr = alloc.allocate(dispatch_width / 8);
+ if ((unsigned)reg.subreg_offset == 8 * sizeof(float)) {
+ reg.nr = alloc.allocate(1);
reg.subreg_offset = 0;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 6c3a8d70677..cd7f3fe851a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1144,16 +1144,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->predicate = BRW_PREDICATE_NORMAL;
break;
- case nir_op_extract_ubyte:
- case nir_op_extract_ibyte: {
+ case nir_op_extract_u8:
+ case nir_op_extract_i8: {
nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
result, op[0], brw_imm_ud(byte->u[0]));
break;
}
- case nir_op_extract_uword:
- case nir_op_extract_iword: {
+ case nir_op_extract_u16:
+ case nir_op_extract_i16: {
nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_WORD,
result, op[0], brw_imm_ud(word->u[0]));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6b9bfcf0b85..c1690ad45c3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -939,7 +939,7 @@ fs_visitor::emit_barrier()
/* Clear the message payload */
pbld.MOV(payload, brw_imm_ud(0u));
- /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
+ /* Copy the barrier id from r0.2 to the message payload reg.2 */
fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index c6f0b0d8a2a..6bd992882b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -254,8 +254,8 @@ try_constant_propagate(const struct brw_device_info *devinfo,
static bool
try_copy_propagate(const struct brw_device_info *devinfo,
- vec4_instruction *inst,
- int arg, struct copy_entry *entry)
+ vec4_instruction *inst, int arg,
+ struct copy_entry *entry, int attributes_per_reg)
{
/* Build up the value we are propagating as if it were the source of a
* single MOV
@@ -320,7 +320,8 @@ try_copy_propagate(const struct brw_device_info *devinfo,
unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
value.swizzle);
if (inst->is_3src() &&
- value.file == UNIFORM &&
+ (value.file == UNIFORM ||
+ (value.file == ATTR && attributes_per_reg != 1)) &&
!brw_is_single_value_swizzle(composed_swizzle))
return false;
@@ -395,6 +396,11 @@ try_copy_propagate(const struct brw_device_info *devinfo,
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
+ /* If we are in dual instanced or single mode, then attributes are going
+ * to be interleaved, so one register contains two attribute slots.
+ */
+ const int attributes_per_reg =
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
bool progress = false;
struct copy_entry entries[alloc.total_size];
@@ -465,7 +471,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
progress = true;
- if (try_copy_propagate(devinfo, inst, i, &entry))
+ if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
progress = true;
}
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 904950dfa07..0df25d2557c 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -210,7 +210,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
{
const unsigned depth = max_layer - min_layer;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
@@ -425,7 +425,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
unsigned width = mt->logical_width0;
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 8ede1f06e4e..de1aba44c1b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -32,6 +32,7 @@
#include
#include /* for PRId64 macro */
+#include "util/debug.h"
#include "glheader.h"
#include "enums.h"
#include "hash.h"
@@ -120,6 +121,10 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
return &ctx->CopyReadBuffer;
case GL_COPY_WRITE_BUFFER:
return &ctx->CopyWriteBuffer;
+ case GL_QUERY_BUFFER:
+ if (_mesa_has_ARB_query_buffer_object(ctx))
+ return &ctx->QueryBuffer;
+ break;
case GL_DRAW_INDIRECT_BUFFER:
if ((ctx->API == API_OPENGL_CORE &&
ctx->Extensions.ARB_draw_indirect) ||
@@ -458,6 +463,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
{
(void) ctx;
+ vbo_delete_minmax_cache(bufObj);
_mesa_align_free(bufObj->Data);
/* assign strange values here to help w/ debugging */
@@ -519,6 +525,24 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
}
+/**
+ * Get the value of MESA_NO_MINMAX_CACHE.
+ */
+static bool
+get_no_minmax_cache()
+{
+ static bool read = false;
+ static bool disable = false;
+
+ if (!read) {
+ disable = env_var_as_boolean("MESA_NO_MINMAX_CACHE", false);
+ read = true;
+ }
+
+ return disable;
+}
+
+
/**
* Initialize a buffer object to default values.
*/
@@ -532,6 +556,9 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
obj->RefCount = 1;
obj->Name = name;
obj->Usage = GL_STATIC_DRAW_ARB;
+
+ if (get_no_minmax_cache())
+ obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
}
@@ -877,6 +904,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer,
+ ctx->Shared->NullBufferObj);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
@@ -925,6 +955,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer, NULL);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
@@ -1014,6 +1046,15 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
return;
}
+ /* record usage history */
+ switch (target) {
+ case GL_PIXEL_PACK_BUFFER:
+ newBufObj->UsageHistory |= USAGE_PIXEL_PACK_BUFFER;
+ break;
+ default:
+ break;
+ }
+
/* bind new buffer */
_mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
}
@@ -1348,6 +1389,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
_mesa_BindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
}
+ /* unbind query buffer binding point */
+ if (ctx->QueryBuffer == bufObj) {
+ _mesa_BindBuffer(GL_QUERY_BUFFER, 0);
+ }
+
/* The ID is immediately freed for re-use */
_mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
/* Make sure we do not run into the classic ABA problem on bind.
@@ -1519,6 +1565,7 @@ _mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj,
bufObj->Written = GL_TRUE;
bufObj->Immutable = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferData);
if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
@@ -1632,6 +1679,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
#ifdef VBO_DEBUG
printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",
@@ -1744,6 +1792,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
}
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferSubData);
ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);
@@ -1859,12 +1908,16 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
return;
}
+ /* Bail early. Negative size has already been checked. */
+ if (size == 0)
+ return;
+
+ bufObj->MinMaxCacheDirty = true;
+
if (data == NULL) {
/* clear to zeros, per the spec */
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- NULL, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ NULL, clearValueSize, bufObj);
return;
}
@@ -1873,10 +1926,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
return;
}
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- clearValue, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ clearValue, clearValueSize, bufObj);
}
void GLAPIENTRY
@@ -2276,6 +2327,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx,
}
}
+ dst->MinMaxCacheDirty = true;
+
ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size);
}
@@ -2480,8 +2533,10 @@ _mesa_map_buffer_range(struct gl_context *ctx,
assert(bufObj->Mappings[MAP_USER].AccessFlags == access);
}
- if (access & GL_MAP_WRITE_BIT)
+ if (access & GL_MAP_WRITE_BIT) {
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
+ }
#ifdef VBO_DEBUG
if (strstr(func, "Range") == NULL) { /* If not MapRange */
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index d4378e51159..19ef3042548 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -48,6 +48,7 @@ struct gl_shader;
struct gl_shader_program;
struct gl_texture_image;
struct gl_texture_object;
+struct gl_memory_info;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@@ -726,6 +727,15 @@ struct dd_function_table {
void (*EndQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*CheckQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*WaitQuery)(struct gl_context *ctx, struct gl_query_object *q);
+ /*
+ * \pname the value requested to be written (GL_QUERY_RESULT, etc)
+ * \ptype the type of the value requested to be written:
+ * GL_UNSIGNED_INT, GL_UNSIGNED_INT64_ARB,
+ * GL_INT, GL_INT64_ARB
+ */
+ void (*StoreQueryResult)(struct gl_context *ctx, struct gl_query_object *q,
+ struct gl_buffer_object *buf, intptr_t offset,
+ GLenum pname, GLenum ptype);
/*@}*/
/**
@@ -939,6 +949,13 @@ struct dd_function_table {
void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
/*@}*/
+
+ /**
+ * Query information about memory. Device memory is e.g. VRAM. Staging
+ * memory is e.g. GART. All sizes are in kilobytes.
+ */
+ void (*QueryMemoryInfo)(struct gl_context *ctx,
+ struct gl_memory_info *info);
};
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 11f4482f8d2..ded6f2c06dc 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -88,6 +88,7 @@ EXT(ARB_point_parameters , EXT_point_parameters
EXT(ARB_point_sprite , ARB_point_sprite , GLL, GLC, x , x , 2003)
EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012)
EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013)
EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010)
EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009)
EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009)
@@ -165,6 +166,7 @@ EXT(ARB_window_pos , dummy_true
EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
+EXT(ATI_meminfo , ATI_meminfo , GLL, GLC, x , x , 2009)
EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
@@ -291,6 +293,7 @@ EXT(NV_texture_barrier , NV_texture_barrier
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
+EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
@@ -311,6 +314,7 @@ EXT(OES_element_index_uint , dummy_true
EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002)
EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005)
+EXT(OES_geometry_point_size , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008)
EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005)
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 3be216da234..2d4acb35bd6 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1414,6 +1414,9 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(pname=0x%x)", func, pname);
}
+
+ invalidate_framebuffer(fb);
+ ctx->NewState |= _NEW_BUFFERS;
}
void GLAPIENTRY
diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py
index 799b14f0b1c..a29f20754a8 100755
--- a/src/mesa/main/format_parser.py
+++ b/src/mesa/main/format_parser.py
@@ -532,7 +532,7 @@ def _parse_channels(fields, layout, colorspace, swizzle):
return channels
def parse(filename):
- """Parse a format descrition in CSV format.
+ """Parse a format description in CSV format.
This function parses the given CSV file and returns an iterable of
channels."""
diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h
index bfc8a0836e7..fa434d447ae 100644
--- a/src/mesa/main/framebuffer.h
+++ b/src/mesa/main/framebuffer.h
@@ -26,7 +26,7 @@
#ifndef FRAMEBUFFER_H
#define FRAMEBUFFER_H
-#include "glheader.h"
+#include "mtypes.h"
struct gl_config;
struct gl_context;
@@ -97,7 +97,8 @@ static inline GLuint
_mesa_geometric_samples(const struct gl_framebuffer *buffer)
{
return buffer->_HasAttachments ?
- buffer->Visual.samples : buffer->DefaultGeometry.NumSamples;
+ buffer->Visual.samples :
+ buffer->DefaultGeometry._NumSamples;
}
static inline GLuint
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 95cb18c8ee8..8453a922549 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -147,11 +147,14 @@ enum value_extra {
EXTRA_VALID_CLIP_DISTANCE,
EXTRA_FLUSH_CURRENT,
EXTRA_GLSL_130,
- EXTRA_EXT_UBO_GS4,
- EXTRA_EXT_ATOMICS_GS4,
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_UBO_GS,
+ EXTRA_EXT_ATOMICS_GS,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_EXT_ATOMICS_TESS,
EXTRA_EXT_SHADER_IMAGE_TESS,
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_EXT_ES_GS,
};
#define NO_EXTRA NULL
@@ -308,7 +311,7 @@ static const int extra_ARB_transform_feedback2_api_es3[] = {
};
static const int extra_ARB_uniform_buffer_object_and_geometry_shader[] = {
- EXTRA_EXT_UBO_GS4,
+ EXTRA_EXT_UBO_GS,
EXTRA_END
};
@@ -343,12 +346,12 @@ static const int extra_EXT_texture_array_es3[] = {
};
static const int extra_ARB_shader_atomic_counters_and_geometry_shader[] = {
- EXTRA_EXT_ATOMICS_GS4,
+ EXTRA_EXT_ATOMICS_GS,
EXTRA_END
};
static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = {
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_END
};
@@ -375,6 +378,28 @@ static const int extra_ARB_shader_storage_buffer_object_es31[] = {
EXTRA_END
};
+static const int extra_ARB_shader_storage_buffer_object_and_geometry_shader[] = {
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_framebuffer_no_attachments_and_geometry_shader[] = {
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_viewport_array_or_oes_geometry_shader[] = {
+ EXT(ARB_viewport_array),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = {
+ EXT(ARB_gpu_shader5),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
EXTRA_EXT(ARB_texture_cube_map);
EXTRA_EXT(EXT_texture_array);
EXTRA_EXT(NV_fog_distance);
@@ -414,6 +439,7 @@ EXTRA_EXT(ARB_shader_image_load_store);
EXTRA_EXT(ARB_viewport_array);
EXTRA_EXT(ARB_compute_shader);
EXTRA_EXT(ARB_gpu_shader5);
+EXTRA_EXT(ARB_query_buffer_object);
EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
EXTRA_EXT(INTEL_performance_query);
EXTRA_EXT(ARB_explicit_uniform_location);
@@ -424,6 +450,8 @@ EXTRA_EXT(ARB_tessellation_shader);
EXTRA_EXT(ARB_shader_subroutine);
EXTRA_EXT(ARB_shader_storage_buffer_object);
EXTRA_EXT(ARB_indirect_parameters);
+EXTRA_EXT(ATI_meminfo);
+EXTRA_EXT(NVX_gpu_memory_info);
static const int
extra_ARB_color_buffer_float_or_glcore[] = {
@@ -455,6 +483,12 @@ static const int extra_gl32_es3[] = {
EXTRA_END,
};
+static const int extra_version_32_OES_geometry_shader[] = {
+ EXTRA_VERSION_32,
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
static const int extra_gl40_ARB_sample_shading[] = {
EXTRA_VERSION_40,
EXT(ARB_sample_shading),
@@ -1006,6 +1040,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_SHADER_STORAGE_BUFFER_BINDING:
v->value_int = ctx->ShaderStorageBuffer->Name;
break;
+ /* GL_ARB_query_buffer_object */
+ case GL_QUERY_BUFFER_BINDING:
+ v->value_int = ctx->QueryBuffer->Name;
+ break;
/* GL_ARB_timer_query */
case GL_TIMESTAMP:
if (ctx->Driver.GetTimestamp) {
@@ -1049,6 +1087,60 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
v->value_int = ctx->DispatchIndirectBuffer->Name;
break;
+ /* GL_ARB_multisample */
+ case GL_SAMPLES:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer);
+ break;
+ case GL_SAMPLE_BUFFERS:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer) > 0;
+ break;
+ /* GL_ATI_meminfo & GL_NVX_gpu_memory_info */
+ case GL_VBO_FREE_MEMORY_ATI:
+ case GL_TEXTURE_FREE_MEMORY_ATI:
+ case GL_RENDERBUFFER_FREE_MEMORY_ATI:
+ case GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX:
+ case GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX:
+ {
+ struct gl_memory_info info;
+
+ ctx->Driver.QueryMemoryInfo(ctx, &info);
+
+ if (d->pname == GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX)
+ v->value_int = info.total_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX)
+ v->value_int = info.total_device_memory +
+ info.total_staging_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX)
+ v->value_int = info.avail_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX)
+ v->value_int = info.nr_device_memory_evictions;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX)
+ v->value_int = info.device_memory_evicted;
+ else {
+ /* ATI free memory enums.
+ *
+ * Since the GPU memory is (usually) page-table based, every two
+ * consecutive elements are equal. From the GL_ATI_meminfo
+ * specification:
+ *
+ * "param[0] - total memory free in the pool
+ * param[1] - largest available free block in the pool
+ * param[2] - total auxiliary memory free
+ * param[3] - largest auxiliary free block"
+ *
+ * All three (VBO, TEXTURE, RENDERBUFFER) queries return
+ * the same numbers here.
+ */
+ v->value_int_4[0] = info.avail_device_memory;
+ v->value_int_4[1] = info.avail_device_memory;
+ v->value_int_4[2] = info.avail_staging_memory;
+ v->value_int_4[3] = info.avail_staging_memory;
+ }
+ }
+ break;
}
}
@@ -1154,20 +1246,23 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
if (ctx->Const.GLSLVersion >= 130)
api_found = GL_TRUE;
break;
- case EXTRA_EXT_UBO_GS4:
+ case EXTRA_EXT_UBO_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_uniform_buffer_object &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_uniform_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_ATOMICS_GS4:
+ case EXTRA_EXT_ATOMICS_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_atomic_counters &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_atomic_counters &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_SHADER_IMAGE_GS4:
+ case EXTRA_EXT_SHADER_IMAGE_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_image_load_store &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_image_load_store &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
case EXTRA_EXT_ATOMICS_TESS:
api_check = GL_TRUE;
@@ -1179,6 +1274,24 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
api_found = ctx->Extensions.ARB_shader_image_load_store &&
_mesa_has_tessellation(ctx);
break;
+ case EXTRA_EXT_SSBO_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_shader_storage_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_FB_NO_ATTACH_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_framebuffer_no_attachments &&
+ (_mesa_is_desktop_gl(ctx) ||
+ _mesa_has_OES_geometry_shader(ctx)))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_ES_GS:
+ api_check = GL_TRUE;
+ if (_mesa_has_OES_geometry_shader(ctx))
+ api_found = GL_TRUE;
+ break;
case EXTRA_END:
break;
default: /* *e is a offset into the extension struct */
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index af7a8f4a906..164095c103c 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -80,8 +80,8 @@ descriptor=[
[ "SAMPLE_COVERAGE_ARB", "CONTEXT_BOOL(Multisample.SampleCoverage), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_VALUE_ARB", "CONTEXT_FLOAT(Multisample.SampleCoverageValue), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_INVERT_ARB", "CONTEXT_BOOL(Multisample.SampleCoverageInvert), NO_EXTRA" ],
- [ "SAMPLE_BUFFERS_ARB", "BUFFER_INT(Visual.sampleBuffers), extra_new_buffers" ],
- [ "SAMPLES_ARB", "BUFFER_INT(Visual.samples), extra_new_buffers" ],
+ [ "SAMPLE_BUFFERS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
+ [ "SAMPLES_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
# GL_ARB_sample_shading
[ "SAMPLE_SHADING_ARB", "CONTEXT_BOOL(Multisample.SampleShading), extra_gl40_ARB_sample_shading" ],
@@ -470,6 +470,9 @@ descriptor=[
["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
+# GL_ARB_framebuffer_no_attachments / geometry shader
+ [ "MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments_and_geometry_shader" ],
+
# GL_ARB_explicit_uniform_location / GLES 3.1
[ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
@@ -499,6 +502,34 @@ descriptor=[
{ "apis": ["GL_CORE", "GLES31"], "params": [
# GL_ARB_draw_indirect / GLES 3.1
[ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ],
+
+# GL 3.2 / GL OES_geometry_shader
+ [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32_OES_geometry_shader" ],
+
+# GL_ARB_shader_image_load_store / geometry shader
+ [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader" ],
+
+# GL_ARB_shader_atomic_counters / geometry shader
+ [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader " ],
+ [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
+
+# GL_ARB_shader_storage_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_uniform_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+ [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_viewport_array / GL_OES_geometry_shader
+ [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array_or_oes_geometry_shader" ],
+
+# GL_ARB_gpu_shader5 / GL_OES_geometry_shader
+ [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5_or_oes_geometry_shader" ],
]},
# Remaining enums are only in OpenGL
@@ -790,21 +821,10 @@ descriptor=[
# GL 3.2
[ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ],
- [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32" ],
- [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32" ],
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
-
-# GL_ARB_uniform_buffer_object
- [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
- [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
-
# GL_ARB_timer_query
[ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ],
@@ -817,25 +837,31 @@ descriptor=[
# GL_ARB_texture_gather
[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
-# GL_ARB_shader_atomic_counters
- [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
- [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-
# GL_ARB_shader_image_load_store
[ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
- [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
-
-# GL_ARB_framebuffer_no_attachments
- ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
# GL_ARB_shader_storage_buffer_object
- [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+
+# GL_ARB_query_buffer_object
+ [ "QUERY_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_query_buffer_object" ],
+
+# GL_ATI_meminfo
+ [ "VBO_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "TEXTURE_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "RENDERBUFFER_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+
+# GL_NVX_gpu_memory_info
+ [ "GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
]},
# Enums restricted to OpenGL Core profile
@@ -847,7 +873,6 @@ descriptor=[
[ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ],
[ "VIEWPORT_SUBPIXEL_BITS", "CONTEXT_INT(Const.ViewportSubpixelBits), extra_ARB_viewport_array" ],
[ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ],
- [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
[ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
# GL_ARB_gpu_shader5
diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index 315b5d64004..ab1b9e907ae 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -496,14 +496,12 @@ _mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint numKeys)
GLuint
_mesa_HashNumEntries(const struct _mesa_HashTable *table)
{
- struct hash_entry *entry;
GLuint count = 0;
if (table->deleted_key_data)
count++;
- hash_table_foreach(table->ht, entry)
- count++;
+ count += _mesa_hash_table_num_entries(table->ht);
return count;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 56dce2d1b81..a66b56c62bf 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1253,6 +1253,9 @@ typedef enum {
USAGE_TEXTURE_BUFFER = 0x2,
USAGE_ATOMIC_COUNTER_BUFFER = 0x4,
USAGE_SHADER_STORAGE_BUFFER = 0x8,
+ USAGE_TRANSFORM_FEEDBACK_BUFFER = 0x10,
+ USAGE_PIXEL_PACK_BUFFER = 0x20,
+ USAGE_DISABLE_MINMAX_CACHE = 0x40,
} gl_buffer_usage;
@@ -1280,6 +1283,12 @@ struct gl_buffer_object
GLuint NumMapBufferWriteCalls;
struct gl_buffer_mapping Mappings[MAP_COUNT];
+
+ /** Memoization of min/max index computations for static index buffers */
+ struct hash_table *MinMaxCache;
+ unsigned MinMaxCacheHitIndices;
+ unsigned MinMaxCacheMissIndices;
+ bool MinMaxCacheDirty;
};
@@ -1861,6 +1870,8 @@ typedef enum
PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
PROGRAM_UNDEFINED, /**< Invalid/TBD value */
+ PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */
+ PROGRAM_BUFFER, /**< for shader buffers, compile-time only */
PROGRAM_FILE_MAX
} gl_register_file;
@@ -3217,6 +3228,10 @@ struct gl_framebuffer
struct {
GLuint Width, Height, Layers, NumSamples;
GLboolean FixedSampleLocations;
+ /* Derived from NumSamples by the driver so that it can choose a valid
+ * value for the hardware.
+ */
+ GLuint _NumSamples;
} DefaultGeometry;
/** \name Drawing bounds (Intersection of buffer size and scissor box)
@@ -3785,6 +3800,7 @@ struct gl_extensions
GLboolean ARB_occlusion_query2;
GLboolean ARB_pipeline_statistics_query;
GLboolean ARB_point_sprite;
+ GLboolean ARB_query_buffer_object;
GLboolean ARB_sample_shading;
GLboolean ARB_seamless_cube_map;
GLboolean ARB_shader_atomic_counters;
@@ -3880,6 +3896,7 @@ struct gl_extensions
GLboolean AMD_vertex_shader_layer;
GLboolean AMD_vertex_shader_viewport_index;
GLboolean APPLE_object_purgeable;
+ GLboolean ATI_meminfo;
GLboolean ATI_texture_compression_3dc;
GLboolean ATI_texture_mirror_once;
GLboolean ATI_texture_env_combine3;
@@ -3900,6 +3917,7 @@ struct gl_extensions
GLboolean NV_texture_env_combine4;
GLboolean NV_texture_rectangle;
GLboolean NV_vdpau_interop;
+ GLboolean NVX_gpu_memory_info;
GLboolean TDFX_texture_compression_FXT1;
GLboolean OES_EGL_image;
GLboolean OES_draw_texture;
@@ -4434,6 +4452,8 @@ struct gl_context
struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
+ struct gl_buffer_object *QueryBuffer; /**< GL_ARB_query_buffer_object */
+
/**
* Current GL_ARB_uniform_buffer_object binding referenced by
* GL_UNIFORM_BUFFER target for glBufferData, glMapBuffer, etc.
@@ -4576,6 +4596,18 @@ struct gl_context
GLboolean ShareGroupReset;
};
+/**
+ * Information about memory usage. All sizes are in kilobytes.
+ */
+struct gl_memory_info
+{
+ unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
+ unsigned avail_device_memory; /**< free device memory at the moment */
+ unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
+ unsigned avail_staging_memory; /**< free staging memory at the moment */
+ unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
+};
#ifdef DEBUG
extern int MESA_VERBOSE;
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 41f370ce485..b622d6a2979 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -288,16 +288,18 @@ void GLAPIENTRY
_mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+
if (_mesa_is_desktop_gl(ctx))
callerstr = "glObjectPtrLabel";
else
callerstr = "glObjectPtrLabelKHR";
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
@@ -306,6 +308,7 @@ _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
labelPtr = &syncObj->Label;
set_label(ctx, labelPtr, label, length, callerstr);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
void GLAPIENTRY
@@ -313,7 +316,7 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
@@ -328,7 +331,8 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
@@ -337,4 +341,5 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
labelPtr = &syncObj->Label;
copy_label(*labelPtr, label, length, bufSize);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 98366857f62..b86692a5f7e 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -23,6 +23,7 @@
*/
+#include "bufferobj.h"
#include "glheader.h"
#include "context.h"
#include "enums.h"
@@ -732,14 +733,16 @@ _mesa_GetQueryiv(GLenum target, GLenum pname, GLint *params)
_mesa_GetQueryIndexediv(target, 0, pname, params);
}
-void GLAPIENTRY
-_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
+static void
+get_query_object(struct gl_context *ctx, const char *func,
+ GLuint id, GLenum pname, GLenum ptype,
+ struct gl_buffer_object *buf, intptr_t offset)
{
struct gl_query_object *q = NULL;
- GET_CURRENT_CONTEXT(ctx);
+ uint64_t value;
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id,
+ _mesa_debug(ctx, "%s(%u, %s)\n", func, id,
_mesa_enum_to_string(pname));
if (id)
@@ -747,96 +750,114 @@ _mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
if (!q || q->Active || !q->EverBound) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectivARB(id=%d is invalid or active)", id);
+ "%s(id=%d is invalid or active)", func, id);
return;
}
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0x7fffffff) {
- *params = 0x7fffffff;
- }
- else {
- *params = (GLint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectivARB(pname)");
+ if (buf && buf != ctx->Shared->NullBufferObj) {
+ bool is_64bit = ptype == GL_INT64_ARB ||
+ ptype == GL_UNSIGNED_INT64_ARB;
+ if (!ctx->Extensions.ARB_query_buffer_object) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func);
return;
+ }
+ if (buf->Size < offset + 4 * (is_64bit ? 2 : 1)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(out of bounds)", func);
+ return;
+ }
+
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ case GL_QUERY_RESULT_NO_WAIT:
+ case GL_QUERY_RESULT_AVAILABLE:
+ case GL_QUERY_TARGET:
+ ctx->Driver.StoreQueryResult(ctx, q, buf, offset, pname, ptype);
+ return;
+ }
+
+ /* fall through to get error below */
}
+
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ if (!q->Ready)
+ ctx->Driver.WaitQuery(ctx, q);
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_NO_WAIT:
+ if (!ctx->Extensions.ARB_query_buffer_object)
+ goto invalid_enum;
+ ctx->Driver.CheckQuery(ctx, q);
+ if (!q->Ready)
+ return;
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_AVAILABLE:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery(ctx, q);
+ value = q->Ready;
+ break;
+ case GL_QUERY_TARGET:
+ value = q->Target;
+ break;
+ default:
+invalid_enum:
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)",
+ func, _mesa_enum_to_string(pname));
+ return;
+ }
+
+ /* TODO: Have the driver be required to handle this fixup. */
+ if (q->Target == GL_ANY_SAMPLES_PASSED ||
+ q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE)
+ value = !!value;
+
+ switch (ptype) {
+ case GL_INT: {
+ GLint *param = (GLint *)offset;
+ if (value > 0x7fffffff)
+ *param = 0x7fffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_UNSIGNED_INT: {
+ GLuint *param = (GLuint *)offset;
+ if (value > 0xffffffff)
+ *param = 0xffffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_INT64_ARB:
+ case GL_UNSIGNED_INT64_ARB: {
+ GLuint64EXT *param = (GLuint64EXT *)offset;
+ *param = value;
+ break;
+ }
+ default:
+ unreachable("unexpected ptype");
+ }
+}
+
+void GLAPIENTRY
+_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ get_query_object(ctx, "glGetQueryObjectiv",
+ id, pname, GL_INT, ctx->QueryBuffer, (intptr_t)params);
}
void GLAPIENTRY
_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuivARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0xffffffff) {
- *params = 0xffffffff;
- }
- else {
- *params = (GLuint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectuivARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectuiv",
+ id, pname, GL_UNSIGNED_INT,
+ ctx->QueryBuffer, (intptr_t)params);
}
@@ -846,40 +867,11 @@ _mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
void GLAPIENTRY
_mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjecti64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjecti64v",
+ id, pname, GL_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
@@ -889,40 +881,11 @@ _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
void GLAPIENTRY
_mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectui64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
/**
@@ -932,8 +895,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectiv",
+ id, pname, GL_INT, buf, offset);
}
@@ -941,8 +911,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectuiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectuiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectuiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectuiv",
+ id, pname, GL_UNSIGNED_INT, buf, offset);
}
@@ -950,8 +927,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjecti64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjecti64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjecti64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjecti64v",
+ id, pname, GL_INT64_ARB, buf, offset);
}
@@ -959,8 +943,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectui64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectui64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectui64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB, buf, offset);
}
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index c37b31d1753..b9f7bb65fb6 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -338,7 +338,7 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared)
struct set_entry *entry;
set_foreach(shared->SyncObjects, entry) {
- _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key);
+ _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key, 1);
}
}
_mesa_set_destroy(shared->SyncObjects, NULL);
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 4043c4f2057..57f13411fdf 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -352,7 +352,7 @@ update_multisample(struct gl_context *ctx)
ctx->Multisample._Enabled = GL_FALSE;
if (ctx->Multisample.Enabled &&
ctx->DrawBuffer &&
- ctx->DrawBuffer->Visual.sampleBuffers)
+ _mesa_geometric_samples(ctx->DrawBuffer) > 0)
ctx->Multisample._Enabled = GL_TRUE;
}
diff --git a/src/mesa/main/syncobj.c b/src/mesa/main/syncobj.c
index c1b2d3bed54..be758dd1241 100644
--- a/src/mesa/main/syncobj.c
+++ b/src/mesa/main/syncobj.c
@@ -167,34 +167,42 @@ _mesa_free_sync_data(struct gl_context *ctx)
* - not in sync objects hash table
* - type is GL_SYNC_FENCE
* - not marked as deleted
+ *
+ * Returns the internal gl_sync_object pointer if the sync object is valid
+ * or NULL if it isn't.
+ *
+ * If "incRefCount" is true, the reference count is incremented, which is
+ * normally what you want; otherwise, a glDeleteSync from another thread
+ * could delete the sync object while you are still working on it.
*/
-bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj)
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount)
{
- return (syncObj != NULL)
+ struct gl_sync_object *syncObj = (struct gl_sync_object *) sync;
+ mtx_lock(&ctx->Shared->Mutex);
+ if (syncObj != NULL
&& _mesa_set_search(ctx->Shared->SyncObjects, syncObj) != NULL
&& (syncObj->Type == GL_SYNC_FENCE)
- && !syncObj->DeletePending;
-}
-
-
-void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
-{
- mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount++;
+ && !syncObj->DeletePending) {
+ if (incRefCount) {
+ syncObj->RefCount++;
+ }
+ } else {
+ syncObj = NULL;
+ }
mtx_unlock(&ctx->Shared->Mutex);
+ return syncObj;
}
void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount)
{
struct set_entry *entry;
mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount--;
+ syncObj->RefCount -= amount;
if (syncObj->RefCount == 0) {
entry = _mesa_set_search(ctx->Shared->SyncObjects, syncObj);
assert (entry != NULL);
@@ -212,10 +220,9 @@ GLboolean GLAPIENTRY
_mesa_IsSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
- return _mesa_validate_sync(ctx, syncObj) ? GL_TRUE : GL_FALSE;
+ return _mesa_get_and_ref_sync(ctx, sync, false) ? GL_TRUE : GL_FALSE;
}
@@ -223,7 +230,7 @@ void GLAPIENTRY
_mesa_DeleteSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
/* From the GL_ARB_sync spec:
*
@@ -235,16 +242,19 @@ _mesa_DeleteSync(GLsync sync)
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteSync (not a valid sync object)");
return;
}
/* If there are no client-waits or server-waits pending on this sync, delete
- * the underlying object.
+ * the underlying object. Note that we double-unref the object, as
+ * _mesa_get_and_ref_sync above took an extra refcount to make sure the pointer
+ * is valid for us to manipulate.
*/
syncObj->DeletePending = GL_TRUE;
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 2);
}
@@ -299,21 +309,20 @@ GLenum GLAPIENTRY
_mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLenum ret;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_WAIT_FAILED);
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
- return GL_WAIT_FAILED;
- }
-
if ((flags & ~GL_SYNC_FLUSH_COMMANDS_BIT) != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync(flags=0x%x)", flags);
return GL_WAIT_FAILED;
}
- _mesa_ref_sync_object(ctx, syncObj);
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
+ return GL_WAIT_FAILED;
+ }
/* From the GL_ARB_sync spec:
*
@@ -335,7 +344,7 @@ _mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
}
}
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return ret;
}
@@ -344,12 +353,7 @@ void GLAPIENTRY
_mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
-
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
- return;
- }
+ struct gl_sync_object *syncObj;
if (flags != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync(flags=0x%x)", flags);
@@ -362,7 +366,14 @@ _mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
return;
}
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
+ return;
+ }
+
ctx->Driver.ServerWaitSync(ctx, syncObj, flags, timeout);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
@@ -371,11 +382,12 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
GLint *values)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLsizei size = 0;
GLint v[1];
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glGetSynciv (not a valid sync object)");
return;
}
@@ -409,6 +421,7 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSynciv(pname=0x%x)\n", pname);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return;
}
@@ -421,4 +434,6 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
if (length != NULL) {
*length = size;
}
+
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h
index 5d510e873a9..ea4a71222c0 100644
--- a/src/mesa/main/syncobj.h
+++ b/src/mesa/main/syncobj.h
@@ -47,15 +47,12 @@ _mesa_init_sync(struct gl_context *);
extern void
_mesa_free_sync_data(struct gl_context *);
-extern void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount);
extern void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
-
-extern bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj);
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount);
extern GLboolean GLAPIENTRY
_mesa_IsSync(GLsync sync);
diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h
index bb9729cdbde..eb274ad6540 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -145,6 +145,9 @@ _mesa_set_transform_feedback_binding(struct gl_context *ctx,
tfObj->BufferNames[index] = bufObj->Name;
tfObj->Offset[index] = offset;
tfObj->RequestedSize[index] = size;
+
+ if (bufObj != ctx->Shared->NullBufferObj)
+ bufObj->UsageHistory |= USAGE_TRANSFORM_FEEDBACK_BUFFER;
}
/*** GL_ARB_direct_state_access ***/
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 3c51d18ed62..0f17ed136da 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2293,6 +2293,10 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
(void) row_major;
+ /* atomics don't get real storage */
+ if (type->contains_atomic())
+ return;
+
if (type->is_vector() || type->is_scalar()) {
size = type->vector_elements;
if (type->is_double())
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index e98946b9387..34183d4d95f 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -454,73 +454,3 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
*posOut = -1;
return GL_FALSE;
}
-
-
-struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
-{
- struct gl_program_parameter_list *clone;
- GLuint i;
-
- clone = _mesa_new_parameter_list();
- if (!clone)
- return NULL;
-
- /** Not too efficient, but correct */
- for (i = 0; i < list->NumParameters; i++) {
- struct gl_program_parameter *p = list->Parameters + i;
- struct gl_program_parameter *pCopy;
- GLuint size = MIN2(p->Size, 4);
- GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
- list->ParameterValues[i], NULL);
- assert(j >= 0);
- pCopy = clone->Parameters + j;
- /* copy state indexes */
- if (p->Type == PROGRAM_STATE_VAR) {
- GLint k;
- for (k = 0; k < STATE_LENGTH; k++) {
- pCopy->StateIndexes[k] = p->StateIndexes[k];
- }
- }
- else {
- clone->Parameters[j].Size = p->Size;
- }
-
- }
-
- clone->StateFlags = list->StateFlags;
-
- return clone;
-}
-
-
-/**
- * Return a new parameter list which is listA + listB.
- */
-struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
- const struct gl_program_parameter_list *listB)
-{
- struct gl_program_parameter_list *list;
-
- if (listA) {
- list = _mesa_clone_parameter_list(listA);
- if (list && listB) {
- GLuint i;
- for (i = 0; i < listB->NumParameters; i++) {
- struct gl_program_parameter *param = listB->Parameters + i;
- _mesa_add_parameter(list, param->Type, param->Name, param->Size,
- param->DataType,
- listB->ParameterValues[i],
- param->StateIndexes);
- }
- }
- }
- else if (listB) {
- list = _mesa_clone_parameter_list(listB);
- }
- else {
- list = NULL;
- }
- return list;
-}
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 44700b710d7..c04d7a2e634 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -99,13 +99,6 @@ _mesa_new_parameter_list_sized(unsigned size);
extern void
_mesa_free_parameter_list(struct gl_program_parameter_list *paramList);
-extern struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list);
-
-extern struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *a,
- const struct gl_program_parameter_list *b);
-
static inline GLuint
_mesa_num_parameters(const struct gl_program_parameter_list *list)
{
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 12490d0c380..eed241271df 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -40,6 +40,7 @@
#include "prog_statevars.h"
#include "prog_parameter.h"
#include "main/samplerobj.h"
+#include "framebuffer.h"
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
@@ -352,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
}
return;
case STATE_NUM_SAMPLES:
- ((int *)value)[0] = ctx->DrawBuffer->Visual.samples;
+ ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer);
return;
case STATE_DEPTH_RANGE:
value[0] = ctx->ViewportArray[0].Near; /* near */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 0e78e6ab25d..27867c48d52 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -31,6 +31,7 @@
#include "main/glheader.h"
#include "main/context.h"
+#include "main/framebuffer.h"
#include "main/hash.h"
#include "main/macros.h"
#include "program.h"
@@ -534,14 +535,14 @@ _mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
* forces per-sample shading"
*/
if (prog->IsSample && !ignore_sample_qualifier)
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
if (prog->Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS))
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
else if (ctx->Multisample.SampleShading)
return MAX2(ceil(ctx->Multisample.MinSampleShadingValue *
- ctx->DrawBuffer->Visual.samples), 1);
+ _mesa_geometric_samples(ctx->DrawBuffer)), 1);
else
return 1;
}
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 03097225bb2..4b89ade1b15 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -75,6 +75,16 @@ static const struct st_tracked_state *atoms[] =
&st_bind_tes_ubos,
&st_bind_fs_ubos,
&st_bind_gs_ubos,
+ &st_bind_vs_atomics,
+ &st_bind_tcs_atomics,
+ &st_bind_tes_atomics,
+ &st_bind_fs_atomics,
+ &st_bind_gs_atomics,
+ &st_bind_vs_ssbos,
+ &st_bind_tcs_ssbos,
+ &st_bind_tes_ssbos,
+ &st_bind_fs_ssbos,
+ &st_bind_gs_ssbos,
&st_update_pixel_transfer,
&st_update_tess,
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index a24842baa4f..3a9153c80cb 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -78,6 +78,16 @@ extern const struct st_tracked_state st_bind_vs_ubos;
extern const struct st_tracked_state st_bind_gs_ubos;
extern const struct st_tracked_state st_bind_tcs_ubos;
extern const struct st_tracked_state st_bind_tes_ubos;
+extern const struct st_tracked_state st_bind_fs_atomics;
+extern const struct st_tracked_state st_bind_vs_atomics;
+extern const struct st_tracked_state st_bind_gs_atomics;
+extern const struct st_tracked_state st_bind_tcs_atomics;
+extern const struct st_tracked_state st_bind_tes_atomics;
+extern const struct st_tracked_state st_bind_fs_ssbos;
+extern const struct st_tracked_state st_bind_vs_ssbos;
+extern const struct st_tracked_state st_bind_gs_ssbos;
+extern const struct st_tracked_state st_bind_tcs_ssbos;
+extern const struct st_tracked_state st_bind_tes_ssbos;
extern const struct st_tracked_state st_update_pixel_transfer;
extern const struct st_tracked_state st_update_tess;
diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c
new file mode 100644
index 00000000000..1c30d1fb701
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -0,0 +1,158 @@
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+static void
+st_bind_atomics(struct st_context *st,
+ struct gl_shader_program *prog,
+ unsigned shader_type)
+{
+ unsigned i;
+
+ if (!prog || !st->pipe->set_shader_buffers)
+ return;
+
+ for (i = 0; i < prog->NumAtomicBuffers; i++) {
+ struct gl_active_atomic_buffer *atomic = &prog->AtomicBuffers[i];
+ struct gl_atomic_buffer_binding *binding =
+ &st->ctx->AtomicBufferBindings[atomic->Binding];
+ struct st_buffer_object *st_obj =
+ st_buffer_object(binding->BufferObject);
+ struct pipe_shader_buffer sb = { 0 };
+
+ sb.buffer = st_obj->buffer;
+ sb.buffer_offset = binding->Offset;
+ sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+
+ st->pipe->set_shader_buffers(st->pipe, shader_type,
+ atomic->Binding, 1, &sb);
+ }
+}
+
+static void
+bind_vs_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_VERTEX);
+}
+
+const struct st_tracked_state st_bind_vs_atomics = {
+ "st_bind_vs_atomics",
+ {
+ 0,
+ ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_vs_atomics
+};
+
+static void
+bind_fs_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_FRAGMENT);
+}
+
+const struct st_tracked_state st_bind_fs_atomics = {
+ "st_bind_fs_atomics",
+ {
+ 0,
+ ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_fs_atomics
+};
+
+static void
+bind_gs_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_GEOMETRY);
+}
+
+const struct st_tracked_state st_bind_gs_atomics = {
+ "st_bind_gs_atomics",
+ {
+ 0,
+ ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_gs_atomics
+};
+
+static void
+bind_tcs_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_TESS_CTRL);
+}
+
+const struct st_tracked_state st_bind_tcs_atomics = {
+ "st_bind_tcs_atomics",
+ {
+ 0,
+ ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_tcs_atomics
+};
+
+static void
+bind_tes_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_TESS_EVAL);
+}
+
+const struct st_tracked_state st_bind_tes_atomics = {
+ "st_bind_tes_atomics",
+ {
+ 0,
+ ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_tes_atomics
+};
diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c
new file mode 100644
index 00000000000..f165cc3e0a1
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_storagebuf.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+static void
+st_bind_ssbos(struct st_context *st, struct gl_shader *shader,
+ unsigned shader_type)
+{
+ unsigned i;
+ struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
+ struct gl_program_constants *c;
+
+ if (!shader || !st->pipe->set_shader_buffers)
+ return;
+
+ c = &st->ctx->Const.Program[shader->Stage];
+
+ for (i = 0; i < shader->NumShaderStorageBlocks; i++) {
+ struct gl_shader_storage_buffer_binding *binding;
+ struct st_buffer_object *st_obj;
+ struct pipe_shader_buffer *sb = &buffers[i];
+
+ binding = &st->ctx->ShaderStorageBufferBindings[
+ shader->ShaderStorageBlocks[i]->Binding];
+ st_obj = st_buffer_object(binding->BufferObject);
+
+ sb->buffer = st_obj->buffer;
+
+ if (sb->buffer) {
+ sb->buffer_offset = binding->Offset;
+ sb->buffer_size = sb->buffer->width0 - binding->Offset;
+
+ /* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+ * Take the minimum just to be sure.
+ */
+ if (!binding->AutomaticSize)
+ sb->buffer_size = MIN2(sb->buffer_size, (unsigned) binding->Size);
+ }
+ else {
+ sb->buffer_offset = 0;
+ sb->buffer_size = 0;
+ }
+ }
+ st->pipe->set_shader_buffers(st->pipe, shader_type, c->MaxAtomicBuffers,
+ shader->NumShaderStorageBlocks, buffers);
+ /* clear out any stale shader buffers */
+ if (shader->NumShaderStorageBlocks < c->MaxShaderStorageBlocks)
+ st->pipe->set_shader_buffers(
+ st->pipe, shader_type,
+ c->MaxAtomicBuffers + shader->NumShaderStorageBlocks,
+ c->MaxShaderStorageBlocks - shader->NumShaderStorageBlocks,
+ NULL);
+}
+
+static void bind_vs_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+ PIPE_SHADER_VERTEX);
+}
+
+const struct st_tracked_state st_bind_vs_ssbos = {
+ "st_bind_vs_ssbos",
+ {
+ 0,
+ ST_NEW_VERTEX_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_vs_ssbos
+};
+
+static void bind_fs_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+ PIPE_SHADER_FRAGMENT);
+}
+
+const struct st_tracked_state st_bind_fs_ssbos = {
+ "st_bind_fs_ssbos",
+ {
+ 0,
+ ST_NEW_FRAGMENT_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_fs_ssbos
+};
+
+static void bind_gs_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+ PIPE_SHADER_GEOMETRY);
+}
+
+const struct st_tracked_state st_bind_gs_ssbos = {
+ "st_bind_gs_ssbos",
+ {
+ 0,
+ ST_NEW_GEOMETRY_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_gs_ssbos
+};
+
+static void bind_tcs_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL],
+ PIPE_SHADER_TESS_CTRL);
+}
+
+const struct st_tracked_state st_bind_tcs_ssbos = {
+ "st_bind_tcs_ssbos",
+ {
+ 0,
+ ST_NEW_TESSCTRL_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_tcs_ssbos
+};
+
+static void bind_tes_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL],
+ PIPE_SHADER_TESS_EVAL);
+}
+
+const struct st_tracked_state st_bind_tes_ssbos = {
+ "st_bind_tes_ssbos",
+ {
+ 0,
+ ST_NEW_TESSEVAL_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_tes_ssbos
+};
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 68be8ba64ac..202b4eeeefa 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -237,6 +237,13 @@ st_bufferobj_data(struct gl_context *ctx,
case GL_PARAMETER_BUFFER_ARB:
bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
break;
+ case GL_ATOMIC_COUNTER_BUFFER:
+ case GL_SHADER_STORAGE_BUFFER:
+ bind = PIPE_BIND_SHADER_BUFFER;
+ break;
+ case GL_QUERY_BUFFER:
+ bind = PIPE_BIND_QUERY_BUFFER;
+ break;
default:
bind = 0;
}
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index aafae16b2df..fc239bc778c 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -39,9 +39,11 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "st_context.h"
#include "st_cb_queryobj.h"
#include "st_cb_bitmap.h"
+#include "st_cb_bufferobjects.h"
static struct gl_query_object *
@@ -271,7 +273,7 @@ st_WaitQuery(struct gl_context *ctx, struct gl_query_object *q)
{
/* nothing */
}
-
+
q->Ready = GL_TRUE;
}
@@ -303,6 +305,98 @@ st_GetTimestamp(struct gl_context *ctx)
}
}
+static void
+st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q,
+ struct gl_buffer_object *buf, intptr_t offset,
+ GLenum pname, GLenum ptype)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct st_query_object *stq = st_query_object(q);
+ struct st_buffer_object *stObj = st_buffer_object(buf);
+ boolean wait = pname == GL_QUERY_RESULT;
+ enum pipe_query_value_type result_type;
+ int index;
+
+   /* GL_QUERY_TARGET is a bit of an extension since it has nothing to
+    * do with the GPU side of the query. Write the value out "by hand".
+    */
+ if (pname == GL_QUERY_TARGET) {
+      /* Assume that the data must be LE. The endianness situation with
+       * respect to CPU and GPU is incredibly confusing, but the vast
+       * majority of GPUs are LE. When a BE one comes along, this needs
+       * some form of resolution.
+       */
+ unsigned data[2] = { CPU_TO_LE32(q->Target), 0 };
+ pipe_buffer_write(pipe, stObj->buffer, offset,
+ (ptype == GL_INT64_ARB ||
+ ptype == GL_UNSIGNED_INT64_ARB) ? 8 : 4,
+ data);
+ return;
+ }
+
+ switch (ptype) {
+ case GL_INT:
+ result_type = PIPE_QUERY_TYPE_I32;
+ break;
+ case GL_UNSIGNED_INT:
+ result_type = PIPE_QUERY_TYPE_U32;
+ break;
+ case GL_INT64_ARB:
+ result_type = PIPE_QUERY_TYPE_I64;
+ break;
+ case GL_UNSIGNED_INT64_ARB:
+ result_type = PIPE_QUERY_TYPE_U64;
+ break;
+ default:
+ unreachable("Unexpected result type");
+ }
+
+ if (pname == GL_QUERY_RESULT_AVAILABLE) {
+ index = -1;
+ } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
+ switch (q->Target) {
+ case GL_VERTICES_SUBMITTED_ARB:
+ index = 0;
+ break;
+ case GL_PRIMITIVES_SUBMITTED_ARB:
+ index = 1;
+ break;
+ case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+ index = 2;
+ break;
+ case GL_GEOMETRY_SHADER_INVOCATIONS:
+ index = 3;
+ break;
+ case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+ index = 4;
+ break;
+ case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+ index = 5;
+ break;
+ case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+ index = 6;
+ break;
+ case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+ index = 7;
+ break;
+ case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+ index = 8;
+ break;
+ case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+ index = 9;
+ break;
+ case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+ index = 10;
+ break;
+ default:
+ unreachable("Unexpected target");
+ }
+ } else {
+ index = 0;
+ }
+
+ pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,
+ stObj->buffer, offset);
+}
void st_init_query_functions(struct dd_function_table *functions)
{
@@ -313,4 +407,5 @@ void st_init_query_functions(struct dd_function_table *functions)
functions->WaitQuery = st_WaitQuery;
functions->CheckQuery = st_CheckQuery;
functions->GetTimestamp = st_GetTimestamp;
+ functions->StoreQueryResult = st_StoreQueryResult;
}
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 0ceb37027e1..f2b607c3a1d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -60,6 +60,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_tile.h"
#include "util/u_format.h"
@@ -67,6 +68,9 @@
#include "util/u_sampler.h"
#include "util/u_math.h"
#include "util/u_box.h"
+#include "util/u_simple_shaders.h"
+#include "cso_cache/cso_context.h"
+#include "tgsi/tgsi_ureg.h"
#define DBG if (0) printf
@@ -686,6 +690,999 @@ st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
}
}
+void
+st_init_pbo_upload(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+
+ st->pbo_upload.enabled =
+ screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
+ screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
+ screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
+ if (!st->pbo_upload.enabled)
+ return;
+
+ st->pbo_upload.rgba_only =
+ screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
+
+ if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
+ if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+ st->pbo_upload.upload_layers = true;
+ } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
+ st->pbo_upload.upload_layers = true;
+ st->pbo_upload.use_gs = true;
+ }
+ }
+
+ /* Blend state */
+ memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state));
+ st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+ /* Rasterizer state */
+ memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state));
+ st->pbo_upload.raster.half_pixel_center = 1;
+}
+
+void
+st_destroy_pbo_upload(struct st_context *st)
+{
+ if (st->pbo_upload.fs) {
+ cso_delete_fragment_shader(st->cso_context, st->pbo_upload.fs);
+ st->pbo_upload.fs = NULL;
+ }
+
+ if (st->pbo_upload.gs) {
+ cso_delete_geometry_shader(st->cso_context, st->pbo_upload.gs);
+ st->pbo_upload.gs = NULL;
+ }
+
+ if (st->pbo_upload.vs) {
+ cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs);
+ st->pbo_upload.vs = NULL;
+ }
+}
+
+/**
+ * Converts format to a format with the same components, types
+ * and sizes, but with the components in RGBA order.
+ */
+static enum pipe_format
+unswizzle_format(enum pipe_format format)
+{
+ switch (format)
+ {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ return PIPE_FORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return PIPE_FORMAT_R10G10B10A2_UNORM;
+
+ case PIPE_FORMAT_B10G10R10A2_SNORM:
+ return PIPE_FORMAT_R10G10B10A2_SNORM;
+
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ return PIPE_FORMAT_R10G10B10A2_UINT;
+
+ default:
+ return format;
+ }
+}
+
+/**
+ * Converts PIPE_FORMAT_A* to PIPE_FORMAT_R*.
+ */
+static enum pipe_format
+alpha_to_red(enum pipe_format format)
+{
+ switch (format)
+ {
+ case PIPE_FORMAT_A8_UNORM:
+ return PIPE_FORMAT_R8_UNORM;
+ case PIPE_FORMAT_A8_SNORM:
+ return PIPE_FORMAT_R8_SNORM;
+ case PIPE_FORMAT_A8_UINT:
+ return PIPE_FORMAT_R8_UINT;
+ case PIPE_FORMAT_A8_SINT:
+ return PIPE_FORMAT_R8_SINT;
+
+ case PIPE_FORMAT_A16_UNORM:
+ return PIPE_FORMAT_R16_UNORM;
+ case PIPE_FORMAT_A16_SNORM:
+ return PIPE_FORMAT_R16_SNORM;
+ case PIPE_FORMAT_A16_UINT:
+ return PIPE_FORMAT_R16_UINT;
+ case PIPE_FORMAT_A16_SINT:
+ return PIPE_FORMAT_R16_SINT;
+ case PIPE_FORMAT_A16_FLOAT:
+ return PIPE_FORMAT_R16_FLOAT;
+
+ case PIPE_FORMAT_A32_UINT:
+ return PIPE_FORMAT_R32_UINT;
+ case PIPE_FORMAT_A32_SINT:
+ return PIPE_FORMAT_R32_SINT;
+ case PIPE_FORMAT_A32_FLOAT:
+ return PIPE_FORMAT_R32_FLOAT;
+
+ default:
+ return format;
+ }
+}
+
+/**
+ * Converts PIPE_FORMAT_R*A* to PIPE_FORMAT_R*G*.
+ */
+static enum pipe_format
+red_alpha_to_red_green(enum pipe_format format)
+{
+ switch (format)
+ {
+ case PIPE_FORMAT_R8A8_UNORM:
+ return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8A8_SNORM:
+ return PIPE_FORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8A8_UINT:
+ return PIPE_FORMAT_R8G8_UINT;
+ case PIPE_FORMAT_R8A8_SINT:
+ return PIPE_FORMAT_R8G8_SINT;
+
+ case PIPE_FORMAT_R16A16_UNORM:
+ return PIPE_FORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16A16_SNORM:
+ return PIPE_FORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16A16_UINT:
+ return PIPE_FORMAT_R16G16_UINT;
+ case PIPE_FORMAT_R16A16_SINT:
+ return PIPE_FORMAT_R16G16_SINT;
+ case PIPE_FORMAT_R16A16_FLOAT:
+ return PIPE_FORMAT_R16G16_FLOAT;
+
+ case PIPE_FORMAT_R32A32_UINT:
+ return PIPE_FORMAT_R32G32_UINT;
+ case PIPE_FORMAT_R32A32_SINT:
+ return PIPE_FORMAT_R32G32_SINT;
+ case PIPE_FORMAT_R32A32_FLOAT:
+ return PIPE_FORMAT_R32G32_FLOAT;
+
+ default:
+ return format;
+ }
+}
+
+/**
+ * Converts PIPE_FORMAT_L*A* to PIPE_FORMAT_R*G*.
+ */
+static enum pipe_format
+luminance_alpha_to_red_green(enum pipe_format format)
+{
+ switch (format)
+ {
+ case PIPE_FORMAT_L8A8_UNORM:
+ return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_L8A8_SNORM:
+ return PIPE_FORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_L8A8_UINT:
+ return PIPE_FORMAT_R8G8_UINT;
+ case PIPE_FORMAT_L8A8_SINT:
+ return PIPE_FORMAT_R8G8_SINT;
+
+ case PIPE_FORMAT_L16A16_UNORM:
+ return PIPE_FORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_L16A16_SNORM:
+ return PIPE_FORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_L16A16_UINT:
+ return PIPE_FORMAT_R16G16_UINT;
+ case PIPE_FORMAT_L16A16_SINT:
+ return PIPE_FORMAT_R16G16_SINT;
+ case PIPE_FORMAT_L16A16_FLOAT:
+ return PIPE_FORMAT_R16G16_FLOAT;
+
+ case PIPE_FORMAT_L32A32_UINT:
+ return PIPE_FORMAT_R32G32_UINT;
+ case PIPE_FORMAT_L32A32_SINT:
+ return PIPE_FORMAT_R32G32_SINT;
+ case PIPE_FORMAT_L32A32_FLOAT:
+ return PIPE_FORMAT_R32G32_FLOAT;
+
+ default:
+ return format;
+ }
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_A* format, and false otherwise.
+ */
+static bool
+format_is_alpha(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (desc->nr_channels == 1 &&
+ desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+ return true;
+
+ return false;
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_R* format, and false otherwise.
+ */
+static bool
+format_is_red(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (desc->nr_channels == 1 &&
+ desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return true;
+
+ return false;
+}
+
+
+/**
+ * Returns true if format is a PIPE_FORMAT_L* format, and false otherwise.
+ */
+static bool
+format_is_luminance(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (desc->nr_channels == 1 &&
+ desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return true;
+
+ return false;
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_R*A* format, and false otherwise.
+ */
+static bool
+format_is_red_alpha(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (desc->nr_channels == 2 &&
+ desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y)
+ return true;
+
+ return false;
+}
+
+static bool
+format_is_swizzled_rgba(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if ((desc->swizzle[0] == TGSI_SWIZZLE_X || desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0) &&
+ (desc->swizzle[1] == TGSI_SWIZZLE_Y || desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) &&
+ (desc->swizzle[2] == TGSI_SWIZZLE_Z || desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) &&
+ (desc->swizzle[3] == TGSI_SWIZZLE_W || desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1))
+ return false;
+
+ return true;
+}
+
+struct format_table
+{
+ unsigned char swizzle[4];
+ enum pipe_format format;
+};
+
+static const struct format_table table_8888_unorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+ { { 3, 0, 1, 2 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+ { { 3, 2, 1, 0 }, PIPE_FORMAT_A8B8G8R8_UNORM }
+};
+
+static const struct format_table table_1010102_unorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UNORM }
+};
+
+static const struct format_table table_1010102_snorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_SNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_SNORM }
+};
+
+static const struct format_table table_1010102_uint[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UINT },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UINT }
+};
+
+static enum pipe_format
+swizzle_format(enum pipe_format format, const int * const swizzle)
+{
+ unsigned i;
+
+ switch (format) {
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ for (i = 0; i < ARRAY_SIZE(table_8888_unorm); i++) {
+ if (swizzle[0] == table_8888_unorm[i].swizzle[0] &&
+ swizzle[1] == table_8888_unorm[i].swizzle[1] &&
+ swizzle[2] == table_8888_unorm[i].swizzle[2] &&
+ swizzle[3] == table_8888_unorm[i].swizzle[3])
+ return table_8888_unorm[i].format;
+ }
+ break;
+
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ for (i = 0; i < ARRAY_SIZE(table_1010102_unorm); i++) {
+ if (swizzle[0] == table_1010102_unorm[i].swizzle[0] &&
+ swizzle[1] == table_1010102_unorm[i].swizzle[1] &&
+ swizzle[2] == table_1010102_unorm[i].swizzle[2] &&
+ swizzle[3] == table_1010102_unorm[i].swizzle[3])
+ return table_1010102_unorm[i].format;
+ }
+ break;
+
+ case PIPE_FORMAT_R10G10B10A2_SNORM:
+ case PIPE_FORMAT_B10G10R10A2_SNORM:
+ for (i = 0; i < ARRAY_SIZE(table_1010102_snorm); i++) {
+ if (swizzle[0] == table_1010102_snorm[i].swizzle[0] &&
+ swizzle[1] == table_1010102_snorm[i].swizzle[1] &&
+ swizzle[2] == table_1010102_snorm[i].swizzle[2] &&
+ swizzle[3] == table_1010102_snorm[i].swizzle[3])
+ return table_1010102_snorm[i].format;
+ }
+ break;
+
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ for (i = 0; i < ARRAY_SIZE(table_1010102_uint); i++) {
+ if (swizzle[0] == table_1010102_uint[i].swizzle[0] &&
+ swizzle[1] == table_1010102_uint[i].swizzle[1] &&
+ swizzle[2] == table_1010102_uint[i].swizzle[2] &&
+ swizzle[3] == table_1010102_uint[i].swizzle[3])
+ return table_1010102_uint[i].format;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return PIPE_FORMAT_NONE;
+}
+
+static bool
+reinterpret_formats(enum pipe_format *src_format, enum pipe_format *dst_format)
+{
+ enum pipe_format src = *src_format;
+ enum pipe_format dst = *dst_format;
+
+ /* Note: dst_format has already been transformed from luminance/intensity
+ * to red when this function is called. The source format will never
+ * be an intensity format, because GL_INTENSITY is not a legal value
+ * for the format parameter in glTex(Sub)Image(). */
+
+ if (format_is_alpha(src)) {
+ if (!format_is_alpha(dst))
+ return false;
+
+ src = alpha_to_red(src);
+ dst = alpha_to_red(dst);
+ } else if (format_is_luminance(src)) {
+ if (!format_is_red(dst) && !format_is_red_alpha(dst))
+ return false;
+
+ src = util_format_luminance_to_red(src);
+ } else if (util_format_is_luminance_alpha(src)) {
+ src = luminance_alpha_to_red_green(src);
+
+ if (format_is_red_alpha(dst)) {
+ dst = red_alpha_to_red_green(dst);
+ } else if (!format_is_red(dst))
+ return false;
+ } else if (format_is_swizzled_rgba(src)) {
+ const struct util_format_description *src_desc = util_format_description(src);
+ const struct util_format_description *dst_desc = util_format_description(dst);
+ int swizzle[4];
+ unsigned i;
+
+ /* Make sure the format is an RGBA and not an RGBX format */
+ if (src_desc->nr_channels != 4 || src_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return false;
+
+ if (dst_desc->nr_channels != 4 || dst_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return false;
+
+ for (i = 0; i < 4; i++)
+ swizzle[i] = dst_desc->swizzle[src_desc->swizzle[i]];
+
+ dst = swizzle_format(dst, swizzle);
+ if (dst == PIPE_FORMAT_NONE)
+ return false;
+
+ src = unswizzle_format(src);
+ }
+
+ *src_format = src;
+ *dst_format = dst;
+ return true;
+}
+
+static void *
+create_pbo_upload_vs(struct st_context *st)
+{
+ struct ureg_program *ureg;
+ struct ureg_src in_pos;
+ struct ureg_src in_instanceid;
+ struct ureg_dst out_pos;
+ struct ureg_dst out_layer;
+
+ ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+
+ in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
+
+ out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+
+ if (st->pbo_upload.upload_layers) {
+ in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+
+ if (!st->pbo_upload.use_gs)
+ out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+ }
+
+ /* out_pos = in_pos */
+ ureg_MOV(ureg, out_pos, in_pos);
+
+ if (st->pbo_upload.upload_layers) {
+ if (st->pbo_upload.use_gs) {
+ /* out_pos.z = i2f(gl_InstanceID) */
+ ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
+ ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
+ } else {
+ /* out_layer = gl_InstanceID */
+ ureg_MOV(ureg, out_layer, in_instanceid);
+ }
+ }
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+static void *
+create_pbo_upload_gs(struct st_context *st)
+{
+ static const int zero = 0;
+ struct ureg_program *ureg;
+ struct ureg_dst out_pos;
+ struct ureg_dst out_layer;
+ struct ureg_src in_pos;
+ struct ureg_src imm;
+ unsigned i;
+
+ ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+ if (!ureg)
+ return NULL;
+
+ ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
+ ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
+ ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
+
+ out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+ out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+ in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
+
+ imm = ureg_DECL_immediate_int(ureg, &zero, 1);
+
+ for (i = 0; i < 3; ++i) {
+ struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
+
+ /* out_pos = in_pos[i] */
+ ureg_MOV(ureg, out_pos, in_pos_vertex);
+
+ /* out_layer.x = f2i(in_pos[i].z) */
+ ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
+ ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
+
+ ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
+ }
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+static void *
+create_pbo_upload_fs(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct ureg_program *ureg;
+ struct ureg_dst out;
+ struct ureg_src sampler;
+ struct ureg_src pos;
+ struct ureg_src layer;
+ struct ureg_src const0;
+ struct ureg_dst temp0;
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!ureg)
+ return NULL;
+
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ sampler = ureg_DECL_sampler(ureg, 0);
+ if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
+ pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+ } else {
+ pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ }
+ if (st->pbo_upload.upload_layers) {
+ layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
+ const0 = ureg_DECL_constant(ureg, 0);
+ temp0 = ureg_DECL_temporary(ureg);
+
+ /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */
+
+ /* temp0.xy = f2i(temp0.xy) */
+ ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+ ureg_swizzle(pos,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+ /* temp0.xy = temp0.xy + const0.xy */
+ ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+ ureg_swizzle(ureg_src(temp0),
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+ ureg_swizzle(const0,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+ /* temp0.x = const0.z * temp0.y + temp0.x */
+ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+ ureg_scalar(const0, TGSI_SWIZZLE_Z),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+
+ if (st->pbo_upload.upload_layers) {
+ /* temp0.x = const0.w * layer + temp0.x */
+ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+ ureg_scalar(const0, TGSI_SWIZZLE_W),
+ ureg_scalar(layer, TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+ }
+
+ /* out = txf(sampler, temp0.x) */
+ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
+ sampler);
+
+ ureg_release_temporary(ureg, temp0);
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+static bool
+try_pbo_upload_common(struct gl_context *ctx,
+ struct pipe_surface *surface,
+ int xoffset, int yoffset,
+ unsigned upload_width, unsigned upload_height,
+ struct pipe_resource *buffer,
+ enum pipe_format src_format,
+ intptr_t buf_offset,
+ unsigned bytes_per_pixel,
+ unsigned stride,
+ unsigned image_height)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
+ unsigned skip_pixels = 0;
+ bool success = false;
+
+ /* Check alignment. */
+ {
+ unsigned ofs = (buf_offset * bytes_per_pixel) % ctx->Const.TextureBufferOffsetAlignment;
+ if (ofs != 0) {
+ if (ofs % bytes_per_pixel != 0)
+ return false;
+
+ skip_pixels = ofs / bytes_per_pixel;
+ buf_offset -= skip_pixels;
+ }
+ }
+
+ /* Create the shaders */
+ if (!st->pbo_upload.vs) {
+ st->pbo_upload.vs = create_pbo_upload_vs(st);
+ if (!st->pbo_upload.vs)
+ return false;
+ }
+
+ if (depth != 1 && st->pbo_upload.use_gs && !st->pbo_upload.gs) {
+ st->pbo_upload.gs = create_pbo_upload_gs(st);
+ if (!st->pbo_upload.gs)
+ return false;
+ }
+
+ if (!st->pbo_upload.fs) {
+ st->pbo_upload.fs = create_pbo_upload_fs(st);
+ if (!st->pbo_upload.fs)
+ return false;
+ }
+
+ /* Set up the sampler_view */
+ {
+ unsigned first_element = buf_offset;
+ unsigned last_element = buf_offset + skip_pixels + upload_width - 1
+ + (upload_height - 1 + (depth - 1) * image_height) * stride;
+ struct pipe_sampler_view templ;
+ struct pipe_sampler_view *sampler_view;
+
+ /* This should be ensured by Mesa before calling our callbacks */
+ assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
+
+ if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
+ return false;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = src_format;
+ templ.u.buf.first_element = first_element;
+ templ.u.buf.last_element = last_element;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
+ if (sampler_view == NULL)
+ return false;
+
+ cso_save_fragment_sampler_views(st->cso_context);
+ cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
+ &sampler_view);
+
+ pipe_sampler_view_reference(&sampler_view, NULL);
+ }
+
+ /* Upload vertices */
+ {
+ struct pipe_vertex_buffer vbo;
+ struct pipe_vertex_element velem;
+
+ float x0 = (float) xoffset / surface->width * 2.0f - 1.0f;
+ float y0 = (float) yoffset / surface->height * 2.0f - 1.0f;
+ float x1 = (float) (xoffset + upload_width) / surface->width * 2.0f - 1.0f;
+ float y1 = (float) (yoffset + upload_height) / surface->height * 2.0f - 1.0f;
+
+ float *verts = NULL;
+
+ vbo.user_buffer = NULL;
+ vbo.buffer = NULL;
+ vbo.stride = 2 * sizeof(float);
+
+ u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
+ &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
+ if (!verts)
+ goto fail_vertex_upload;
+
+ verts[0] = x0;
+ verts[1] = y0;
+ verts[2] = x0;
+ verts[3] = y1;
+ verts[4] = x1;
+ verts[5] = y0;
+ verts[6] = x1;
+ verts[7] = y1;
+
+ u_upload_unmap(st->uploader);
+
+ velem.src_offset = 0;
+ velem.instance_divisor = 0;
+ velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+ velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ cso_save_vertex_elements(st->cso_context);
+ cso_set_vertex_elements(st->cso_context, 1, &velem);
+
+ cso_save_aux_vertex_buffer_slot(st->cso_context);
+ cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
+ 1, &vbo);
+
+ pipe_resource_reference(&vbo.buffer, NULL);
+ }
+
+ /* Upload constants */
+ {
+ struct pipe_constant_buffer cb;
+
+ struct {
+ int32_t xoffset;
+ int32_t yoffset;
+ int32_t stride;
+ int32_t image_size;
+ } constants;
+
+ constants.xoffset = -xoffset + skip_pixels;
+ constants.yoffset = -yoffset;
+ constants.stride = stride;
+ constants.image_size = stride * image_height;
+
+ if (st->constbuf_uploader) {
+ cb.buffer = NULL;
+ cb.user_buffer = NULL;
+ u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
+ st->ctx->Const.UniformBufferOffsetAlignment,
+ &constants, &cb.buffer_offset, &cb.buffer);
+ if (!cb.buffer)
+ goto fail_constant_upload;
+
+ u_upload_unmap(st->constbuf_uploader);
+ } else {
+ cb.buffer = NULL;
+ cb.user_buffer = &constants;
+ cb.buffer_offset = 0;
+ }
+ cb.buffer_size = sizeof(constants);
+
+ cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+ cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+ pipe_resource_reference(&cb.buffer, NULL);
+ }
+
+ /* Framebuffer_state */
+ {
+ struct pipe_framebuffer_state fb;
+ memset(&fb, 0, sizeof(fb));
+ fb.width = surface->width;
+ fb.height = surface->height;
+ fb.nr_cbufs = 1;
+ pipe_surface_reference(&fb.cbufs[0], surface);
+
+ cso_save_framebuffer(st->cso_context);
+ cso_set_framebuffer(st->cso_context, &fb);
+
+ pipe_surface_reference(&fb.cbufs[0], NULL);
+ }
+
+ /* Viewport state */
+ {
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * surface->width;
+ vp.scale[1] = 0.5f * surface->height;
+ vp.scale[2] = 1.0f;
+ vp.translate[0] = 0.5f * surface->width;
+ vp.translate[1] = 0.5f * surface->height;
+ vp.translate[2] = 0.0f;
+
+ cso_save_viewport(st->cso_context);
+ cso_set_viewport(st->cso_context, &vp);
+ }
+
+ /* Blend state */
+ cso_save_blend(st->cso_context);
+ cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+
+ /* Rasterizer state */
+ cso_save_rasterizer(st->cso_context);
+ cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+
+ /* Set up the shaders */
+ cso_save_vertex_shader(st->cso_context);
+ cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+
+ cso_save_geometry_shader(st->cso_context);
+ cso_set_geometry_shader_handle(st->cso_context,
+ depth != 1 ? st->pbo_upload.gs : NULL);
+
+ cso_save_tessctrl_shader(st->cso_context);
+ cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+
+ cso_save_tesseval_shader(st->cso_context);
+ cso_set_tesseval_shader_handle(st->cso_context, NULL);
+
+ cso_save_fragment_shader(st->cso_context);
+ cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+
+ /* Disable stream output */
+ cso_save_stream_outputs(st->cso_context);
+ cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+
+ if (depth == 1) {
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ } else {
+ cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP,
+ 0, 4, 0, depth);
+ }
+
+ success = true;
+
+ cso_restore_framebuffer(st->cso_context);
+ cso_restore_viewport(st->cso_context);
+ cso_restore_blend(st->cso_context);
+ cso_restore_rasterizer(st->cso_context);
+ cso_restore_vertex_shader(st->cso_context);
+ cso_restore_geometry_shader(st->cso_context);
+ cso_restore_tessctrl_shader(st->cso_context);
+ cso_restore_tesseval_shader(st->cso_context);
+ cso_restore_fragment_shader(st->cso_context);
+ cso_restore_stream_outputs(st->cso_context);
+ cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+fail_constant_upload:
+ cso_restore_vertex_elements(st->cso_context);
+ cso_restore_aux_vertex_buffer_slot(st->cso_context);
+fail_vertex_upload:
+ cso_restore_fragment_sampler_views(st->cso_context);
+
+ return success;
+}
+
+static bool
+try_pbo_upload(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLenum format, GLenum type,
+ enum pipe_format dst_format,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLint width, GLint height, GLint depth,
+ const void *pixels,
+ const struct gl_pixelstore_attrib *unpack)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_image *stImage = st_texture_image(texImage);
+ struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+ struct pipe_resource *texture = stImage->pt;
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_surface *surface = NULL;
+ enum pipe_format src_format;
+ const struct util_format_description *desc;
+ GLenum gl_target = texImage->TexObject->Target;
+ intptr_t buf_offset;
+ unsigned bytes_per_pixel;
+ unsigned stride, image_height;
+ bool success;
+
+ if (!st->pbo_upload.enabled)
+ return false;
+
+ /* From now on, we need the gallium representation of dimensions. */
+ if (gl_target == GL_TEXTURE_1D_ARRAY) {
+ depth = height;
+ height = 1;
+ zoffset = yoffset;
+ yoffset = 0;
+ image_height = 1;
+ } else {
+ image_height = unpack->ImageHeight > 0 ? unpack->ImageHeight : height;
+ }
+
+ if (depth != 1 && !st->pbo_upload.upload_layers)
+ return false;
+
+ /* Choose the source format. Initially, we do so without checking driver
+ * support at all because of the remapping we later perform and because
+ * at least the Radeon driver actually supports some formats for texture
+ * buffers which it doesn't support for regular textures. */
+ src_format = st_choose_matching_format(st, 0, format, type, unpack->SwapBytes);
+ if (!src_format) {
+ return false;
+ }
+
+ src_format = util_format_linear(src_format);
+ desc = util_format_description(src_format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return false;
+
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB)
+ return false;
+
+ if (st->pbo_upload.rgba_only) {
+ enum pipe_format orig_dst_format = dst_format;
+
+ if (!reinterpret_formats(&src_format, &dst_format)) {
+ return false;
+ }
+
+ if (dst_format != orig_dst_format &&
+ !screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_RENDER_TARGET)) {
+ return false;
+ }
+ }
+
+ if (!src_format ||
+ !screen->is_format_supported(screen, src_format, PIPE_BUFFER, 0,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ return false;
+ }
+
+ /* Check if the offset satisfies the alignment requirements */
+ buf_offset = (intptr_t) pixels;
+ bytes_per_pixel = desc->block.bits / 8;
+
+ if (buf_offset % bytes_per_pixel) {
+ return false;
+ }
+
+ /* Convert to texels */
+ buf_offset = buf_offset / bytes_per_pixel;
+
+ /* Compute the stride, taking unpack->Alignment into account */
+ {
+ unsigned pixels_per_row = unpack->RowLength > 0 ?
+ unpack->RowLength : width;
+ unsigned bytes_per_row = pixels_per_row * bytes_per_pixel;
+ unsigned remainder = bytes_per_row % unpack->Alignment;
+ unsigned offset_rows;
+
+ if (remainder > 0)
+ bytes_per_row += (unpack->Alignment - remainder);
+
+ if (bytes_per_row % bytes_per_pixel) {
+ return false;
+ }
+
+ stride = bytes_per_row / bytes_per_pixel;
+
+ offset_rows = unpack->SkipRows;
+ if (dims == 3)
+ offset_rows += image_height * unpack->SkipImages;
+
+ buf_offset += unpack->SkipPixels + stride * offset_rows;
+ }
+
+ /* Set up the surface */
+ {
+ unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+ unsigned max_layer = util_max_layer(texture, level);
+
+ zoffset += texImage->Face + texImage->TexObject->MinLayer;
+
+ struct pipe_surface templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = dst_format;
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = MIN2(zoffset, max_layer);
+ templ.u.tex.last_layer = MIN2(zoffset + depth - 1, max_layer);
+
+ surface = pipe->create_surface(pipe, texture, &templ);
+ if (!surface)
+ return false;
+ }
+
+ success = try_pbo_upload_common(ctx, surface,
+ xoffset, yoffset, width, height,
+ st_buffer_object(unpack->BufferObj)->buffer,
+ src_format,
+ buf_offset,
+ bytes_per_pixel, stride, image_height);
+
+ pipe_surface_reference(&surface, NULL);
+
+ return success;
+}
static void
st_TexSubImage(struct gl_context *ctx, GLuint dims,
@@ -735,21 +1732,15 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
goto fallback;
}
- /* See if the texture format already matches the format and type,
- * in which case the memcpy-based fast path will likely be used and
- * we don't have to blit. */
- if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
- type, unpack->SwapBytes, NULL)) {
- goto fallback;
- }
+ /* See if the destination format is supported. */
if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
bind = PIPE_BIND_DEPTH_STENCIL;
else
bind = PIPE_BIND_RENDER_TARGET;
- /* See if the destination format is supported.
- * For luminance and intensity, only the red channel is stored there. */
+ /* For luminance and intensity, only the red channel is stored
+ * in the destination. */
dst_format = util_format_linear(dst->format);
dst_format = util_format_luminance_to_red(dst_format);
dst_format = util_format_intensity_to_red(dst_format);
@@ -760,6 +1751,21 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
goto fallback;
}
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
+ if (try_pbo_upload(ctx, dims, texImage, format, type, dst_format,
+ xoffset, yoffset, zoffset,
+ width, height, depth, pixels, unpack))
+ return;
+ }
+
+ /* See if the texture format already matches the format and type,
+ * in which case the memcpy-based fast path will likely be used and
+ * we don't have to blit. */
+ if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
+ type, unpack->SwapBytes, NULL)) {
+ goto fallback;
+ }
+
/* Choose the source format. */
src_format = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
format, type, unpack->SwapBytes);
@@ -849,18 +1855,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
/* 1D array textures.
* We need to convert gallium coords to GL coords.
*/
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
+ GLvoid *src = _mesa_image_address2d(unpack, pixels,
width, depth, format,
- type, 0, slice, 0);
+ type, slice, 0);
memcpy(map, src, bytesPerRow);
}
else {
ubyte *slice_map = map;
for (row = 0; row < (unsigned) height; row++) {
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
- width, height, format,
- type, slice, row, 0);
+ GLvoid *src = _mesa_image_address(dims, unpack, pixels,
+ width, height, format,
+ type, slice, row, 0);
memcpy(slice_map, src, bytesPerRow);
slice_map += transfer->stride;
}
@@ -927,13 +1933,166 @@ st_TexImage(struct gl_context * ctx, GLuint dims,
}
+static void
+st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLint x, GLint y, GLint z,
+ GLsizei w, GLsizei h, GLsizei d,
+ GLenum format, GLsizei imageSize, const GLvoid *data)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_image *stImage = st_texture_image(texImage);
+ struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+ struct pipe_resource *texture = stImage->pt;
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_resource *dst = stImage->pt;
+ struct pipe_surface *surface = NULL;
+ struct compressed_pixelstore store;
+ enum pipe_format copy_format;
+ unsigned bytes_per_block;
+ unsigned bw, bh;
+ intptr_t buf_offset;
+ bool success = false;
+
+ /* Check basic pre-conditions for PBO upload */
+ if (!st->prefer_blit_based_texture_transfer) {
+ goto fallback;
+ }
+
+ if (!_mesa_is_bufferobj(ctx->Unpack.BufferObj))
+ goto fallback;
+
+ if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
+ (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
+ /* ETC isn't supported and is represented by uncompressed formats. */
+ goto fallback;
+ }
+
+ if (!dst) {
+ goto fallback;
+ }
+
+ if (!st->pbo_upload.enabled ||
+ !screen->get_param(screen, PIPE_CAP_SURFACE_REINTERPRET_BLOCKS)) {
+ goto fallback;
+ }
+
+ /* Choose the pipe format for the upload. */
+ bytes_per_block = util_format_get_blocksize(dst->format);
+ bw = util_format_get_blockwidth(dst->format);
+ bh = util_format_get_blockheight(dst->format);
+
+ switch (bytes_per_block) {
+ case 8:
+ copy_format = PIPE_FORMAT_R16G16B16A16_UINT;
+ break;
+ case 16:
+ copy_format = PIPE_FORMAT_R32G32B32A32_UINT;
+ break;
+ default:
+ goto fallback;
+ }
+
+ if (!screen->is_format_supported(screen, copy_format, PIPE_BUFFER, 0,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ goto fallback;
+ }
+
+ if (!screen->is_format_supported(screen, copy_format, dst->target,
+ dst->nr_samples, PIPE_BIND_RENDER_TARGET)) {
+ goto fallback;
+ }
+
+ /* Interpret the pixelstore settings. */
+ _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, w, h, d,
+ &ctx->Unpack, &store);
+ assert(store.CopyBytesPerRow % bytes_per_block == 0);
+ assert(store.SkipBytes % bytes_per_block == 0);
+
+ /* Compute the offset into the buffer */
+ buf_offset = (intptr_t)data + store.SkipBytes;
+
+ if (buf_offset % bytes_per_block) {
+ goto fallback;
+ }
+
+ buf_offset = buf_offset / bytes_per_block;
+
+ /* Set up the surface. */
+ {
+ unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+ unsigned max_layer = util_max_layer(texture, level);
+
+ z += texImage->Face + texImage->TexObject->MinLayer;
+
+ struct pipe_surface templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = copy_format;
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = MIN2(z, max_layer);
+ templ.u.tex.last_layer = MIN2(z + d - 1, max_layer);
+
+ surface = pipe->create_surface(pipe, texture, &templ);
+ if (!surface)
+ goto fallback;
+ }
+
+ success = try_pbo_upload_common(ctx, surface,
+ x / bw, y / bh,
+ store.CopyBytesPerRow / bytes_per_block,
+ store.CopyRowsPerSlice,
+ st_buffer_object(ctx->Unpack.BufferObj)->buffer,
+ copy_format,
+ buf_offset,
+ bytes_per_block,
+ store.TotalBytesPerRow / bytes_per_block,
+ store.TotalRowsPerSlice);
+
+ pipe_surface_reference(&surface, NULL);
+
+ if (success)
+ return;
+
+fallback:
+ _mesa_store_compressed_texsubimage(ctx, dims, texImage,
+ x, y, z, w, h, d,
+ format, imageSize, data);
+}
+
static void
st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLsizei imageSize, const GLvoid *data)
{
prep_teximage(ctx, texImage, GL_NONE, GL_NONE);
- _mesa_store_compressed_teximage(ctx, dims, texImage, imageSize, data);
+
+ /* only 2D and 3D compressed images are supported at this time */
+ if (dims == 1) {
+ _mesa_problem(ctx, "Unexpected glCompressedTexImage1D call");
+ return;
+ }
+
+ /* This is pretty simple, because unlike the general texstore path we don't
+ * have to worry about the usual image unpacking or image transfer
+ * operations.
+ */
+ assert(texImage);
+ assert(texImage->Width > 0);
+ assert(texImage->Height > 0);
+ assert(texImage->Depth > 0);
+
+ /* allocate storage for texture data */
+ if (!st_AllocTextureImageBuffer(ctx, texImage)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage%uD", dims);
+ return;
+ }
+
+ st_CompressedTexSubImage(ctx, dims, texImage,
+ 0, 0, 0,
+ texImage->Width, texImage->Height, texImage->Depth,
+ texImage->TexFormat,
+ imageSize, data);
}
@@ -1958,7 +3117,7 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->QuerySamplesForFormat = st_QuerySamplesForFormat;
functions->TexImage = st_TexImage;
functions->TexSubImage = st_TexSubImage;
- functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
+ functions->CompressedTexSubImage = st_CompressedTexSubImage;
functions->CopyTexSubImage = st_CopyTexSubImage;
functions->GenerateMipmap = st_generate_mipmap;
diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
index 1b685298c5f..55c86c401e2 100644
--- a/src/mesa/state_tracker/st_cb_texture.h
+++ b/src/mesa/state_tracker/st_cb_texture.h
@@ -53,5 +53,10 @@ st_finalize_texture(struct gl_context *ctx,
extern void
st_init_texture_functions(struct dd_function_table *functions);
+extern void
+st_init_pbo_upload(struct st_context *st);
+
+extern void
+st_destroy_pbo_upload(struct st_context *st);
#endif /* ST_CB_TEXTURE_H */
diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c b/src/mesa/state_tracker/st_cb_texturebarrier.c
index dd4dde74c86..2de150ba13a 100644
--- a/src/mesa/state_tracker/st_cb_texturebarrier.c
+++ b/src/mesa/state_tracker/st_cb_texturebarrier.c
@@ -65,6 +65,13 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield barriers)
if (barriers & GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT)
flags |= PIPE_BARRIER_MAPPED_BUFFER;
+ if (barriers & GL_ATOMIC_COUNTER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+ if (barriers & GL_SHADER_STORAGE_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+
+ if (barriers & GL_QUERY_BUFFER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_QUERY_BUFFER;
if (flags && pipe->memory_barrier)
pipe->memory_barrier(pipe, flags);
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ce1e97aacb5..446ebfb563f 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -96,6 +96,30 @@ static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
}
+/**
+ * Called via ctx->Driver.QueryMemoryInfo()
+ */
+static void
+st_query_memory_info(struct gl_context *ctx, struct gl_memory_info *out)
+{
+ struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+ struct pipe_memory_info info;
+
+ assert(screen->query_memory_info);
+ if (!screen->query_memory_info)
+ return;
+
+ screen->query_memory_info(screen, &info);
+
+ out->total_device_memory = info.total_device_memory;
+ out->avail_device_memory = info.avail_device_memory;
+ out->total_staging_memory = info.total_staging_memory;
+ out->avail_staging_memory = info.avail_staging_memory;
+ out->device_memory_evicted = info.device_memory_evicted;
+ out->nr_device_memory_evictions = info.nr_device_memory_evictions;
+}
+
+
/**
* Called via ctx->Driver.UpdateState()
*/
@@ -136,6 +160,7 @@ st_destroy_context_priv(struct st_context *st)
st_destroy_drawpix(st);
st_destroy_drawtex(st);
st_destroy_perfmon(st);
+ st_destroy_pbo_upload(st);
for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) {
for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) {
@@ -209,6 +234,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
st_init_bitmap(st);
st_init_clear(st);
st_init_draw( st );
+ st_init_pbo_upload(st);
/* Choose texture target for glDrawPixels, glBitmap, renderbuffers */
if (pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
@@ -350,6 +376,8 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
+ f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
+ f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
@@ -487,4 +515,5 @@ void st_init_driver_functions(struct pipe_screen *screen,
functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
+ functions->QueryMemoryInfo = st_query_memory_info;
}
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9db5f11beb5..57076ad0d18 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -62,6 +62,8 @@ struct u_upload_mgr;
#define ST_NEW_TESSCTRL_PROGRAM (1 << 9)
#define ST_NEW_TESSEVAL_PROGRAM (1 << 10)
#define ST_NEW_SAMPLER_VIEWS (1 << 11)
+#define ST_NEW_ATOMIC_BUFFER (1 << 12)
+#define ST_NEW_STORAGE_BUFFER (1 << 13)
struct st_state_flags {
@@ -202,6 +204,19 @@ struct st_context
void *gs_layered;
} clear;
+ /* For gl(Compressed)Tex(Sub)Image */
+ struct {
+ struct pipe_rasterizer_state raster;
+ struct pipe_blend_state blend;
+ void *vs;
+ void *gs;
+ void *fs;
+ bool enabled;
+ bool rgba_only;
+ bool upload_layers;
+ bool use_gs;
+ } pbo_upload;
+
/** used for anything using util_draw_vertex_buffer */
struct pipe_vertex_element velems_util_draw[3];
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 53ea6767395..f25bd742f79 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -218,6 +218,11 @@ void st_init_limits(struct pipe_screen *screen,
c->MaxUniformBlockSize / 4 *
pc->MaxUniformBlocks);
+ pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ pc->MaxAtomicBuffers = screen->get_shader_param(
+ screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
+ pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+
/* Gallium doesn't really care about local vs. env parameters so use the
* same limits.
*/
@@ -333,6 +338,31 @@ void st_init_limits(struct pipe_screen *screen,
screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
c->GLSLFrontFacingIsSysVal =
screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
+
+ c->MaxAtomicBufferBindings =
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ c->MaxCombinedAtomicBuffers =
+ c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
+
+ if (c->MaxCombinedAtomicBuffers > 0)
+ extensions->ARB_shader_atomic_counters = GL_TRUE;
+
+ c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
+ c->ShaderStorageBufferOffsetAlignment =
+ screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT);
+ if (c->ShaderStorageBufferOffsetAlignment) {
+ c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings =
+ c->MaxCombinedAtomicBuffers;
+ c->MaxCombinedShaderOutputResources +=
+ c->MaxCombinedShaderStorageBlocks;
+ c->MaxShaderStorageBlockSize = 1 << 27;
+ extensions->ARB_shader_storage_buffer_object = GL_TRUE;
+ }
}
@@ -465,6 +495,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY },
{ o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS },
{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE },
+ { o(ARB_query_buffer_object), PIPE_CAP_QUERY_BUFFER_OBJECT },
{ o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING },
{ o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP },
{ o(ARB_shader_draw_parameters), PIPE_CAP_DRAW_PARAMETERS },
@@ -496,12 +527,14 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(EXT_transform_feedback), PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS },
{ o(AMD_pinned_memory), PIPE_CAP_RESOURCE_FROM_USER_MEMORY },
+ { o(ATI_meminfo), PIPE_CAP_QUERY_MEMORY_INFO },
{ o(AMD_seamless_cubemap_per_texture), PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE },
{ o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL },
{ o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP },
{ o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER },
{ o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART },
{ o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER },
+ { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO },
/* GL_NV_point_sprite is not supported by gallium because we don't
* support the GL_POINT_SPRITE_R_MODE_NV option. */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index cf91d39ff92..b8182de0be8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -52,7 +52,6 @@
#include "st_mesa_to_tgsi.h"
-#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
@@ -267,6 +266,9 @@ public:
unsigned tex_offset_num_offset;
int dead_mask; /**< Used in dead code elimination */
+ st_src_reg buffer; /**< buffer register */
+ unsigned buffer_access; /**< buffer access type */
+
class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
const struct tgsi_opcode_info *info;
};
@@ -391,6 +393,7 @@ public:
int samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
+ int buffers_used;
bool indirect_addr_consts;
int wpos_transform_const;
@@ -444,6 +447,10 @@ public:
virtual void visit(ir_barrier *);
/*@}*/
+ void visit_atomic_counter_intrinsic(ir_call *);
+ void visit_ssbo_intrinsic(ir_call *);
+ void visit_membar_intrinsic(ir_call *);
+
st_src_reg result;
/** List of variable_storage */
@@ -557,6 +564,28 @@ swizzle_for_size(int size)
return size_swizzles[size - 1];
}
+static bool
+is_resource_instruction(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_RESQ:
+ case TGSI_OPCODE_LOAD:
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ return true;
+ default:
+ return false;
+ }
+}
+
static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
@@ -566,7 +595,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
static unsigned
num_inst_src_regs(const glsl_to_tgsi_instruction *op)
{
- return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src;
+ return op->info->is_tex || is_resource_instruction(op->op) ?
+ op->info->num_src - 1 : op->info->num_src;
}
glsl_to_tgsi_instruction *
@@ -661,8 +691,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
}
}
- this->instructions.push_tail(inst);
-
/*
* This section contains the double processing.
* GLSL just represents doubles as single channel values,
@@ -698,7 +726,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
int initial_src_swz[4], initial_src_idx[4];
int initial_dst_idx[2], initial_dst_writemask[2];
/* select the writemask for dst0 or dst1 */
- unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
+ unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
/* copy out the writemask, index and swizzles for all src/dsts. */
for (j = 0; j < 2; j++) {
@@ -715,10 +743,22 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
* scan all the components in the dst writemask
* generate an instruction for each of them if required.
*/
+ st_src_reg addr;
while (writemask) {
int i = u_bit_scan(&writemask);
+ /* before emitting the instruction, see if we have to adjust store
+ * address */
+ if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
+ addr.file == PROGRAM_UNDEFINED) {
+ /* We have to advance the buffer address by 16 */
+ addr = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
+ inst->src[0], st_src_reg_for_int(16));
+ }
+
+
/* first time use previous instruction */
if (dinst == NULL) {
dinst = inst;
@@ -728,16 +768,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
*dinst = *inst;
dinst->next = NULL;
dinst->prev = NULL;
- this->instructions.push_tail(dinst);
}
+ this->instructions.push_tail(dinst);
/* modify the destination if we are splitting */
for (j = 0; j < 2; j++) {
if (dst_is_double[j]) {
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
- if (i > 1)
+ if (i > 1) {
+ if (dinst->op == TGSI_OPCODE_STORE) {
+ dinst->src[0] = addr;
+ } else {
dinst->dst[j].index++;
+ }
+ }
} else {
/* if we aren't writing to a double, just get the bit of the initial writemask
for this channel */
@@ -773,6 +818,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
}
}
inst = dinst;
+ } else {
+ this->instructions.push_tail(inst);
}
@@ -807,7 +854,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
assert(src1.type != GLSL_TYPE_ARRAY);
assert(src1.type != GLSL_TYPE_STRUCT);
- if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
+ if (is_resource_instruction(op))
+ type = src1.type;
+ else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
@@ -891,6 +940,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
case3fid(FLR, FLR, DFLR);
case3fid(ROUND, ROUND, DROUND);
+ case2iu(ATOMIMAX, ATOMUMAX);
+ case2iu(ATOMIMIN, ATOMUMIN);
+
default: break;
}
@@ -2170,6 +2222,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
break;
+ case ir_unop_get_buffer_size: {
+ ir_constant *const_offset = ir->operands[0]->as_constant();
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_offset ? const_offset->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+ if (!const_offset) {
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ emit_arl(ir, sampler_reladdr, op[0]);
+ }
+ emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
+ break;
+ }
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
@@ -2190,10 +2258,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
-
- case ir_unop_get_buffer_size:
- assert(!"Not implemented yet");
- break;
}
this->result = result_src;
@@ -3070,14 +3134,242 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
return entry;
}
+void
+glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ ir_dereference *deref = static_cast<ir_dereference *>(
+ ir->actual_parameters.get_head());
+ ir_variable *location = deref->variable_referenced();
+
+ st_src_reg buffer(
+ PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
+
+ /* Calculate the surface offset */
+ st_src_reg offset;
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+
+ if (deref_array) {
+ offset = get_temp(glsl_type::uint_type);
+
+ deref_array->array_index->accept(this);
+
+ emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+ this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
+ offset, st_src_reg_for_int(location->data.offset));
+ } else {
+ offset = st_src_reg_for_int(location->data.offset);
+ }
+
+ ir->return_deref->accept(this);
+ st_dst_reg dst(this->result);
+ dst.writemask = WRITEMASK_X;
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_atomic_read", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
+ inst->buffer = buffer;
+ } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+ st_src_reg_for_int(1));
+ inst->buffer = buffer;
+ } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+ st_src_reg_for_int(-1));
+ inst->buffer = buffer;
+ emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
+ }
+}
+
+void
+glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ ir_constant *const_block = block->as_constant();
+
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_block ? const_block->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+
+ if (!const_block) {
+ block->accept(this);
+ emit_arl(ir, sampler_reladdr, this->result);
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ }
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_ssbo", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ if (dst.type == GLSL_TYPE_BOOL)
+ emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+ } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ } else {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ }
+
+ param = param->get_next();
+ ir_constant *access = NULL;
+ if (!param->is_tail_sentinel()) {
+ access = ((ir_instruction *)param)->as_constant();
+ assert(access);
+ }
+
+ /* The emit_asm() might have actually split the op into pieces, e.g. for
+ * double stores. We have to go back and fix up all the generated ops.
+ */
+ unsigned op = inst->op;
+ do {
+ inst->buffer = buffer;
+ if (access)
+ inst->buffer_access = access->value.u[0];
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ if (inst->op == TGSI_OPCODE_UADD)
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ } while (inst && inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op);
+}
+
+void
+glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+
+ if (!strcmp("__intrinsic_memory_barrier", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED));
+ else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
+ else if (!strcmp("__intrinsic_memory_barrier_buffer", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
+ else if (!strcmp("__intrinsic_memory_barrier_image", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
+ else if (!strcmp("__intrinsic_memory_barrier_shared", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHARED));
+ else if (!strcmp("__intrinsic_group_memory_barrier", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED |
+ TGSI_MEMBAR_THREAD_GROUP));
+ else
+ assert(!"Unexpected memory barrier intrinsic");
+}
+
void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
glsl_to_tgsi_instruction *call_inst;
ir_function_signature *sig = ir->callee;
- function_entry *entry = get_function_signature(sig);
+ const char *callee = sig->function_name();
+ function_entry *entry;
int i;
+ /* Filter out intrinsics */
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_load_ssbo", callee) ||
+ !strcmp("__intrinsic_store_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ visit_ssbo_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_memory_barrier", callee) ||
+ !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
+ !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
+ !strcmp("__intrinsic_memory_barrier_image", callee) ||
+ !strcmp("__intrinsic_memory_barrier_shared", callee) ||
+ !strcmp("__intrinsic_group_memory_barrier", callee)) {
+ visit_membar_intrinsic(ir);
+ return;
+ }
+
+ entry = get_function_signature(sig);
/* Process in parameters. */
foreach_two_lists(formal_node, &sig->parameters,
actual_node, &ir->actual_parameters) {
@@ -3583,6 +3875,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
+ buffers_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
glsl_version = 0;
@@ -3617,6 +3910,7 @@ static void
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
+ v->buffers_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
@@ -3634,6 +3928,12 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
}
}
}
+ if (inst->buffer.file != PROGRAM_UNDEFINED && (
+ is_resource_instruction(inst->op) ||
+ inst->op == TGSI_OPCODE_STORE)) {
+ if (inst->buffer.file == PROGRAM_BUFFER)
+ v->buffers_used |= 1 << inst->buffer.index;
+ }
}
prog->SamplersUsed = v->samplers_used;
@@ -3822,9 +4122,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *
last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
}
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ if (inst->dst[j].file == PROGRAM_TEMPORARY) {
if (first_writes[inst->dst[j].index] == -1)
first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+ last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
+ }
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
@@ -4229,7 +4531,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (!inst->dead_mask || !inst->dst[0].writemask)
continue;
- else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
+ /* No amount of dead masks should remove memory stores */
+ if (inst->info->is_store)
+ continue;
+
+ if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
inst->remove();
delete inst;
removed++;
@@ -4338,6 +4644,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
/* Update the first_writes and last_reads arrays with the new
* values for the merged register index, and mark the newly unused
* register index as such. */
+ assert(last_reads[j] >= last_reads[i]);
last_reads[i] = last_reads[j];
first_writes[j] = -1;
last_reads[j] = -1;
@@ -4407,6 +4714,7 @@ struct st_translate {
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+ struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned *array_sizes;
@@ -4814,13 +5122,13 @@ compile_tgsi_instruction(struct st_translate *t,
const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
- GLuint i;
+ int i;
struct ureg_dst dst[2];
struct ureg_src src[4];
struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
- unsigned num_dst;
- unsigned num_src;
+ int num_dst;
+ int num_src;
unsigned tex_target;
num_dst = num_inst_dst_regs(inst);
@@ -4868,7 +5176,7 @@ compile_tgsi_instruction(struct st_translate *t,
src[num_src] =
ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
num_src++;
- for (i = 0; i < inst->tex_offset_num_offset; i++) {
+ for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
@@ -4881,6 +5189,38 @@ compile_tgsi_instruction(struct st_translate *t,
src, num_src);
return;
+ case TGSI_OPCODE_RESQ:
+ case TGSI_OPCODE_LOAD:
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ for (i = num_src - 1; i >= 0; i--)
+ src[i + 1] = src[i];
+ num_src++;
+ src[0] = t->buffers[inst->buffer.index];
+ if (inst->buffer.reladdr)
+ src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
+ assert(src[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
+ case TGSI_OPCODE_STORE:
+ dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask);
+ if (inst->buffer.reladdr)
+ dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
+ assert(dst[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
case TGSI_OPCODE_SCS:
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
@@ -5170,6 +5510,8 @@ st_translate_program(
{
struct st_translate *t;
unsigned i;
+ struct gl_program_constants *frag_const =
+ &ctx->Const.Program[MESA_SHADER_FRAGMENT];
enum pipe_error ret = PIPE_OK;
assert(numInputs <= ARRAY_SIZE(t->inputs));
@@ -5485,7 +5827,7 @@ st_translate_program(
assert(i == program->num_immediates);
/* texture samplers */
- for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
+ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
unsigned type;
@@ -5510,6 +5852,21 @@ st_translate_program(
}
}
+ for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
+ }
+ }
+
+ for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
+ i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
+ }
+ }
+
+
+
/* Emit each instruction in turn:
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 0b8b6a9de56..6494aa518a2 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -180,6 +180,9 @@ vbo_sizeof_ib_type(GLenum type)
}
}
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
+
void
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 02139ef881f..f0245fd08cc 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -37,8 +37,6 @@
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
-#include "main/sse_minmax.h"
-#include "x86/common_x86_asm.h"
#include "vbo_context.h"
@@ -80,152 +78,6 @@ vbo_check_buffers_are_unmapped(struct gl_context *ctx)
}
-
-/**
- * Compute min and max elements by scanning the index buffer for
- * glDraw[Range]Elements() calls.
- * If primitive restart is enabled, we need to ignore restart
- * indexes when computing min/max.
- */
-static void
-vbo_get_minmax_index(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index, GLuint *max_index,
- const GLuint count)
-{
- const GLboolean restart = ctx->Array._PrimitiveRestart;
- const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
- const int index_size = vbo_sizeof_ib_type(ib->type);
- const char *indices;
- GLuint i;
-
- indices = (char *) ib->ptr + prim->start * index_size;
- if (_mesa_is_bufferobj(ib->obj)) {
- GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
- indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
- GL_MAP_READ_BIT, ib->obj,
- MAP_INTERNAL);
- }
-
- switch (ib->type) {
- case GL_UNSIGNED_INT: {
- const GLuint *ui_indices = (const GLuint *)indices;
- GLuint max_ui = 0;
- GLuint min_ui = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] != restartIndex) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- }
- else {
-#if defined(USE_SSE41)
- if (cpu_has_sse4_1) {
- _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
- }
- else
-#endif
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- *min_index = min_ui;
- *max_index = max_ui;
- break;
- }
- case GL_UNSIGNED_SHORT: {
- const GLushort *us_indices = (const GLushort *)indices;
- GLuint max_us = 0;
- GLuint min_us = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (us_indices[i] != restartIndex) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- *min_index = min_us;
- *max_index = max_us;
- break;
- }
- case GL_UNSIGNED_BYTE: {
- const GLubyte *ub_indices = (const GLubyte *)indices;
- GLuint max_ub = 0;
- GLuint min_ub = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] != restartIndex) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- *min_index = min_ub;
- *max_index = max_ub;
- break;
- }
- default:
- unreachable("not reached");
- }
-
- if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
- }
-}
-
-/**
- * Compute min and max elements for nr_prims
- */
-void
-vbo_get_minmax_indices(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index,
- GLuint *max_index,
- GLuint nr_prims)
-{
- GLuint tmp_min, tmp_max;
- GLuint i;
- GLuint count;
-
- *min_index = ~0;
- *max_index = 0;
-
- for (i = 0; i < nr_prims; i++) {
- const struct _mesa_prim *start_prim;
-
- start_prim = &prims[i];
- count = start_prim->count;
- /* Do combination if possible to reduce map/unmap count */
- while ((i + 1 < nr_prims) &&
- (prims[i].start + prims[i].count == prims[i+1].start)) {
- count += prims[i+1].count;
- i++;
- }
- vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
- *min_index = MIN2(*min_index, tmp_min);
- *max_index = MAX2(*max_index, tmp_max);
- }
-}
-
-
/**
* Check that element 'j' of the array has reasonable data.
* Map VBO if needed.
diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c
new file mode 100644
index 00000000000..0f75a87f3f3
--- /dev/null
+++ b/src/mesa/vbo/vbo_minmax_index.c
@@ -0,0 +1,378 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright 2003 VMware, Inc.
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/varray.h"
+#include "main/macros.h"
+#include "main/sse_minmax.h"
+#include "x86/common_x86_asm.h"
+#include "util/hash_table.h"
+
+
+struct minmax_cache_key {
+ GLintptr offset;
+ GLuint count;
+ GLenum type;
+};
+
+
+struct minmax_cache_entry {
+ struct minmax_cache_key key;
+ GLuint min;
+ GLuint max;
+};
+
+
+static uint32_t
+vbo_minmax_cache_hash(const struct minmax_cache_key *key)
+{
+ return _mesa_hash_data(key, sizeof(*key));
+}
+
+
+static bool
+vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
+ const struct minmax_cache_key *b)
+{
+ return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type);
+}
+
+
+static void
+vbo_minmax_cache_delete_entry(struct hash_entry *entry)
+{
+ free(entry->data);
+}
+
+
+static GLboolean
+vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+ if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
+ USAGE_ATOMIC_COUNTER_BUFFER |
+ USAGE_SHADER_STORAGE_BUFFER |
+ USAGE_TRANSFORM_FEEDBACK_BUFFER |
+ USAGE_PIXEL_PACK_BUFFER |
+ USAGE_DISABLE_MINMAX_CACHE))
+ return GL_FALSE;
+
+ if ((bufferObj->Mappings[MAP_USER].AccessFlags &
+ (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
+ (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+ _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
+ bufferObj->MinMaxCache = NULL;
+}
+
+
+static GLboolean
+vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
+ GLenum type, GLintptr offset, GLuint count,
+ GLuint *min_index, GLuint *max_index)
+{
+ GLboolean found = GL_FALSE;
+ struct minmax_cache_key key;
+ uint32_t hash;
+ struct hash_entry *result;
+
+ if (!bufferObj->MinMaxCache)
+ return GL_FALSE;
+ if (!vbo_use_minmax_cache(bufferObj))
+ return GL_FALSE;
+
+ mtx_lock(&bufferObj->Mutex);
+
+ if (bufferObj->MinMaxCacheDirty) {
+ /* Disable the cache permanently for this BO if the number of hits
+ * is asymptotically less than the number of misses. This happens when
+ * applications use the BO for streaming.
+ *
+ * However, some initial optimism allows applications that interleave
+ * draw calls with glBufferSubData during warmup.
+ */
+ unsigned optimism = bufferObj->Size;
+ if (bufferObj->MinMaxCacheMissIndices > optimism &&
+ bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
+ bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
+ vbo_delete_minmax_cache(bufferObj);
+ goto out_disable;
+ }
+
+ _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
+ bufferObj->MinMaxCacheDirty = false;
+ goto out_invalidate;
+ }
+
+ key.type = type;
+ key.offset = offset;
+ key.count = count;
+ hash = vbo_minmax_cache_hash(&key);
+ result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
+ if (result) {
+ struct minmax_cache_entry *entry = result->data;
+ *min_index = entry->min;
+ *max_index = entry->max;
+ found = GL_TRUE;
+ }
+
+out_invalidate:
+ if (found) {
+ /* The hit counter saturates so that we don't accidently disable the
+ * cache in a long-running program.
+ */
+ unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
+
+ if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
+ bufferObj->MinMaxCacheHitIndices = new_hit_count;
+ else
+ bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
+ } else {
+ bufferObj->MinMaxCacheMissIndices += count;
+ }
+
+out_disable:
+ mtx_unlock(&bufferObj->Mutex);
+ return found;
+}
+
+
+static void
+vbo_minmax_cache_store(struct gl_context *ctx,
+ struct gl_buffer_object *bufferObj,
+ GLenum type, GLintptr offset, GLuint count,
+ GLuint min, GLuint max)
+{
+ struct minmax_cache_entry *entry;
+ struct hash_entry *table_entry;
+ uint32_t hash;
+
+ if (!vbo_use_minmax_cache(bufferObj))
+ return;
+
+ mtx_lock(&bufferObj->Mutex);
+
+ if (!bufferObj->MinMaxCache) {
+ bufferObj->MinMaxCache =
+ _mesa_hash_table_create(NULL,
+ (uint32_t (*)(const void *))vbo_minmax_cache_hash,
+ (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
+ if (!bufferObj->MinMaxCache)
+ goto out;
+ }
+
+ entry = MALLOC_STRUCT(minmax_cache_entry);
+ if (!entry)
+ goto out;
+
+ entry->key.offset = offset;
+ entry->key.count = count;
+ entry->key.type = type;
+ entry->min = min;
+ entry->max = max;
+ hash = vbo_minmax_cache_hash(&entry->key);
+
+ table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
+ hash, &entry->key);
+ if (table_entry) {
+ /* It seems like this could happen when two contexts are rendering using
+ * the same buffer object from multiple threads.
+ */
+ _mesa_debug(ctx, "duplicate entry in minmax cache\n");
+ free(entry);
+ goto out;
+ }
+
+ table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
+ hash, &entry->key, entry);
+ if (!table_entry)
+ free(entry);
+
+out:
+ mtx_unlock(&bufferObj->Mutex);
+}
+
+
+/**
+ * Compute min and max elements by scanning the index buffer for
+ * glDraw[Range]Elements() calls.
+ * If primitive restart is enabled, we need to ignore restart
+ * indexes when computing min/max.
+ */
+static void
+vbo_get_minmax_index(struct gl_context *ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint *min_index, GLuint *max_index,
+ const GLuint count)
+{
+ const GLboolean restart = ctx->Array._PrimitiveRestart;
+ const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
+ const int index_size = vbo_sizeof_ib_type(ib->type);
+ const char *indices;
+ GLuint i;
+
+ indices = (char *) ib->ptr + prim->start * index_size;
+ if (_mesa_is_bufferobj(ib->obj)) {
+ GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
+
+ if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,
+ min_index, max_index))
+ return;
+
+ indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
+ GL_MAP_READ_BIT, ib->obj,
+ MAP_INTERNAL);
+ }
+
+ switch (ib->type) {
+ case GL_UNSIGNED_INT: {
+ const GLuint *ui_indices = (const GLuint *)indices;
+ GLuint max_ui = 0;
+ GLuint min_ui = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (ui_indices[i] != restartIndex) {
+ if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+ if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+ }
+ }
+ }
+ else {
+#if defined(USE_SSE41)
+ if (cpu_has_sse4_1) {
+ _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
+ }
+ else
+#endif
+ for (i = 0; i < count; i++) {
+ if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+ if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+ }
+ }
+ *min_index = min_ui;
+ *max_index = max_ui;
+ break;
+ }
+ case GL_UNSIGNED_SHORT: {
+ const GLushort *us_indices = (const GLushort *)indices;
+ GLuint max_us = 0;
+ GLuint min_us = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (us_indices[i] != restartIndex) {
+ if (us_indices[i] > max_us) max_us = us_indices[i];
+ if (us_indices[i] < min_us) min_us = us_indices[i];
+ }
+ }
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ if (us_indices[i] > max_us) max_us = us_indices[i];
+ if (us_indices[i] < min_us) min_us = us_indices[i];
+ }
+ }
+ *min_index = min_us;
+ *max_index = max_us;
+ break;
+ }
+ case GL_UNSIGNED_BYTE: {
+ const GLubyte *ub_indices = (const GLubyte *)indices;
+ GLuint max_ub = 0;
+ GLuint min_ub = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (ub_indices[i] != restartIndex) {
+ if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+ if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+ }
+ }
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+ if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+ }
+ }
+ *min_index = min_ub;
+ *max_index = max_ub;
+ break;
+ }
+ default:
+ unreachable("not reached");
+ }
+
+ if (_mesa_is_bufferobj(ib->obj)) {
+ vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
+ *min_index, *max_index);
+ ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
+ }
+}
+
+/**
+ * Compute min and max elements for nr_prims
+ */
+void
+vbo_get_minmax_indices(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint *min_index,
+ GLuint *max_index,
+ GLuint nr_prims)
+{
+ GLuint tmp_min, tmp_max;
+ GLuint i;
+ GLuint count;
+
+ *min_index = ~0;
+ *max_index = 0;
+
+ for (i = 0; i < nr_prims; i++) {
+ const struct _mesa_prim *start_prim;
+
+ start_prim = &prims[i];
+ count = start_prim->count;
+ /* Do combination if possible to reduce map/unmap count */
+ while ((i + 1 < nr_prims) &&
+ (prims[i].start + prims[i].count == prims[i+1].start)) {
+ count += prims[i+1].count;
+ i++;
+ }
+ vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
+ *min_index = MIN2(*min_index, tmp_min);
+ *max_index = MAX2(*max_index, tmp_max);
+ }
+}
diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index c185f62099e..b0aca19c8b0 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -69,7 +69,7 @@ _mesa_x86_64_transform_points4_general:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -80,7 +80,7 @@ _mesa_x86_64_transform_points4_general:
p4_general_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -93,7 +93,7 @@ p4_general_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -150,7 +150,7 @@ _mesa_x86_64_transform_points4_3d:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -166,7 +166,7 @@ _mesa_x86_64_transform_points4_3d:
p4_3d_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -179,7 +179,7 @@ p4_3d_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -210,8 +210,8 @@ _mesa_x86_64_transform_points4_identity:
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 64(%rsi)
- prefetchw 64(%rdi)
+ prefetcht1 64(%rsi)
+ prefetcht1 64(%rdi)
add %ecx, %ecx
@@ -242,7 +242,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
@@ -255,7 +255,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
p4_3d_no_rot_loop:
- prefetchw 32(%rdi)
+ prefetcht1 32(%rdi)
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -279,7 +279,7 @@ p4_3d_no_rot_loop:
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx)
+ prefetcht1 32(%rdx)
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
@@ -311,7 +311,7 @@ _mesa_3dnow_transform_points4_perspective:
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd 40(%rsi), %mm1 /* | m22 */
@@ -321,7 +321,7 @@ _mesa_3dnow_transform_points4_perspective:
p4_perspective_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -347,7 +347,7 @@ p4_perspective_loop:
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
jnz p4_perspective_loop
p4_perspective_done:
@@ -374,14 +374,14 @@ _mesa_3dnow_transform_points4_2d_no_rot:
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -394,7 +394,7 @@ p4_2d_no_rot_loop:
addq %rax, %rdx
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
@@ -433,7 +433,7 @@ _mesa_3dnow_transform_points4_2d:
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 4 */
@@ -443,7 +443,7 @@ _mesa_3dnow_transform_points4_2d:
p4_2d_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -460,7 +460,7 @@ p4_2d_loop:
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm6, %mm3 /* r1 | r0 */
diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 3247593c1f6..4cfe3d93251 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -163,6 +163,32 @@ _mesa_hash_table_destroy(struct hash_table *ht,
ralloc_free(ht);
}
+/**
+ * Deletes all entries of the given hash table without deleting the table
+ * itself or changing its structure.
+ *
+ * If delete_function is passed, it gets called on each entry present.
+ */
+void
+_mesa_hash_table_clear(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry))
+{
+ struct hash_entry *entry;
+
+ for (entry = ht->table; entry != ht->table + ht->size; entry++) {
+ if (entry->key == NULL)
+ continue;
+
+ if (delete_function != NULL && entry->key != ht->deleted_key)
+ delete_function(entry);
+
+ entry->key = NULL;
+ }
+
+ ht->entries = 0;
+ ht->deleted_entries = 0;
+}
+
/** Sets the value of the key pointer used for deleted entries in the table.
*
* The assumption is that usually keys are actual pointers, so we use a
@@ -300,7 +326,8 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
* required to avoid memory leaks, perform a search
* before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(ht, entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
entry->data = data;
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index eb9dbc333ec..85b013cac24 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -64,9 +64,16 @@ _mesa_hash_table_create(void *mem_ctx,
const void *b));
void _mesa_hash_table_destroy(struct hash_table *ht,
void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry));
void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
const void *deleted_key);
+static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht)
+{
+ return ht->entries;
+}
+
struct hash_entry *
_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
struct hash_entry *
diff --git a/src/util/set.c b/src/util/set.c
index f01f8699ac2..99abefd0632 100644
--- a/src/util/set.c
+++ b/src/util/set.c
@@ -282,7 +282,8 @@ set_add(struct set *ht, uint32_t hash, const void *key)
* If freeing of old keys is required to avoid memory leaks,
* perform a search before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
return entry;
diff --git a/src/util/tests/hash_table/Makefile.am b/src/util/tests/hash_table/Makefile.am
index 04a77e30df1..8f12240cede 100644
--- a/src/util/tests/hash_table/Makefile.am
+++ b/src/util/tests/hash_table/Makefile.am
@@ -29,6 +29,7 @@ LDADD = \
$(DLOPEN_LIBS)
TESTS = \
+ clear \
collision \
delete_and_lookup \
delete_management \
diff --git a/src/util/tests/hash_table/clear.c b/src/util/tests/hash_table/clear.c
new file mode 100644
index 00000000000..526700bfb0f
--- /dev/null
+++ b/src/util/tests/hash_table/clear.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "hash_table.h"
+
+static void *make_key(uint32_t i)
+{
+ return (void *)(uintptr_t)(1 + i);
+}
+
+static uint32_t key_id(const void *key)
+{
+ return (uintptr_t)key - 1;
+}
+
+static uint32_t key_hash(const void *key)
+{
+ return (uintptr_t)key;
+}
+
+static bool key_equal(const void *a, const void *b)
+{
+ return a == b;
+}
+
+static void delete_function(struct hash_entry *entry)
+{
+ bool *deleted = (bool *)entry->data;
+ assert(!*deleted);
+ *deleted = true;
+}
+
+int main()
+{
+ struct hash_table *ht;
+ struct hash_entry *entry;
+ const uint32_t size = 1000;
+ bool flags[size];
+ uint32_t i;
+
+ ht = _mesa_hash_table_create(NULL, key_hash, key_equal);
+
+ for (i = 0; i < size; ++i) {
+ flags[i] = false;
+ _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+ }
+
+ _mesa_hash_table_clear(ht, delete_function);
+ assert(_mesa_hash_table_next_entry(ht, NULL) == NULL);
+
+ /* Check that delete_function was called and that repopulating the table
+ * works. */
+ for (i = 0; i < size; ++i) {
+ assert(flags[i]);
+ flags[i] = false;
+ _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+ }
+
+ /* Check that exactly the right set of entries is in the table. */
+ for (i = 0; i < size; ++i) {
+ assert(_mesa_hash_table_search(ht, make_key(i)));
+ }
+
+ hash_table_foreach(ht, entry) {
+ assert(key_id(entry->key) < size);
+ }
+
+ _mesa_hash_table_destroy(ht, NULL);
+
+ return 0;
+}