mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-18 06:50:24 +01:00
Merge remote-tracking branch 'mesa-public/master' into vulkan
This commit is contained in:
commit
9401516113
248 changed files with 7167 additions and 2008 deletions
|
|
@ -5,6 +5,7 @@
|
|||
(c-file-style . "stroustrup")
|
||||
(fill-column . 78)
|
||||
(eval . (progn
|
||||
(c-set-offset 'case-label '0)
|
||||
(c-set-offset 'innamespace '0)
|
||||
(c-set-offset 'inline-open '0)))
|
||||
)
|
||||
|
|
|
|||
11
appveyor.yml
11
appveyor.yml
|
|
@ -6,7 +6,7 @@
|
|||
# - Select Git and fill in the Git clone URL
|
||||
# - Setup a Git hook as explained in
|
||||
# https://github.com/appveyor/webhooks#installing-git-hook
|
||||
# - Check 'Settings > General > Skip branches without appveyor'
|
||||
# - Check 'Settings > General > Skip branches without appveyor.yml'
|
||||
# - Check 'Settings > General > Rolling builds'
|
||||
# - Setup the global or project notifications to your liking
|
||||
#
|
||||
|
|
@ -24,7 +24,14 @@ branches:
|
|||
except:
|
||||
- /^travis.*$/
|
||||
|
||||
clone_depth: 5
|
||||
# Don't download the full Mesa history to speed up cloning. However the clone
|
||||
# depth must not be too small, otherwise builds might fail when lots of patches
|
||||
# are committed in succession, because the desired commit is not found on the
|
||||
# truncated history.
|
||||
#
|
||||
# See also:
|
||||
# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
|
||||
clone_depth: 100
|
||||
|
||||
cache:
|
||||
- win_flex_bison-2.4.5.zip
|
||||
|
|
|
|||
|
|
@ -2161,7 +2161,12 @@ gallium_require_drm_loader() {
|
|||
fi
|
||||
}
|
||||
|
||||
dnl This is for Glamor. Skip this if OpenGL is disabled.
|
||||
require_egl_drm() {
|
||||
if test "x$enable_opengl" = xno; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
case "$with_egl_platforms" in
|
||||
*drm*)
|
||||
;;
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ GL 4.2, GLSL 4.20:
|
|||
|
||||
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
|
||||
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_texture_storage DONE (all drivers)
|
||||
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
|
@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
|
|||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_robust_buffer_access_behavior not started
|
||||
GL_ARB_shader_image_size DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
|
|
@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40:
|
|||
- specified transform/feedback layout in progress
|
||||
- input/output block locations DONE
|
||||
GL_ARB_multi_bind DONE (all drivers)
|
||||
GL_ARB_query_buffer_object not started
|
||||
GL_ARB_query_buffer_object DONE (nvc0)
|
||||
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
|
|||
"130". Mesa will not really implement all the features of the given language version
|
||||
if it's higher than what's normally reported. (for developers only)
|
||||
<li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a>
|
||||
<li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
|
||||
</ul>
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,10 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_compute_shader on i965</li>
|
||||
<li>GL_ARB_copy_image on r600</li>
|
||||
<li>GL_ARB_indirect_parameters on nvc0</li>
|
||||
<li>GL_ARB_query_buffer_object on nvc0</li>
|
||||
<li>GL_ARB_shader_atomic_counters on nvc0</li>
|
||||
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
|
||||
<li>GL_ARB_shader_storage_buffer_object on nvc0</li>
|
||||
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
|
||||
|
|
@ -58,6 +61,8 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
|
||||
<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
|
||||
<li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li>
|
||||
<li>GL_ATI_meminfo on r600, radeonsi</li>
|
||||
<li>GL_NVX_gpu_memory_info on r600, radeonsi</li>
|
||||
<li>New OSMesaCreateContextAttribs() function (for creating core profile
|
||||
contexts)</li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -227,6 +227,7 @@ typedef struct _RGNDATA {
|
|||
#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157)
|
||||
#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160)
|
||||
#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164)
|
||||
#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168)
|
||||
|
||||
/********************************************************
|
||||
* Bitmasks *
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ typedef struct ID3DPresentVtbl
|
|||
HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow);
|
||||
HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride);
|
||||
HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This, HWND hWnd, int *width, int *height, int *depth);
|
||||
/* Available since version 1.1 */
|
||||
BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This);
|
||||
} ID3DPresentVtbl;
|
||||
|
||||
struct ID3DPresent
|
||||
|
|
@ -96,6 +98,7 @@ struct ID3DPresent
|
|||
#define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
|
||||
#define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
|
||||
/* Dispatches to the GetWindowInfo vtable slot (the vtable has no GetWindowSize member). */
#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowInfo(p,a,b,c,d)
|
||||
#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)
|
||||
|
||||
typedef struct ID3DPresentGroupVtbl
|
||||
{
|
||||
|
|
|
|||
1
src/compiler/.gitignore
vendored
Normal file
1
src/compiler/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
glsl_compiler
|
||||
|
|
@ -220,9 +220,11 @@ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
|
|||
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
|
||||
|
||||
glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
|
||||
$(MKDIR_GEN)
|
||||
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
|
||||
|
||||
glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
|
||||
$(MKDIR_GEN)
|
||||
$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
|
||||
|
||||
glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
|
||||
|
|
|
|||
1
src/compiler/glsl/.gitignore
vendored
1
src/compiler/glsl/.gitignore
vendored
|
|
@ -1,4 +1,3 @@
|
|||
glsl_compiler
|
||||
glsl_lexer.cpp
|
||||
glsl_parser.cpp
|
||||
glsl_parser.h
|
||||
|
|
|
|||
|
|
@ -291,6 +291,10 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
|
|||
if (!state->is_version(120, 0))
|
||||
return false;
|
||||
|
||||
/* ESSL does not allow implicit conversions */
|
||||
if (state->es_shader)
|
||||
return false;
|
||||
|
||||
/* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
|
||||
*
|
||||
* "There are no implicit array or structure conversions. For
|
||||
|
|
|
|||
|
|
@ -661,7 +661,7 @@ private:
|
|||
BA1(roundEven)
|
||||
BA1(ceil)
|
||||
BA1(fract)
|
||||
B2(mod)
|
||||
BA2(mod)
|
||||
BA1(modf)
|
||||
BA2(min)
|
||||
BA2(max)
|
||||
|
|
@ -1242,23 +1242,23 @@ builtin_builder::create_builtins()
|
|||
FD(fract)
|
||||
|
||||
add_function("mod",
|
||||
_mod(glsl_type::float_type, glsl_type::float_type),
|
||||
_mod(glsl_type::vec2_type, glsl_type::float_type),
|
||||
_mod(glsl_type::vec3_type, glsl_type::float_type),
|
||||
_mod(glsl_type::vec4_type, glsl_type::float_type),
|
||||
_mod(always_available, glsl_type::float_type, glsl_type::float_type),
|
||||
_mod(always_available, glsl_type::vec2_type, glsl_type::float_type),
|
||||
_mod(always_available, glsl_type::vec3_type, glsl_type::float_type),
|
||||
_mod(always_available, glsl_type::vec4_type, glsl_type::float_type),
|
||||
|
||||
_mod(glsl_type::vec2_type, glsl_type::vec2_type),
|
||||
_mod(glsl_type::vec3_type, glsl_type::vec3_type),
|
||||
_mod(glsl_type::vec4_type, glsl_type::vec4_type),
|
||||
_mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type),
|
||||
_mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type),
|
||||
_mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type),
|
||||
|
||||
_mod(glsl_type::double_type, glsl_type::double_type),
|
||||
_mod(glsl_type::dvec2_type, glsl_type::double_type),
|
||||
_mod(glsl_type::dvec3_type, glsl_type::double_type),
|
||||
_mod(glsl_type::dvec4_type, glsl_type::double_type),
|
||||
_mod(fp64, glsl_type::double_type, glsl_type::double_type),
|
||||
_mod(fp64, glsl_type::dvec2_type, glsl_type::double_type),
|
||||
_mod(fp64, glsl_type::dvec3_type, glsl_type::double_type),
|
||||
_mod(fp64, glsl_type::dvec4_type, glsl_type::double_type),
|
||||
|
||||
_mod(glsl_type::dvec2_type, glsl_type::dvec2_type),
|
||||
_mod(glsl_type::dvec3_type, glsl_type::dvec3_type),
|
||||
_mod(glsl_type::dvec4_type, glsl_type::dvec4_type),
|
||||
_mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),
|
||||
_mod(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
|
||||
_mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
|
||||
NULL);
|
||||
|
||||
FD(modf)
|
||||
|
|
@ -3452,9 +3452,10 @@ UNOPA(ceil, ir_unop_ceil)
|
|||
UNOPA(fract, ir_unop_fract)
|
||||
|
||||
ir_function_signature *
|
||||
builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
|
||||
builtin_builder::_mod(builtin_available_predicate avail,
|
||||
const glsl_type *x_type, const glsl_type *y_type)
|
||||
{
|
||||
return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
|
||||
return binop(avail, ir_binop_mod, x_type, x_type, y_type);
|
||||
}
|
||||
|
||||
ir_function_signature *
|
||||
|
|
|
|||
|
|
@ -328,6 +328,11 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
|
|||
this->fields[this->num_fields].sample = 0;
|
||||
this->fields[this->num_fields].patch = 0;
|
||||
this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
|
||||
this->fields[this->num_fields].image_read_only = 0;
|
||||
this->fields[this->num_fields].image_write_only = 0;
|
||||
this->fields[this->num_fields].image_coherent = 0;
|
||||
this->fields[this->num_fields].image_volatile = 0;
|
||||
this->fields[this->num_fields].image_restrict = 0;
|
||||
this->num_fields++;
|
||||
}
|
||||
|
||||
|
|
@ -1201,7 +1206,12 @@ builtin_variable_generator::generate_varyings()
|
|||
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
|
||||
if (state->stage != MESA_SHADER_FRAGMENT) {
|
||||
add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
|
||||
add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
|
||||
if (!state->es_shader ||
|
||||
state->stage == MESA_SHADER_VERTEX ||
|
||||
(state->stage == MESA_SHADER_GEOMETRY &&
|
||||
state->OES_geometry_point_size_enable)) {
|
||||
add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
|
||||
}
|
||||
}
|
||||
|
||||
if (state->is_version(130, 0)) {
|
||||
|
|
|
|||
|
|
@ -2386,6 +2386,13 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
|
|||
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
|
||||
if (extensions->ARB_blend_func_extended)
|
||||
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
|
||||
|
||||
if (version >= 310) {
|
||||
if (extensions->OES_geometry_shader) {
|
||||
add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
|
||||
add_builtin_define(parser, "GL_OES_geometry_shader", 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
|
||||
|
|
|
|||
|
|
@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
/* OES extensions go here, sorted alphabetically.
|
||||
*/
|
||||
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
|
||||
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
|
||||
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
|
||||
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
|
||||
EXT(OES_texture_3D, false, true, dummy_true),
|
||||
|
|
@ -1867,59 +1868,76 @@ do_common_optimization(exec_list *ir, bool linked,
|
|||
const struct gl_shader_compiler_options *options,
|
||||
bool native_integers)
|
||||
{
|
||||
const bool debug = false;
|
||||
GLboolean progress = GL_FALSE;
|
||||
|
||||
progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
|
||||
#define OPT(PASS, ...) do { \
|
||||
if (debug) { \
|
||||
fprintf(stderr, "START GLSL optimization %s\n", #PASS); \
|
||||
const bool opt_progress = PASS(__VA_ARGS__); \
|
||||
progress = opt_progress || progress; \
|
||||
if (opt_progress) \
|
||||
_mesa_print_ir(stderr, ir, NULL); \
|
||||
fprintf(stderr, "GLSL optimization %s: %s progress\n", \
|
||||
#PASS, opt_progress ? "made" : "no"); \
|
||||
} else { \
|
||||
progress = PASS(__VA_ARGS__) || progress; \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
|
||||
|
||||
if (linked) {
|
||||
progress = do_function_inlining(ir) || progress;
|
||||
progress = do_dead_functions(ir) || progress;
|
||||
progress = do_structure_splitting(ir) || progress;
|
||||
OPT(do_function_inlining, ir);
|
||||
OPT(do_dead_functions, ir);
|
||||
OPT(do_structure_splitting, ir);
|
||||
}
|
||||
progress = do_if_simplification(ir) || progress;
|
||||
progress = opt_flatten_nested_if_blocks(ir) || progress;
|
||||
progress = opt_conditional_discard(ir) || progress;
|
||||
progress = do_copy_propagation(ir) || progress;
|
||||
progress = do_copy_propagation_elements(ir) || progress;
|
||||
OPT(do_if_simplification, ir);
|
||||
OPT(opt_flatten_nested_if_blocks, ir);
|
||||
OPT(opt_conditional_discard, ir);
|
||||
OPT(do_copy_propagation, ir);
|
||||
OPT(do_copy_propagation_elements, ir);
|
||||
|
||||
if (options->OptimizeForAOS && !linked)
|
||||
progress = opt_flip_matrices(ir) || progress;
|
||||
OPT(opt_flip_matrices, ir);
|
||||
|
||||
if (linked && options->OptimizeForAOS) {
|
||||
progress = do_vectorize(ir) || progress;
|
||||
OPT(do_vectorize, ir);
|
||||
}
|
||||
|
||||
if (linked)
|
||||
progress = do_dead_code(ir, uniform_locations_assigned) || progress;
|
||||
OPT(do_dead_code, ir, uniform_locations_assigned);
|
||||
else
|
||||
progress = do_dead_code_unlinked(ir) || progress;
|
||||
progress = do_dead_code_local(ir) || progress;
|
||||
progress = do_tree_grafting(ir) || progress;
|
||||
progress = do_constant_propagation(ir) || progress;
|
||||
OPT(do_dead_code_unlinked, ir);
|
||||
OPT(do_dead_code_local, ir);
|
||||
OPT(do_tree_grafting, ir);
|
||||
OPT(do_constant_propagation, ir);
|
||||
if (linked)
|
||||
progress = do_constant_variable(ir) || progress;
|
||||
OPT(do_constant_variable, ir);
|
||||
else
|
||||
progress = do_constant_variable_unlinked(ir) || progress;
|
||||
progress = do_constant_folding(ir) || progress;
|
||||
progress = do_minmax_prune(ir) || progress;
|
||||
progress = do_rebalance_tree(ir) || progress;
|
||||
progress = do_algebraic(ir, native_integers, options) || progress;
|
||||
progress = do_lower_jumps(ir) || progress;
|
||||
progress = do_vec_index_to_swizzle(ir) || progress;
|
||||
progress = lower_vector_insert(ir, false) || progress;
|
||||
progress = do_swizzle_swizzle(ir) || progress;
|
||||
progress = do_noop_swizzle(ir) || progress;
|
||||
OPT(do_constant_variable_unlinked, ir);
|
||||
OPT(do_constant_folding, ir);
|
||||
OPT(do_minmax_prune, ir);
|
||||
OPT(do_rebalance_tree, ir);
|
||||
OPT(do_algebraic, ir, native_integers, options);
|
||||
OPT(do_lower_jumps, ir);
|
||||
OPT(do_vec_index_to_swizzle, ir);
|
||||
OPT(lower_vector_insert, ir, false);
|
||||
OPT(do_swizzle_swizzle, ir);
|
||||
OPT(do_noop_swizzle, ir);
|
||||
|
||||
progress = optimize_split_arrays(ir, linked) || progress;
|
||||
progress = optimize_redundant_jumps(ir) || progress;
|
||||
OPT(optimize_split_arrays, ir, linked);
|
||||
OPT(optimize_redundant_jumps, ir);
|
||||
|
||||
loop_state *ls = analyze_loop_variables(ir);
|
||||
if (ls->loop_found) {
|
||||
progress = set_loop_controls(ir, ls) || progress;
|
||||
progress = unroll_loops(ir, ls, options) || progress;
|
||||
OPT(set_loop_controls, ir, ls);
|
||||
OPT(unroll_loops, ir, ls, options);
|
||||
}
|
||||
delete ls;
|
||||
|
||||
#undef OPT
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -591,6 +591,8 @@ struct _mesa_glsl_parse_state {
|
|||
*/
|
||||
bool OES_EGL_image_external_enable;
|
||||
bool OES_EGL_image_external_warn;
|
||||
bool OES_geometry_point_size_enable;
|
||||
bool OES_geometry_point_size_warn;
|
||||
bool OES_geometry_shader_enable;
|
||||
bool OES_geometry_shader_warn;
|
||||
bool OES_standard_derivatives_enable;
|
||||
|
|
|
|||
|
|
@ -471,10 +471,11 @@ private:
|
|||
*/
|
||||
class parcel_out_uniform_storage : public program_resource_visitor {
|
||||
public:
|
||||
parcel_out_uniform_storage(struct string_to_uint_map *map,
|
||||
parcel_out_uniform_storage(struct gl_shader_program *prog,
|
||||
struct string_to_uint_map *map,
|
||||
struct gl_uniform_storage *uniforms,
|
||||
union gl_constant_value *values)
|
||||
: map(map), uniforms(uniforms), values(values)
|
||||
: prog(prog), map(map), uniforms(uniforms), values(values)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -492,8 +493,7 @@ public:
|
|||
memset(this->targets, 0, sizeof(this->targets));
|
||||
}
|
||||
|
||||
void set_and_process(struct gl_shader_program *prog,
|
||||
ir_variable *var)
|
||||
void set_and_process(ir_variable *var)
|
||||
{
|
||||
current_var = var;
|
||||
field_counter = 0;
|
||||
|
|
@ -643,6 +643,16 @@ private:
|
|||
uniform->opaque[shader_type].index = this->next_image;
|
||||
uniform->opaque[shader_type].active = true;
|
||||
|
||||
/* Set image access qualifiers */
|
||||
const GLenum access =
|
||||
(current_var->data.image_read_only ? GL_READ_ONLY :
|
||||
current_var->data.image_write_only ? GL_WRITE_ONLY :
|
||||
GL_READ_WRITE);
|
||||
|
||||
for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
|
||||
prog->_LinkedShaders[shader_type]->
|
||||
ImageAccess[this->next_image + j] = access;
|
||||
|
||||
/* Increment the image index by 1 for non-arrays and by the
|
||||
* number of array elements for arrays.
|
||||
*/
|
||||
|
|
@ -844,6 +854,11 @@ private:
|
|||
this->values += values_for_type(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Current program being processed.
|
||||
*/
|
||||
struct gl_shader_program *prog;
|
||||
|
||||
struct string_to_uint_map *map;
|
||||
|
||||
struct gl_uniform_storage *uniforms;
|
||||
|
|
@ -1007,40 +1022,6 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
link_set_image_access_qualifiers(struct gl_shader_program *prog,
|
||||
gl_shader *sh, unsigned shader_stage,
|
||||
ir_variable *var, const glsl_type *type,
|
||||
char **name, size_t name_length)
|
||||
{
|
||||
/* Handle arrays of arrays */
|
||||
if (type->is_array() && type->fields.array->is_array()) {
|
||||
for (unsigned i = 0; i < type->length; i++) {
|
||||
size_t new_length = name_length;
|
||||
|
||||
/* Append the subscript to the current variable name */
|
||||
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
|
||||
|
||||
link_set_image_access_qualifiers(prog, sh, shader_stage, var,
|
||||
type->fields.array, name,
|
||||
new_length);
|
||||
}
|
||||
} else {
|
||||
unsigned id = 0;
|
||||
bool found = prog->UniformHash->get(id, *name);
|
||||
assert(found);
|
||||
(void) found;
|
||||
const gl_uniform_storage *storage = &prog->UniformStorage[id];
|
||||
const unsigned index = storage->opaque[shader_stage].index;
|
||||
const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
|
||||
var->data.image_write_only ? GL_WRITE_ONLY :
|
||||
GL_READ_WRITE);
|
||||
|
||||
for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
|
||||
sh->ImageAccess[index + j] = access;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine the hidden uniform hash map with the uniform hash map so that the
|
||||
* hidden uniforms will be given indices at the end of the uniform storage
|
||||
|
|
@ -1148,7 +1129,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
union gl_constant_value *data_end = &data[num_data_slots];
|
||||
#endif
|
||||
|
||||
parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
|
||||
parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
|
|
@ -1163,7 +1144,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
var->data.mode != ir_var_shader_storage))
|
||||
continue;
|
||||
|
||||
parcel.set_and_process(prog, var);
|
||||
parcel.set_and_process(var);
|
||||
}
|
||||
|
||||
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
|
||||
|
|
@ -1301,29 +1282,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
prog->NumHiddenUniforms = hidden_uniforms;
|
||||
prog->UniformStorage = uniforms;
|
||||
|
||||
/**
|
||||
* Scan the program for image uniforms and store image unit access
|
||||
* information into the gl_shader data structure.
|
||||
*/
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
if (sh == NULL)
|
||||
continue;
|
||||
|
||||
foreach_in_list(ir_instruction, node, sh->ir) {
|
||||
ir_variable *var = node->as_variable();
|
||||
|
||||
if (var && var->data.mode == ir_var_uniform &&
|
||||
var->type->contains_image()) {
|
||||
char *name_copy = ralloc_strdup(NULL, var->name);
|
||||
link_set_image_access_qualifiers(prog, sh, i, var, var->type,
|
||||
&name_copy, strlen(var->name));
|
||||
ralloc_free(name_copy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
link_set_uniform_initializers(prog, boolean_true);
|
||||
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -967,11 +967,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
|
|||
return;
|
||||
}
|
||||
|
||||
if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
|
||||
bool needs_flat_qualifier = consumer_var == NULL &&
|
||||
(producer_var->type->contains_integer() ||
|
||||
producer_var->type->contains_double());
|
||||
|
||||
if (needs_flat_qualifier ||
|
||||
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
|
||||
/* Since this varying is not being consumed by the fragment shader, its
|
||||
* interpolation type cannot possibly affect rendering.
|
||||
* Also, this variable is non-flat and is (or contains) an integer.
|
||||
* Also, this variable is non-flat and is (or contains) an integer
|
||||
* or a double.
|
||||
* If the consumer stage is unknown, don't modify the interpolation
|
||||
* type as it could affect rendering later with separate shaders.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -4633,8 +4633,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
&prog->NumShaderStorageBlocks,
|
||||
&prog->SsboInterfaceBlockIndex);
|
||||
|
||||
/* FINISHME: Assign fragment shader output locations. */
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -327,6 +327,7 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
|
|||
unsigned *const_offset,
|
||||
bool *row_major,
|
||||
int *matrix_columns,
|
||||
const glsl_struct_field **struct_field,
|
||||
unsigned packing)
|
||||
{
|
||||
*offset = new(mem_ctx) ir_constant(0u);
|
||||
|
|
@ -442,8 +443,11 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
|
|||
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
|
||||
|
||||
if (strcmp(struct_type->fields.structure[i].name,
|
||||
deref_record->field) == 0)
|
||||
deref_record->field) == 0) {
|
||||
if (struct_field)
|
||||
*struct_field = &struct_type->fields.structure[i];
|
||||
break;
|
||||
}
|
||||
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
intra_struct_offset += type->std430_size(field_row_major);
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ public:
|
|||
void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
|
||||
ir_rvalue **offset, unsigned *const_offset,
|
||||
bool *row_major, int *matrix_columns,
|
||||
const glsl_struct_field **struct_field,
|
||||
unsigned packing);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
|
|||
|
||||
setup_buffer_access(mem_ctx, var, deref,
|
||||
&offset, &const_offset,
|
||||
&row_major, &matrix_columns, packing);
|
||||
&row_major, &matrix_columns, NULL, packing);
|
||||
|
||||
/* Now that we've calculated the offset to the start of the
|
||||
* dereference, walk over the type and emit loads into a temporary.
|
||||
|
|
@ -210,7 +210,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
|
|||
|
||||
setup_buffer_access(mem_ctx, var, deref,
|
||||
&offset, &const_offset,
|
||||
&row_major, &matrix_columns, packing);
|
||||
&row_major, &matrix_columns, NULL, packing);
|
||||
|
||||
deref = new(mem_ctx) ir_dereference_variable(store_var);
|
||||
|
||||
|
|
@ -370,7 +370,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
|
|||
|
||||
setup_buffer_access(mem_ctx, var, deref,
|
||||
&offset, &const_offset,
|
||||
&row_major, &matrix_columns, packing);
|
||||
&row_major, &matrix_columns, NULL, packing);
|
||||
|
||||
assert(offset);
|
||||
assert(!row_major);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class lower_ubo_reference_visitor :
|
|||
public lower_buffer_access::lower_buffer_access {
|
||||
public:
|
||||
lower_ubo_reference_visitor(struct gl_shader *shader)
|
||||
: shader(shader)
|
||||
: shader(shader), struct_field(NULL), variable(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -60,6 +60,7 @@ public:
|
|||
bool *row_major,
|
||||
int *matrix_columns,
|
||||
unsigned packing);
|
||||
uint32_t ssbo_access_params();
|
||||
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
|
||||
ir_rvalue *offset);
|
||||
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
|
||||
|
|
@ -104,6 +105,8 @@ public:
|
|||
|
||||
struct gl_shader *shader;
|
||||
struct gl_uniform_buffer_variable *ubo_var;
|
||||
const struct glsl_struct_field *struct_field;
|
||||
ir_variable *variable;
|
||||
ir_rvalue *uniform_block;
|
||||
bool progress;
|
||||
};
|
||||
|
|
@ -288,8 +291,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
|
|||
|
||||
*const_offset = ubo_var->Offset;
|
||||
|
||||
this->struct_field = NULL;
|
||||
setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
|
||||
matrix_columns, packing);
|
||||
matrix_columns, &this->struct_field, packing);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -317,6 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
|
|||
this->buffer_access_type =
|
||||
var->is_in_shader_storage_block() ?
|
||||
ssbo_load_access : ubo_load_access;
|
||||
this->variable = var;
|
||||
|
||||
/* Compute the offset to the start of the dereference as well as other
|
||||
* information we need to configure the write
|
||||
|
|
@ -370,6 +375,24 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
|
|||
return state->ARB_shader_storage_buffer_object_enable;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
lower_ubo_reference_visitor::ssbo_access_params()
|
||||
{
|
||||
assert(variable);
|
||||
|
||||
if (variable->is_interface_instance()) {
|
||||
assert(struct_field);
|
||||
|
||||
return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
|
||||
(struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
|
||||
(struct_field->image_volatile ? ACCESS_VOLATILE : 0));
|
||||
} else {
|
||||
return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
|
||||
(variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
|
||||
(variable->data.image_volatile ? ACCESS_VOLATILE : 0));
|
||||
}
|
||||
}
|
||||
|
||||
ir_call *
|
||||
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
|
||||
ir_rvalue *deref,
|
||||
|
|
@ -394,6 +417,10 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
|
|||
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
|
||||
sig_params.push_tail(writemask_ref);
|
||||
|
||||
ir_variable *access_ref = new(mem_ctx)
|
||||
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
|
||||
sig_params.push_tail(access_ref);
|
||||
|
||||
ir_function_signature *sig = new(mem_ctx)
|
||||
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
|
||||
assert(sig);
|
||||
|
|
@ -408,6 +435,7 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
|
|||
call_params.push_tail(offset->clone(mem_ctx, NULL));
|
||||
call_params.push_tail(deref->clone(mem_ctx, NULL));
|
||||
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
|
||||
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
|
||||
return new(mem_ctx) ir_call(sig, NULL, &call_params);
|
||||
}
|
||||
|
||||
|
|
@ -426,6 +454,10 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
|
|||
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
|
||||
sig_params.push_tail(offset_ref);
|
||||
|
||||
ir_variable *access_ref = new(mem_ctx)
|
||||
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
|
||||
sig_params.push_tail(access_ref);
|
||||
|
||||
ir_function_signature *sig =
|
||||
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
|
||||
assert(sig);
|
||||
|
|
@ -444,6 +476,7 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
|
|||
exec_list call_params;
|
||||
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
|
||||
call_params.push_tail(offset->clone(mem_ctx, NULL));
|
||||
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
|
||||
|
||||
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
|
||||
}
|
||||
|
|
@ -499,6 +532,7 @@ lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
|
|||
unsigned packing = var->get_interface_type()->interface_packing;
|
||||
|
||||
this->buffer_access_type = ssbo_store_access;
|
||||
this->variable = var;
|
||||
|
||||
/* Compute the offset to the start of the dereference as well as other
|
||||
* information we need to configure the write
|
||||
|
|
@ -678,6 +712,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
|
|||
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
|
||||
|
||||
this->buffer_access_type = ssbo_unsized_array_length_access;
|
||||
this->variable = var;
|
||||
|
||||
/* Compute the offset to the start of the dereference as well as other
|
||||
* information we need to calculate the length.
|
||||
|
|
@ -910,6 +945,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
|
|||
unsigned packing = var->get_interface_type()->interface_packing;
|
||||
|
||||
this->buffer_access_type = ssbo_atomic_access;
|
||||
this->variable = var;
|
||||
|
||||
setup_for_load_or_store(mem_ctx, var, deref,
|
||||
&offset, &const_offset,
|
||||
|
|
|
|||
|
|
@ -361,11 +361,12 @@ tree_grafting_basic_block(ir_instruction *bb_first,
|
|||
if (!lhs_var)
|
||||
continue;
|
||||
|
||||
if (lhs_var->data.mode == ir_var_function_out ||
|
||||
lhs_var->data.mode == ir_var_function_inout ||
|
||||
lhs_var->data.mode == ir_var_shader_out ||
|
||||
lhs_var->data.mode == ir_var_shader_storage)
|
||||
continue;
|
||||
if (lhs_var->data.mode == ir_var_function_out ||
|
||||
lhs_var->data.mode == ir_var_function_inout ||
|
||||
lhs_var->data.mode == ir_var_shader_out ||
|
||||
lhs_var->data.mode == ir_var_shader_storage ||
|
||||
lhs_var->data.mode == ir_var_shader_shared)
|
||||
continue;
|
||||
|
||||
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
|
||||
|
||||
|
|
|
|||
|
|
@ -164,6 +164,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
|
|||
this->fields.structure[i].sample = fields[i].sample;
|
||||
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
|
||||
this->fields.structure[i].patch = fields[i].patch;
|
||||
this->fields.structure[i].image_read_only = fields[i].image_read_only;
|
||||
this->fields.structure[i].image_write_only = fields[i].image_write_only;
|
||||
this->fields.structure[i].image_coherent = fields[i].image_coherent;
|
||||
this->fields.structure[i].image_volatile = fields[i].image_volatile;
|
||||
this->fields.structure[i].image_restrict = fields[i].image_restrict;
|
||||
this->fields.structure[i].precision = fields[i].precision;
|
||||
}
|
||||
|
||||
|
|
@ -1330,6 +1335,13 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
|
|||
if (this == desired)
|
||||
return true;
|
||||
|
||||
/* ESSL does not allow implicit conversions. If there is no state, we're
|
||||
* doing intra-stage function linking where these checks have already been
|
||||
* done.
|
||||
*/
|
||||
if (state && state->es_shader)
|
||||
return false;
|
||||
|
||||
/* There is no conversion among matrix types. */
|
||||
if (this->matrix_columns > 1 || desired->matrix_columns > 1)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -885,7 +885,8 @@ struct glsl_struct_field {
|
|||
glsl_struct_field(const struct glsl_type *_type, const char *_name)
|
||||
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
|
||||
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
|
||||
precision(GLSL_PRECISION_NONE)
|
||||
precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0),
|
||||
image_coherent(0), image_volatile(0), image_restrict(0)
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
b->shader->options->lower_pack_unorm_2x16);
|
||||
|
||||
nir_ssa_def *word =
|
||||
nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
|
||||
nir_channel(b, word, 0));
|
||||
|
|
@ -154,7 +154,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
b->shader->options->lower_pack_unorm_4x8);
|
||||
|
||||
nir_ssa_def *byte =
|
||||
nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
|
||||
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
|
||||
|
|
|
|||
|
|
@ -238,15 +238,15 @@ unpack_2x16("unorm")
|
|||
unpack_4x8("unorm")
|
||||
unpack_2x16("half")
|
||||
|
||||
unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
|
||||
dst = (src0.x & 0xffff) | (src0.y >> 16);
|
||||
# Pack the low 16 bits of src0.x into bits 0-15 of the result and src0.y into
# bits 16-31.  The second component must be shifted *left* by 16; shifting it
# right would throw away its low half-word instead of packing it.
unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
dst.x = (src0.x & 0xffff) | (src0.y << 16);
""")
|
||||
|
||||
unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
|
||||
dst = (src0.x << 0) |
|
||||
(src0.y << 8) |
|
||||
(src0.z << 16) |
|
||||
(src0.w << 24);
|
||||
unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
|
||||
dst.x = (src0.x << 0) |
|
||||
(src0.y << 8) |
|
||||
(src0.z << 16) |
|
||||
(src0.w << 24);
|
||||
""")
|
||||
|
||||
# Lowered floating point unpacking operations.
|
||||
|
|
@ -562,12 +562,12 @@ dst.y = src1.x;
|
|||
""")
|
||||
|
||||
# Byte extraction
|
||||
binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
|
||||
binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
|
||||
binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
|
||||
binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
|
||||
|
||||
# Word extraction
|
||||
binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
|
||||
binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
|
||||
binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
|
||||
binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
|
||||
|
||||
|
||||
def triop(name, ty, const_expr):
|
||||
|
|
|
|||
|
|
@ -248,19 +248,19 @@ optimizations = [
|
|||
('ubfe', 'value', 'offset', 'bits')),
|
||||
'options->lower_bitfield_extract'),
|
||||
|
||||
(('extract_ibyte', a, b),
|
||||
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8),
|
||||
(('extract_i8', a, b),
|
||||
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
|
||||
'options->lower_extract_byte'),
|
||||
|
||||
(('extract_ubyte', a, b),
|
||||
(('extract_u8', a, b),
|
||||
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
|
||||
'options->lower_extract_byte'),
|
||||
|
||||
(('extract_iword', a, b),
|
||||
(('extract_i16', a, b),
|
||||
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
|
||||
'options->lower_extract_word'),
|
||||
|
||||
(('extract_uword', a, b),
|
||||
(('extract_u16', a, b),
|
||||
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
|
||||
'options->lower_extract_word'),
|
||||
|
||||
|
|
@ -285,30 +285,30 @@ optimizations = [
|
|||
'options->lower_pack_snorm_4x8'),
|
||||
|
||||
(('unpack_unorm_2x16', 'v'),
|
||||
('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
|
||||
('extract_uword', 'v', 1), 0, 0)),
|
||||
('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
|
||||
('extract_u16', 'v', 1))),
|
||||
65535.0),
|
||||
'options->lower_unpack_unorm_2x16'),
|
||||
|
||||
(('unpack_unorm_4x8', 'v'),
|
||||
('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
|
||||
('extract_ubyte', 'v', 1),
|
||||
('extract_ubyte', 'v', 2),
|
||||
('extract_ubyte', 'v', 3))),
|
||||
('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
|
||||
('extract_u8', 'v', 1),
|
||||
('extract_u8', 'v', 2),
|
||||
('extract_u8', 'v', 3))),
|
||||
255.0),
|
||||
'options->lower_unpack_unorm_4x8'),
|
||||
|
||||
(('unpack_snorm_2x16', 'v'),
|
||||
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
|
||||
('extract_iword', 'v', 1), 0, 0)),
|
||||
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
|
||||
('extract_i16', 'v', 1))),
|
||||
32767.0))),
|
||||
'options->lower_unpack_snorm_2x16'),
|
||||
|
||||
(('unpack_snorm_4x8', 'v'),
|
||||
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
|
||||
('extract_ibyte', 'v', 1),
|
||||
('extract_ibyte', 'v', 2),
|
||||
('extract_ibyte', 'v', 3))),
|
||||
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
|
||||
('extract_i8', 'v', 1),
|
||||
('extract_i8', 'v', 2),
|
||||
('extract_i8', 'v', 3))),
|
||||
127.0))),
|
||||
'options->lower_unpack_snorm_4x8'),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -544,6 +544,16 @@ enum gl_frag_depth_layout
|
|||
FRAG_DEPTH_LAYOUT_UNCHANGED
|
||||
};
|
||||
|
||||
/**
 * \brief Buffer access qualifiers
 *
 * Each qualifier has its own bit (values 1, 2 and 4).
 */
enum gl_buffer_access_qualifier
{
   ACCESS_COHERENT = 1 << 0,
   ACCESS_RESTRICT = 1 << 1,
   ACCESS_VOLATILE = 1 << 2,
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ endif
|
|||
|
||||
# virgl
|
||||
ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
|
||||
SUBDIRS += winsys/virgl/drm drivers/virgl
|
||||
SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
|
||||
endif
|
||||
|
||||
# vmwgfx
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
|
|||
*
|
||||
* Convert float32 to half floats, preserving Infs and NaNs,
|
||||
* with rounding towards zero (trunc).
|
||||
* XXX: For GL, would prefer rounding towards nearest(-even).
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_float_to_half(struct gallivm_state *gallivm,
|
||||
|
|
@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
|
|||
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
|
||||
LLVMValueRef result;
|
||||
|
||||
/*
|
||||
* Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
|
||||
* directly, without any (x86 or generic) intrinsics.
|
||||
* Albeit the rounding mode cannot be specified (and is undefined,
|
||||
* though in practice on x86 seems to do nearest-even but it may
|
||||
* be dependent on instruction set support), so is essentially
|
||||
* useless.
|
||||
*/
|
||||
|
||||
if (util_cpu_caps.has_f16c &&
|
||||
(length == 4 || length == 8)) {
|
||||
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
|
||||
|
|
@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
|
|||
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
|
||||
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
|
||||
#if 0
|
||||
/* XXX: not really supported by backends */
|
||||
/*
|
||||
* XXX: not really supported by backends.
|
||||
* Even if they would now, rounding mode cannot be specified and
|
||||
* is undefined.
|
||||
*/
|
||||
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
|
||||
#else
|
||||
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
|
||||
|
|
|
|||
|
|
@ -256,6 +256,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Un-interleave vector.
|
||||
* This will return a vector consisting of every second element
|
||||
* (depending on lo_hi, beginning at 0 or 1).
|
||||
* The returned vector size (elems and width) will only be half
|
||||
* that of the source vector.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_uninterleave1(struct gallivm_state *gallivm,
|
||||
unsigned num_elems,
|
||||
LLVMValueRef a,
|
||||
unsigned lo_hi)
|
||||
{
|
||||
LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i;
|
||||
assert(num_elems <= LP_MAX_VECTOR_LENGTH);
|
||||
|
||||
for (i = 0; i < num_elems / 2; ++i)
|
||||
elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
|
||||
|
||||
shuffle = LLVMConstVector(elems, num_elems / 2);
|
||||
|
||||
return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Interleave vector elements.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
|
|||
LLVMValueRef b,
|
||||
unsigned lo_hi);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_uninterleave1(struct gallivm_state *gallivm,
|
||||
unsigned num_elems,
|
||||
LLVMValueRef a,
|
||||
unsigned lo_hi);
|
||||
|
||||
void
|
||||
lp_build_unpack2(struct gallivm_state *gallivm,
|
||||
|
|
|
|||
|
|
@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
|
|||
/* Ignore deprecated instructions */
|
||||
switch (inst->Instruction.Opcode) {
|
||||
|
||||
case TGSI_OPCODE_UP2H:
|
||||
case TGSI_OPCODE_UP2US:
|
||||
case TGSI_OPCODE_UP4B:
|
||||
case TGSI_OPCODE_UP4UB:
|
||||
|
|
|
|||
|
|
@ -45,8 +45,10 @@
|
|||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_bitarit.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_conv.h"
|
||||
#include "lp_bld_gather.h"
|
||||
#include "lp_bld_logic.h"
|
||||
#include "lp_bld_pack.h"
|
||||
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
|
||||
|
|
@ -530,6 +532,77 @@ static struct lp_build_tgsi_action log_action = {
|
|||
log_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_PK2H */
|
||||
|
||||
static void
|
||||
pk2h_fetch_args(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
/* src0.x */
|
||||
emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
|
||||
0, TGSI_CHAN_X);
|
||||
/* src0.y */
|
||||
emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
|
||||
0, TGSI_CHAN_Y);
|
||||
}
|
||||
|
||||
static void
|
||||
pk2h_emit(
|
||||
const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
struct lp_build_emit_data *emit_data)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
struct lp_type f16i_t;
|
||||
LLVMValueRef lo, hi, res;
|
||||
|
||||
f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
|
||||
lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
|
||||
hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
|
||||
/* maybe some interleave doubling vector width would be useful... */
|
||||
lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
|
||||
hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
|
||||
res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
|
||||
|
||||
emit_data->output[emit_data->chan] = res;
|
||||
}
|
||||
|
||||
static struct lp_build_tgsi_action pk2h_action = {
|
||||
pk2h_fetch_args, /* fetch_args */
|
||||
pk2h_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_UP2H */
|
||||
|
||||
static void
|
||||
up2h_emit(
|
||||
const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
struct lp_build_emit_data *emit_data)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMContextRef context = gallivm->context;
|
||||
LLVMValueRef lo, hi, res[2], arg;
|
||||
unsigned nr = bld_base->base.type.length;
|
||||
LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
|
||||
|
||||
arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
|
||||
lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
|
||||
hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
|
||||
res[0] = lp_build_half_to_float(gallivm, lo);
|
||||
res[1] = lp_build_half_to_float(gallivm, hi);
|
||||
|
||||
emit_data->output[0] = emit_data->output[2] = res[0];
|
||||
emit_data->output[1] = emit_data->output[3] = res[1];
|
||||
}
|
||||
|
||||
static struct lp_build_tgsi_action up2h_action = {
|
||||
scalar_unary_fetch_args, /* fetch_args */
|
||||
up2h_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_LRP */
|
||||
|
||||
static void
|
||||
|
|
@ -1032,10 +1105,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
|
|||
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
|
||||
|
||||
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
|
||||
|
|
|
|||
|
|
@ -226,14 +226,9 @@ pipe_freedreno_create_screen(int fd)
|
|||
struct pipe_screen *
|
||||
pipe_virgl_create_screen(int fd)
|
||||
{
|
||||
struct virgl_winsys *vws;
|
||||
struct pipe_screen *screen;
|
||||
|
||||
vws = virgl_drm_winsys_create(fd);
|
||||
if (!vws)
|
||||
return NULL;
|
||||
|
||||
screen = virgl_create_screen(vws);
|
||||
screen = virgl_drm_screen_create(fd);
|
||||
return screen ? debug_screen_wrap(screen) : NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@
|
|||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi_exec.h"
|
||||
#include "util/u_half.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
|
|
@ -3057,6 +3058,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_pk2h(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
unsigned chan;
|
||||
union tgsi_exec_channel arg[2], dst;
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
|
||||
dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
|
||||
(util_float_to_half(arg[1].f[chan]) << 16);
|
||||
}
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
|
||||
store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_up2h(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
unsigned chan;
|
||||
union tgsi_exec_channel arg, dst[2];
|
||||
|
||||
fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
|
||||
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
|
||||
dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
|
||||
dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
|
||||
}
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
|
||||
store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_scs(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
|
|
@ -4339,7 +4379,7 @@ exec_instruction(
|
|||
break;
|
||||
|
||||
case TGSI_OPCODE_PK2H:
|
||||
assert (0);
|
||||
exec_pk2h(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_PK2US:
|
||||
|
|
@ -4425,7 +4465,7 @@ exec_instruction(
|
|||
break;
|
||||
|
||||
case TGSI_OPCODE_UP2H:
|
||||
assert (0);
|
||||
exec_up2h(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_UP2US:
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
|||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
|
||||
|
|
@ -426,6 +426,7 @@ tgsi_opcode_infer_src_type( uint opcode )
|
|||
case TGSI_OPCODE_SAMPLE_I:
|
||||
case TGSI_OPCODE_SAMPLE_I_MS:
|
||||
case TGSI_OPCODE_UMUL_HI:
|
||||
case TGSI_OPCODE_UP2H:
|
||||
return TGSI_TYPE_UNSIGNED;
|
||||
case TGSI_OPCODE_IMUL_HI:
|
||||
case TGSI_OPCODE_I2F:
|
||||
|
|
|
|||
|
|
@ -377,6 +377,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
info->reads_position = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_FACE)
|
||||
info->uses_frontface = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
|
||||
info->reads_samplemask = TRUE;
|
||||
}
|
||||
else if (file == TGSI_FILE_OUTPUT) {
|
||||
info->output_semantic_name[reg] = (ubyte) semName;
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ struct tgsi_shader_info
|
|||
ubyte colors_written;
|
||||
boolean reads_position; /**< does fragment shader read position? */
|
||||
boolean reads_z; /**< does fragment shader read depth? */
|
||||
boolean reads_samplemask; /**< does fragment shader read sample mask? */
|
||||
boolean writes_z; /**< does fragment shader write Z value? */
|
||||
boolean writes_stencil; /**< does fragment shader write stencil value? */
|
||||
boolean writes_samplemask; /**< does fragment shader write sample mask? */
|
||||
|
|
|
|||
|
|
@ -195,4 +195,16 @@ u_box_minify_2d(struct pipe_box *dst,
|
|||
dst->height = MAX2(src->height >> l, 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
u_box_minify_3d(struct pipe_box *dst,
|
||||
const struct pipe_box *src, unsigned l)
|
||||
{
|
||||
dst->x = src->x >> l;
|
||||
dst->y = src->y >> l;
|
||||
dst->z = src->z >> l;
|
||||
dst->width = MAX2(src->width >> l, 1);
|
||||
dst->height = MAX2(src->height >> l, 1);
|
||||
dst->depth = MAX2(src->depth >> l, 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@
|
|||
#include <machine/cpu.h>
|
||||
#endif
|
||||
|
||||
#if defined(PIPE_OS_FREEBSD)
|
||||
#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -313,7 +313,7 @@ def _parse_channels(fields, layout, colorspace, swizzles):
|
|||
return channels
|
||||
|
||||
def parse(filename):
|
||||
'''Parse the format descrition in CSV format in terms of the
|
||||
'''Parse the format description in CSV format in terms of the
|
||||
Channel and Format classes above.'''
|
||||
|
||||
stream = open(filename)
|
||||
|
|
|
|||
|
|
@ -74,7 +74,11 @@ util_float_to_half(float f)
|
|||
f32.ui &= round_mask;
|
||||
f32.f *= magic.f;
|
||||
f32.ui -= round_mask;
|
||||
|
||||
/*
|
||||
* XXX: The magic mul relies on denorms being available, otherwise
|
||||
* all f16 denorms get flushed to zero - hence when this is used
|
||||
* for tgsi_exec in softpipe we won't get f16 denorms.
|
||||
*/
|
||||
/*
|
||||
* Clamp to max finite value if overflowed.
|
||||
* OpenGL has completely undefined rounding behavior for float to
|
||||
|
|
@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
|
|||
|
||||
/* Adjust */
|
||||
f32.f *= magic.f;
|
||||
/* XXX: The magic mul relies on denorms being available */
|
||||
|
||||
/* Inf / NaN */
|
||||
if (f32.f >= infnan.f)
|
||||
|
|
|
|||
|
|
@ -49,6 +49,13 @@ enum VS_OUTPUT
|
|||
VS_O_VTEX = 0
|
||||
};
|
||||
|
||||
/* Zig-zag scan pattern with 16 entries, laid out four per row. */
const int vl_zscan_normal_16[] =
{
    0,  1,  4,  8,
    5,  2,  3,  6,
    9, 12, 13, 10,
    7, 11, 14, 15
};
|
||||
|
||||
const int vl_zscan_linear[] =
|
||||
{
|
||||
/* Linear scan pattern */
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ struct vl_zscan_buffer
|
|||
struct pipe_surface *dst;
|
||||
};
|
||||
|
||||
extern const int vl_zscan_normal_16[];
|
||||
extern const int vl_zscan_linear[];
|
||||
extern const int vl_zscan_normal[];
|
||||
extern const int vl_zscan_alternate[];
|
||||
|
|
|
|||
|
|
@ -325,6 +325,11 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call
|
|||
will not block and the return value will be TRUE if the query has
|
||||
completed or FALSE otherwise.
|
||||
|
||||
``get_query_result_resource`` is used to store the result of a query into
|
||||
a resource without synchronizing with the CPU. This write will optionally
|
||||
wait for the query to complete, and will optionally write whether the value
|
||||
is available instead of the value itself.
|
||||
|
||||
The interface currently includes the following types of queries:
|
||||
|
||||
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
|
||||
|
|
|
|||
|
|
@ -138,6 +138,10 @@ The integer capabilities:
|
|||
* ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
|
||||
alignment for pipe_sampler_view::u.buf.first_element, in bytes.
|
||||
If a driver does not support first/last_element, it should return 0.
|
||||
* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
|
||||
supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
|
||||
When this is the case it should be assumed that the swizzle parameters
|
||||
in the sampler view have no effect.
|
||||
* ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
|
||||
If true, the hardware cannot replace arbitrary shader inputs with sprite
|
||||
coordinates and hence the inputs that are desired to be replaceable must
|
||||
|
|
@ -164,7 +168,7 @@ The integer capabilities:
|
|||
view it is intended to be used with, or herein undefined results may occur
|
||||
for permutational swizzles.
|
||||
* ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
|
||||
a buffer sampler view, in bytes.
|
||||
a buffer sampler view, in texels.
|
||||
* ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
|
||||
since they are linked) a driver can support. Returning 0 is equivalent
|
||||
to returning 1 because every driver has to support at least a single
|
||||
|
|
@ -306,6 +310,15 @@ The integer capabilities:
|
|||
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
|
||||
is supported.
|
||||
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
|
||||
* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
|
||||
pipe_context::create_surface supports reinterpreting a texture as a surface
|
||||
of a format with different block width/height (but same block size in bits).
|
||||
For example, a compressed texture image can be interpreted as a
|
||||
non-compressed surface whose texels are the same number of bits as the
|
||||
compressed blocks, and vice versa. The width and height of the surface are
|
||||
adjusted appropriately.
|
||||
* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Driver supports
|
||||
context::get_query_result_resource callback.
|
||||
|
||||
|
||||
.. _pipe_capf:
|
||||
|
|
|
|||
|
|
@ -2372,6 +2372,23 @@ programs.
|
|||
the program. Results are unspecified if any of the remaining
|
||||
threads terminates or never reaches an executed BARRIER instruction.
|
||||
|
||||
.. opcode:: MEMBAR - Memory barrier
|
||||
|
||||
``MEMBAR type``
|
||||
|
||||
This opcode waits for the completion of all memory accesses based on
|
||||
the type passed in. The type is an immediate bitfield with the following
|
||||
meaning:
|
||||
|
||||
Bit 0: Shader storage buffers
|
||||
Bit 1: Atomic buffers
|
||||
Bit 2: Images
|
||||
Bit 3: Shared memory
|
||||
Bit 4: Thread group
|
||||
|
||||
These may be passed in any combination. An implementation is free to not
|
||||
distinguish between these as it sees fit. However these map to all the
|
||||
possibilities made available by GLSL.
|
||||
|
||||
.. _atomopcodes:
|
||||
|
||||
|
|
|
|||
|
|
@ -152,6 +152,9 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
|
|||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
const uint32_t *buf = (const void *)string;
|
||||
|
||||
/* max packet size is 0x3fff dwords: */
|
||||
len = MIN2(len, 0x3fff * 4);
|
||||
|
||||
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
|
||||
while (len >= 4) {
|
||||
OUT_RING(ring, *buf);
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_BARRIER:
|
||||
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_SM3:
|
||||
|
|
@ -183,6 +184,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_CLIP_HALFZ:
|
||||
return is_a3xx(screen) || is_a4xx(screen);
|
||||
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
return 0;
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
|
||||
if (is_a3xx(screen)) return 16;
|
||||
if (is_a4xx(screen)) return 32;
|
||||
|
|
@ -248,6 +251,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
@ -296,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
/* Queries. */
|
||||
case PIPE_CAP_QUERY_TIME_ELAPSED:
|
||||
case PIPE_CAP_QUERY_TIMESTAMP:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
return 0;
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
return is_a3xx(screen) || is_a4xx(screen);
|
||||
|
|
|
|||
|
|
@ -556,6 +556,10 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
|
|||
}
|
||||
}
|
||||
|
||||
/* NOTE: this creates the "TGSI" style fragface (ie. input slot
|
||||
* VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face
|
||||
* we can just use the value from hw directly (since it is boolean)
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
create_frag_face(struct ir3_compile *ctx, unsigned comp)
|
||||
{
|
||||
|
|
@ -1224,7 +1228,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
case nir_intrinsic_load_vertex_id_zero_base:
|
||||
if (!ctx->vertex_id) {
|
||||
ctx->vertex_id = create_input(ctx->block, 0);
|
||||
ctx->vertex_id = create_input(b, 0);
|
||||
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
|
||||
ctx->vertex_id);
|
||||
}
|
||||
|
|
@ -1232,7 +1236,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
case nir_intrinsic_load_instance_id:
|
||||
if (!ctx->instance_id) {
|
||||
ctx->instance_id = create_input(ctx->block, 0);
|
||||
ctx->instance_id = create_input(b, 0);
|
||||
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
|
||||
ctx->instance_id);
|
||||
}
|
||||
|
|
@ -1244,6 +1248,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_front_face:
|
||||
if (!ctx->frag_face) {
|
||||
ctx->so->frag_face = true;
|
||||
ctx->frag_face = create_input(b, 0);
|
||||
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
|
||||
}
|
||||
dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
|
||||
break;
|
||||
case nir_intrinsic_discard_if:
|
||||
case nir_intrinsic_discard: {
|
||||
struct ir3_instruction *cond, *kill;
|
||||
|
|
@ -1349,6 +1361,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
|
|||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
|
||||
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
|
||||
struct ir3_instruction *const_off[4];
|
||||
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
|
||||
unsigned i, coords, flags;
|
||||
unsigned nsrc0 = 0, nsrc1 = 0;
|
||||
|
|
@ -1392,7 +1405,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
|
|||
ddy = get_src(ctx, &tex->src[i].src);
|
||||
break;
|
||||
default:
|
||||
compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
|
||||
compile_error(ctx, "Unhandled NIR tex src type: %d\n",
|
||||
tex->src[i].src_type);
|
||||
return;
|
||||
}
|
||||
|
|
@ -1417,6 +1430,21 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
|
|||
|
||||
tex_info(tex, &flags, &coords);
|
||||
|
||||
if (!has_off) {
|
||||
/* could still have a constant offset: */
|
||||
if (tex->const_offset[0] || tex->const_offset[1] ||
|
||||
tex->const_offset[2] || tex->const_offset[3]) {
|
||||
off = const_off;
|
||||
|
||||
off[0] = create_immed(b, tex->const_offset[0]);
|
||||
off[1] = create_immed(b, tex->const_offset[1]);
|
||||
off[2] = create_immed(b, tex->const_offset[2]);
|
||||
off[3] = create_immed(b, tex->const_offset[3]);
|
||||
|
||||
has_off = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* scale up integer coords for TXF based on the LOD */
|
||||
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
|
||||
assert(has_lod);
|
||||
|
|
@ -2053,6 +2081,9 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
|
|||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CLIP_DIST1:
|
||||
break;
|
||||
case VARYING_SLOT_CLIP_VERTEX:
|
||||
/* handled entirely in nir_lower_clip: */
|
||||
return;
|
||||
default:
|
||||
if (slot >= VARYING_SLOT_VAR0)
|
||||
break;
|
||||
|
|
@ -2135,11 +2166,17 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
setup_output(ctx, var);
|
||||
}
|
||||
|
||||
/* Setup variables (which should only be arrays): */
|
||||
/* Setup global variables (which should only be arrays): */
|
||||
nir_foreach_variable(var, &ctx->s->globals) {
|
||||
declare_var(ctx, var);
|
||||
}
|
||||
|
||||
/* Setup local variables (which should only be arrays): */
|
||||
/* NOTE: need to do something more clever when we support >1 fxn */
|
||||
nir_foreach_variable(var, &fxn->locals) {
|
||||
declare_var(ctx, var);
|
||||
}
|
||||
|
||||
/* And emit the body: */
|
||||
ctx->impl = fxn;
|
||||
emit_function(ctx, fxn);
|
||||
|
|
|
|||
|
|
@ -262,6 +262,9 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
|||
|
|
@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_CUBE_MAP_ARRAY:
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
|
||||
return true;
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
return 0;
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 1;
|
||||
case PIPE_CAP_TGSI_TEXCOORD:
|
||||
|
|
@ -486,6 +488,9 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -308,17 +308,4 @@ void
|
|||
lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
|
||||
|
||||
|
||||
#ifdef PIPE_ARCH_SSE
|
||||
#include <emmintrin.h>
|
||||
#include "util/u_sse.h"
|
||||
|
||||
/*
 * Build an __m128i from one rasterizer plane by passing its four fields,
 * each cast to int32_t, to _mm_setr_epi32 in the order c, dcdx, dcdy, eo.
 * NOTE(review): the casts narrow whatever the field types are to 32 bits;
 * the field declarations are not visible here.
 */
static inline __m128i
|
||||
lp_plane_to_m128i(const struct lp_rast_plane *plane)
|
||||
{
|
||||
return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
|
||||
(int32_t)plane->dcdy, (int32_t)plane->eo);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ sign_bits4(const __m128i *cstep, int cdiff)
|
|||
|
||||
void
|
||||
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
const struct lp_rast_triangle *tri = arg.triangle.tri;
|
||||
const struct lp_rast_plane *plane = GET_PLANES(tri);
|
||||
|
|
@ -250,26 +250,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
|||
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
|
||||
unsigned nr = 0;
|
||||
|
||||
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
|
||||
/* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
|
||||
__m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
|
||||
__m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
|
||||
__m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__m128i c;
|
||||
__m128i dcdx;
|
||||
__m128i dcdy;
|
||||
__m128i rej4;
|
||||
|
||||
__m128i dcdx2;
|
||||
__m128i dcdx3;
|
||||
__m128i c, dcdx, dcdy, rej4;
|
||||
__m128i dcdx_neg_mask, dcdy_neg_mask;
|
||||
__m128i dcdx2, dcdx3;
|
||||
|
||||
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
|
||||
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
|
||||
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
|
||||
__m128i unused;
|
||||
|
||||
|
||||
transpose4_epi32(&p0, &p1, &p2, &zero,
|
||||
&c, &dcdx, &dcdy, &rej4);
|
||||
&c, &unused, &dcdx, &dcdy);
|
||||
|
||||
/* recalc eo - easier than trying to load as scalars / shuffle... */
|
||||
dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
|
||||
dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
|
||||
rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
|
||||
_mm_and_si128(dcdx_neg_mask, dcdx));
|
||||
|
||||
/* Adjust dcdx;
|
||||
*/
|
||||
|
|
@ -349,32 +352,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
|||
|
||||
void
|
||||
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
const struct lp_rast_triangle *tri = arg.triangle.tri;
|
||||
const struct lp_rast_plane *plane = GET_PLANES(tri);
|
||||
unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
|
||||
unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
|
||||
|
||||
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
|
||||
/* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
|
||||
__m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
|
||||
__m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
|
||||
__m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__m128i c;
|
||||
__m128i dcdx;
|
||||
__m128i dcdy;
|
||||
__m128i c, dcdx, dcdy;
|
||||
__m128i dcdx2, dcdx3;
|
||||
|
||||
__m128i dcdx2;
|
||||
__m128i dcdx3;
|
||||
|
||||
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
|
||||
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
|
||||
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
|
||||
__m128i unused;
|
||||
|
||||
transpose4_epi32(&p0, &p1, &p2, &zero,
|
||||
&c, &dcdx, &dcdy, &unused);
|
||||
&c, &unused, &dcdx, &dcdy);
|
||||
|
||||
/* Adjust dcdx;
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -311,6 +311,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -168,6 +168,21 @@ struct lp_setup_context
|
|||
const float (*v2)[4]);
|
||||
};
|
||||
|
||||
/*
 * Decide, per scissor edge, whether the bounding box crosses that edge.
 *
 * scis_planes[0..3] are written in the order left, right, top, bottom.
 * An entry is non-zero when the corresponding bbox coordinate lies strictly
 * outside the scissor coordinate (x0/y0 smaller, x1/y1 larger) and zero
 * when the bbox is on or inside that edge. Neither bbox nor scissor is
 * modified.
 */
static inline void
|
||||
scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
|
||||
struct u_rect *scissor)
|
||||
{
|
||||
/* left */
|
||||
scis_planes[0] = (bbox->x0 < scissor->x0);
|
||||
/* right */
|
||||
scis_planes[1] = (bbox->x1 > scissor->x1);
|
||||
/* top */
|
||||
scis_planes[2] = (bbox->y0 < scissor->y0);
|
||||
/* bottom */
|
||||
scis_planes[3] = (bbox->y1 > scissor->y1);
|
||||
}
|
||||
|
||||
|
||||
void lp_setup_choose_triangle( struct lp_setup_context *setup );
|
||||
void lp_setup_choose_line( struct lp_setup_context *setup );
|
||||
void lp_setup_choose_point( struct lp_setup_context *setup );
|
||||
|
|
|
|||
|
|
@ -336,13 +336,6 @@ try_setup_line( struct lp_setup_context *setup,
|
|||
layer = MIN2(layer, scene->fb_max_layer);
|
||||
}
|
||||
|
||||
if (setup->scissor_test) {
|
||||
nr_planes = 8;
|
||||
}
|
||||
else {
|
||||
nr_planes = 4;
|
||||
}
|
||||
|
||||
dx = v1[0][0] - v2[0][0];
|
||||
dy = v1[0][1] - v2[0][1];
|
||||
area = (dx * dx + dy * dy);
|
||||
|
|
@ -591,6 +584,18 @@ try_setup_line( struct lp_setup_context *setup,
|
|||
bbox.x0 = MAX2(bbox.x0, 0);
|
||||
bbox.y0 = MAX2(bbox.y0, 0);
|
||||
|
||||
nr_planes = 4;
|
||||
/*
|
||||
* Determine how many scissor planes we need, that is drop scissor
|
||||
* edges if the bounding box of the tri is fully inside that edge.
|
||||
*/
|
||||
if (setup->scissor_test) {
|
||||
/* why not just use draw_regions */
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
|
||||
nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
|
||||
}
|
||||
|
||||
line = lp_setup_alloc_triangle(scene,
|
||||
key->num_inputs,
|
||||
nr_planes,
|
||||
|
|
@ -708,30 +713,46 @@ try_setup_line( struct lp_setup_context *setup,
|
|||
* Note that otherwise, the scissor planes only vary in 'C' value,
|
||||
* and even then only on state-changes. Could alternatively store
|
||||
* these planes elsewhere.
|
||||
* (Or only store the c value together with a bit indicating which
|
||||
* scissor edge this is, so rasterization would treat them differently
|
||||
* (easier to evaluate) to ordinary planes.)
|
||||
*/
|
||||
if (nr_planes == 8) {
|
||||
const struct u_rect *scissor =
|
||||
&setup->scissors[viewport_index];
|
||||
if (nr_planes > 4) {
|
||||
/* why not just use draw_regions */
|
||||
struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
struct lp_rast_plane *plane_s = &plane[4];
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, scissor);
|
||||
|
||||
plane[4].dcdx = -1 << 8;
|
||||
plane[4].dcdy = 0;
|
||||
plane[4].c = (1-scissor->x0) << 8;
|
||||
plane[4].eo = 1 << 8;
|
||||
|
||||
plane[5].dcdx = 1 << 8;
|
||||
plane[5].dcdy = 0;
|
||||
plane[5].c = (scissor->x1+1) << 8;
|
||||
plane[5].eo = 0;
|
||||
|
||||
plane[6].dcdx = 0;
|
||||
plane[6].dcdy = 1 << 8;
|
||||
plane[6].c = (1-scissor->y0) << 8;
|
||||
plane[6].eo = 1 << 8;
|
||||
|
||||
plane[7].dcdx = 0;
|
||||
plane[7].dcdy = -1 << 8;
|
||||
plane[7].c = (scissor->y1+1) << 8;
|
||||
plane[7].eo = 0;
|
||||
if (s_planes[0]) {
|
||||
plane_s->dcdx = -1 << 8;
|
||||
plane_s->dcdy = 0;
|
||||
plane_s->c = (1-scissor->x0) << 8;
|
||||
plane_s->eo = 1 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[1]) {
|
||||
plane_s->dcdx = 1 << 8;
|
||||
plane_s->dcdy = 0;
|
||||
plane_s->c = (scissor->x1+1) << 8;
|
||||
plane_s->eo = 0 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[2]) {
|
||||
plane_s->dcdx = 0;
|
||||
plane_s->dcdy = 1 << 8;
|
||||
plane_s->c = (1-scissor->y0) << 8;
|
||||
plane_s->eo = 1 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[3]) {
|
||||
plane_s->dcdx = 0;
|
||||
plane_s->dcdy = -1 << 8;
|
||||
plane_s->c = (scissor->y1+1) << 8;
|
||||
plane_s->eo = 0;
|
||||
plane_s++;
|
||||
}
|
||||
assert(plane_s == &plane[nr_planes]);
|
||||
}
|
||||
|
||||
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
|
||||
|
|
|
|||
|
|
@ -302,13 +302,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
layer = MIN2(layer, scene->fb_max_layer);
|
||||
}
|
||||
|
||||
if (setup->scissor_test) {
|
||||
nr_planes = 7;
|
||||
}
|
||||
else {
|
||||
nr_planes = 3;
|
||||
}
|
||||
|
||||
/* Bounding rectangle (in pixels) */
|
||||
{
|
||||
/* Yes this is necessary to accurately calculate bounding boxes
|
||||
|
|
@ -347,6 +340,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
bbox.x0 = MAX2(bbox.x0, 0);
|
||||
bbox.y0 = MAX2(bbox.y0, 0);
|
||||
|
||||
nr_planes = 3;
|
||||
/*
|
||||
* Determine how many scissor planes we need, that is drop scissor
|
||||
* edges if the bounding box of the tri is fully inside that edge.
|
||||
*/
|
||||
if (setup->scissor_test) {
|
||||
/* why not just use draw_regions */
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
|
||||
nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
|
||||
}
|
||||
|
||||
tri = lp_setup_alloc_triangle(scene,
|
||||
key->num_inputs,
|
||||
nr_planes,
|
||||
|
|
@ -367,13 +372,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
|
||||
/* Setup parameter interpolants:
|
||||
*/
|
||||
setup->setup.variant->jit_function( v0,
|
||||
v1,
|
||||
v2,
|
||||
frontfacing,
|
||||
GET_A0(&tri->inputs),
|
||||
GET_DADX(&tri->inputs),
|
||||
GET_DADY(&tri->inputs) );
|
||||
setup->setup.variant->jit_function(v0, v1, v2,
|
||||
frontfacing,
|
||||
GET_A0(&tri->inputs),
|
||||
GET_DADX(&tri->inputs),
|
||||
GET_DADY(&tri->inputs));
|
||||
|
||||
tri->inputs.frontfacing = frontfacing;
|
||||
tri->inputs.disable = FALSE;
|
||||
|
|
@ -383,9 +386,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
|
||||
if (0)
|
||||
lp_dump_setup_coef(&setup->setup.variant->key,
|
||||
(const float (*)[4])GET_A0(&tri->inputs),
|
||||
(const float (*)[4])GET_DADX(&tri->inputs),
|
||||
(const float (*)[4])GET_DADY(&tri->inputs));
|
||||
(const float (*)[4])GET_A0(&tri->inputs),
|
||||
(const float (*)[4])GET_DADX(&tri->inputs),
|
||||
(const float (*)[4])GET_DADY(&tri->inputs));
|
||||
|
||||
plane = GET_PLANES(tri);
|
||||
|
||||
|
|
@ -672,29 +675,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
* Note that otherwise, the scissor planes only vary in 'C' value,
|
||||
* and even then only on state-changes. Could alternatively store
|
||||
* these planes elsewhere.
|
||||
* (Or only store the c value together with a bit indicating which
|
||||
* scissor edge this is, so rasterization would treat them differently
|
||||
* (easier to evaluate) to ordinary planes.)
|
||||
*/
|
||||
if (nr_planes == 7) {
|
||||
const struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
if (nr_planes > 3) {
|
||||
/* why not just use draw_regions */
|
||||
struct u_rect *scissor = &setup->scissors[viewport_index];
|
||||
struct lp_rast_plane *plane_s = &plane[3];
|
||||
boolean s_planes[4];
|
||||
scissor_planes_needed(s_planes, &bbox, scissor);
|
||||
|
||||
plane[3].dcdx = -1 << 8;
|
||||
plane[3].dcdy = 0;
|
||||
plane[3].c = (1-scissor->x0) << 8;
|
||||
plane[3].eo = 1 << 8;
|
||||
|
||||
plane[4].dcdx = 1 << 8;
|
||||
plane[4].dcdy = 0;
|
||||
plane[4].c = (scissor->x1+1) << 8;
|
||||
plane[4].eo = 0;
|
||||
|
||||
plane[5].dcdx = 0;
|
||||
plane[5].dcdy = 1 << 8;
|
||||
plane[5].c = (1-scissor->y0) << 8;
|
||||
plane[5].eo = 1 << 8;
|
||||
|
||||
plane[6].dcdx = 0;
|
||||
plane[6].dcdy = -1 << 8;
|
||||
plane[6].c = (scissor->y1+1) << 8;
|
||||
plane[6].eo = 0;
|
||||
if (s_planes[0]) {
|
||||
plane_s->dcdx = -1 << 8;
|
||||
plane_s->dcdy = 0;
|
||||
plane_s->c = (1-scissor->x0) << 8;
|
||||
plane_s->eo = 1 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[1]) {
|
||||
plane_s->dcdx = 1 << 8;
|
||||
plane_s->dcdy = 0;
|
||||
plane_s->c = (scissor->x1+1) << 8;
|
||||
plane_s->eo = 0 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[2]) {
|
||||
plane_s->dcdx = 0;
|
||||
plane_s->dcdy = 1 << 8;
|
||||
plane_s->c = (1-scissor->y0) << 8;
|
||||
plane_s->eo = 1 << 8;
|
||||
plane_s++;
|
||||
}
|
||||
if (s_planes[3]) {
|
||||
plane_s->dcdx = 0;
|
||||
plane_s->dcdy = -1 << 8;
|
||||
plane_s->c = (scissor->y1+1) << 8;
|
||||
plane_s->eo = 0;
|
||||
plane_s++;
|
||||
}
|
||||
assert(plane_s == &plane[nr_planes]);
|
||||
}
|
||||
|
||||
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
|
||||
|
|
@ -984,17 +1004,16 @@ calc_fixed_position(struct lp_setup_context *setup,
|
|||
* Both should be acceptable, I think.
|
||||
*/
|
||||
#if defined(PIPE_ARCH_SSE)
|
||||
__m128d v0r, v1r, v2r;
|
||||
__m128 v0r, v1r;
|
||||
__m128 vxy0xy2, vxy1xy0;
|
||||
__m128i vxy0xy2i, vxy1xy0i;
|
||||
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
|
||||
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
|
||||
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
|
||||
v0r = _mm_load_sd((const double *)v0[0]);
|
||||
v1r = _mm_load_sd((const double *)v1[0]);
|
||||
v2r = _mm_load_sd((const double *)v2[0]);
|
||||
vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
|
||||
vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
|
||||
v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
|
||||
vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
|
||||
v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
|
||||
vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
|
||||
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
|
||||
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
|
||||
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
|
||||
|
|
|
|||
|
|
@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
|
|||
case TYPE_S32:
|
||||
case TYPE_U32:
|
||||
return reg.data.s32 == i; // as if ...
|
||||
case TYPE_S64:
|
||||
case TYPE_U64:
|
||||
return reg.data.s64 == i; // as if ...
|
||||
case TYPE_F32:
|
||||
return reg.data.f32 == static_cast<float>(i);
|
||||
case TYPE_F64:
|
||||
|
|
|
|||
|
|
@ -132,6 +132,7 @@ enum operation
|
|||
OP_SUBFM, // surface bitfield manipulation
|
||||
OP_SUCLAMP, // clamp surface coordinates
|
||||
OP_SUEAU, // surface effective address
|
||||
OP_SUQ, // surface query
|
||||
OP_MADSP, // special integer multiply-add
|
||||
OP_TEXBAR, // texture dependency barrier
|
||||
OP_DFDX,
|
||||
|
|
|
|||
|
|
@ -1947,10 +1947,16 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
|
|||
case OP_CEIL:
|
||||
case OP_FLOOR:
|
||||
case OP_TRUNC:
|
||||
case OP_CVT:
|
||||
case OP_SAT:
|
||||
emitCVT(insn);
|
||||
break;
|
||||
case OP_CVT:
|
||||
if (insn->def(0).getFile() == FILE_PREDICATE ||
|
||||
insn->src(0).getFile() == FILE_PREDICATE)
|
||||
emitMOV(insn);
|
||||
else
|
||||
emitCVT(insn);
|
||||
break;
|
||||
case OP_RSQ:
|
||||
emitSFnOp(insn, 5 + 2 * insn->subOp);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -673,7 +673,12 @@ CodeEmitterGM107::emitMOV()
|
|||
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) {
|
||||
switch (insn->src(0).getFile()) {
|
||||
case FILE_GPR:
|
||||
emitInsn(0x5c980000);
|
||||
if (insn->def(0).getFile() == FILE_PREDICATE) {
|
||||
emitInsn(0x5b6a0000);
|
||||
emitGPR (0x08);
|
||||
} else {
|
||||
emitInsn(0x5c980000);
|
||||
}
|
||||
emitGPR (0x14, insn->src(0));
|
||||
break;
|
||||
case FILE_MEMORY_CONST:
|
||||
|
|
@ -684,18 +689,32 @@ CodeEmitterGM107::emitMOV()
|
|||
emitInsn(0x38980000);
|
||||
emitIMMD(0x14, 19, insn->src(0));
|
||||
break;
|
||||
case FILE_PREDICATE:
|
||||
emitInsn(0x50880000);
|
||||
emitPRED(0x0c, insn->src(0));
|
||||
emitPRED(0x1d);
|
||||
emitPRED(0x27);
|
||||
break;
|
||||
default:
|
||||
assert(!"bad src file");
|
||||
break;
|
||||
}
|
||||
emitField(0x27, 4, insn->lanes);
|
||||
if (insn->def(0).getFile() != FILE_PREDICATE &&
|
||||
insn->src(0).getFile() != FILE_PREDICATE)
|
||||
emitField(0x27, 4, insn->lanes);
|
||||
} else {
|
||||
emitInsn (0x01000000);
|
||||
emitIMMD (0x14, 32, insn->src(0));
|
||||
emitField(0x0c, 4, insn->lanes);
|
||||
}
|
||||
|
||||
emitGPR(0x00, insn->def(0));
|
||||
if (insn->def(0).getFile() == FILE_PREDICATE) {
|
||||
emitPRED(0x27);
|
||||
emitPRED(0x03, insn->def(0));
|
||||
emitPRED(0x00);
|
||||
} else {
|
||||
emitGPR(0x00, insn->def(0));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2684,11 +2703,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||
emitRAM();
|
||||
break;
|
||||
case OP_MOV:
|
||||
if (insn->def(0).getFile() == FILE_GPR &&
|
||||
insn->src(0).getFile() != FILE_PREDICATE)
|
||||
emitMOV();
|
||||
else
|
||||
assert(!"R2P/P2R");
|
||||
emitMOV();
|
||||
break;
|
||||
case OP_RDSV:
|
||||
emitS2R();
|
||||
|
|
@ -2700,7 +2715,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||
case OP_CEIL:
|
||||
case OP_TRUNC:
|
||||
case OP_CVT:
|
||||
if (isFloatType(insn->dType)) {
|
||||
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
|
||||
insn->src(0).getFile() == FILE_PREDICATE)) {
|
||||
emitMOV();
|
||||
} else if (isFloatType(insn->dType)) {
|
||||
if (isFloatType(insn->sType))
|
||||
emitF2F();
|
||||
else
|
||||
|
|
|
|||
|
|
@ -2021,8 +2021,10 @@ CodeEmitterNVC0::emitATOM(const Instruction *i)
|
|||
code[0] |= 63 << 20;
|
||||
}
|
||||
|
||||
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
srcId(i->src(2), 32 + 17);
|
||||
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
|
||||
assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
|
||||
code[1] |= (SDATA(i->src(1)).id + 1) << 17;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2433,10 +2435,16 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
|
|||
case OP_CEIL:
|
||||
case OP_FLOOR:
|
||||
case OP_TRUNC:
|
||||
case OP_CVT:
|
||||
case OP_SAT:
|
||||
emitCVT(insn);
|
||||
break;
|
||||
case OP_CVT:
|
||||
if (insn->def(0).getFile() == FILE_PREDICATE ||
|
||||
insn->src(0).getFile() == FILE_PREDICATE)
|
||||
emitMOV(insn);
|
||||
else
|
||||
emitCVT(insn);
|
||||
break;
|
||||
case OP_RSQ:
|
||||
emitSFnOp(insn, 5 + 2 * insn->subOp);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
|
|||
static nv50_ir::DataFile translateFile(uint file);
|
||||
static nv50_ir::TexTarget translateTexture(uint texTarg);
|
||||
static nv50_ir::SVSemantic translateSysVal(uint sysval);
|
||||
static nv50_ir::CacheMode translateCacheMode(uint qualifier);
|
||||
|
||||
class Instruction
|
||||
{
|
||||
|
|
@ -213,6 +214,12 @@ public:
|
|||
|
||||
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
|
||||
|
||||
// Cache mode for this TGSI instruction: CACHE_CA when
// insn->Instruction.Memory is not set, otherwise the result of
// translateCacheMode() applied to insn->Memory.Qualifier.
nv50_ir::CacheMode getCacheMode() const {
|
||||
if (!insn->Instruction.Memory)
|
||||
return nv50_ir::CACHE_CA;
|
||||
return translateCacheMode(insn->Memory.Qualifier);
|
||||
}
|
||||
|
||||
inline uint getLabel() { return insn->Label.Label; }
|
||||
|
||||
unsigned getSaturate() const { return insn->Instruction.Saturate; }
|
||||
|
|
@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
|
|||
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
|
||||
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
|
||||
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
|
||||
//case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_NULL:
|
||||
default:
|
||||
|
|
@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
|
|||
}
|
||||
}
|
||||
|
||||
// Map a TGSI memory qualifier bitmask to an nv50_ir cache mode.
// TGSI_MEMORY_VOLATILE is tested first, so it yields CACHE_CV even when
// TGSI_MEMORY_COHERENT is also set; COHERENT alone yields CACHE_CG;
// anything else yields CACHE_CA.
static nv50_ir::CacheMode translateCacheMode(uint qualifier)
|
||||
{
|
||||
if (qualifier & TGSI_MEMORY_VOLATILE)
|
||||
return nv50_ir::CACHE_CV;
|
||||
if (qualifier & TGSI_MEMORY_COHERENT)
|
||||
return nv50_ir::CACHE_CG;
|
||||
return nv50_ir::CACHE_CA;
|
||||
}
|
||||
|
||||
nv50_ir::DataType Instruction::inferSrcType() const
|
||||
{
|
||||
switch (getOpcode()) {
|
||||
|
|
@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
case TGSI_FILE_IMMEDIATE:
|
||||
case TGSI_FILE_PREDICATE:
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_BUFFER:
|
||||
break;
|
||||
default:
|
||||
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
|
||||
|
|
@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
|||
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
|
||||
if (insn.getDst(0).isIndirect(0))
|
||||
indirectTempArrays.insert(insn.getDst(0).getArrayId());
|
||||
} else
|
||||
if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
info->io.globalAccess |= 0x2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
|||
if (src.isIndirect(0))
|
||||
indirectTempArrays.insert(src.getArrayId());
|
||||
} else
|
||||
/*
|
||||
if (src.getFile() == TGSI_FILE_RESOURCE) {
|
||||
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
|
||||
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
|
||||
if (src.getFile() == TGSI_FILE_BUFFER) {
|
||||
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
|
||||
0x1 : 0x2;
|
||||
} else
|
||||
*/
|
||||
if (src.getFile() == TGSI_FILE_OUTPUT) {
|
||||
if (src.isIndirect(0)) {
|
||||
// We don't know which one is accessed, just mark everything for
|
||||
|
|
@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
|
|||
int idx = dst.getIndex(0);
|
||||
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
||||
|
||||
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
|
||||
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
|
||||
return NULL;
|
||||
|
||||
if (dst.isIndirect(0) ||
|
||||
|
|
@ -2222,6 +2239,28 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
|
||||
Value *off = fetchSrc(1, c);
|
||||
Symbol *sym;
|
||||
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
|
||||
off = NULL;
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
|
||||
} else {
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
|
||||
}
|
||||
|
||||
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
|
||||
ld->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
getResourceCoords(off, r, 1);
|
||||
|
||||
if (isResourceRaw(code, r)) {
|
||||
|
|
@ -2298,6 +2337,30 @@ Converter::handleSTORE()
|
|||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
|
||||
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(tgsi.getDst(0).getMask() & (1 << c)))
|
||||
continue;
|
||||
|
||||
Symbol *sym;
|
||||
Value *off;
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
|
||||
off = NULL;
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
|
||||
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
|
||||
} else {
|
||||
off = fetchSrc(0, 0);
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
|
||||
}
|
||||
|
||||
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
|
||||
st->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getDst(0).isIndirect(0))
|
||||
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
getResourceCoords(off, r, 0);
|
||||
src = off;
|
||||
const int s = src.size();
|
||||
|
|
@ -2359,6 +2422,37 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
|||
std::vector<Value *> defv;
|
||||
LValue *dst = getScratch();
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
|
||||
Instruction *insn;
|
||||
Value *off = fetchSrc(1, c), *off2 = NULL;
|
||||
Value *sym;
|
||||
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
|
||||
else
|
||||
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
|
||||
else
|
||||
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
|
||||
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
|
||||
insn->setIndirect(0, 0, off);
|
||||
if (off2)
|
||||
insn->setIndirect(0, 1, off2);
|
||||
insn->subOp = subOp;
|
||||
}
|
||||
for (int c = 0; c < 4; ++c)
|
||||
if (dst0[c])
|
||||
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
getResourceCoords(srcv, r, 1);
|
||||
|
||||
if (isResourceSpecial(r)) {
|
||||
|
|
@ -3103,6 +3197,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
geni->fixed = 1;
|
||||
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
|
||||
break;
|
||||
case TGSI_OPCODE_MEMBAR:
|
||||
geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
|
||||
geni->fixed = 1;
|
||||
if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
|
||||
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
|
||||
else
|
||||
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
|
||||
break;
|
||||
case TGSI_OPCODE_ATOMUADD:
|
||||
case TGSI_OPCODE_ATOMXCHG:
|
||||
case TGSI_OPCODE_ATOMCAS:
|
||||
|
|
@ -3115,6 +3217,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
case TGSI_OPCODE_ATOMIMAX:
|
||||
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
|
||||
break;
|
||||
case TGSI_OPCODE_RESQ:
|
||||
geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
|
||||
makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
|
||||
break;
|
||||
case TGSI_OPCODE_IBFE:
|
||||
case TGSI_OPCODE_UBFE:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
|
|
|
|||
|
|
@ -1022,11 +1022,22 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Lower OP_SUQ in place: the instruction becomes an OP_MOV whose source 0
// is the value produced by loadResLength32(), called with source 0's
// dimension-1 indirect as pointer and source 0's fileIndex * 16 as offset.
// Both indirects on source 0 are cleared afterwards. Always returns true.
bool
|
||||
NVC0LoweringPass::handleSUQ(Instruction *suq)
|
||||
{
|
||||
suq->op = OP_MOV;
|
||||
suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
|
||||
suq->getSrc(0)->reg.fileIndex * 16));
|
||||
suq->setIndirect(0, 0, NULL);
|
||||
suq->setIndirect(0, 1, NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LoweringPass::handleATOM(Instruction *atom)
|
||||
{
|
||||
SVSemantic sv;
|
||||
Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
|
||||
|
||||
switch (atom->src(0).getFile()) {
|
||||
case FILE_MEMORY_LOCAL:
|
||||
|
|
@ -1037,16 +1048,22 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
|
|||
break;
|
||||
default:
|
||||
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
|
||||
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
|
||||
assert(base->reg.size == 8);
|
||||
if (ptr)
|
||||
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
|
||||
assert(base->reg.size == 8);
|
||||
atom->setIndirect(0, 0, base);
|
||||
return true;
|
||||
}
|
||||
Value *base =
|
||||
base =
|
||||
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
|
||||
Value *ptr = atom->getIndirect(0, 0);
|
||||
|
||||
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
|
||||
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
|
||||
if (ptr)
|
||||
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
|
||||
atom->setIndirect(0, 1, NULL);
|
||||
atom->setIndirect(0, 0, base);
|
||||
|
||||
return true;
|
||||
|
|
@ -1069,7 +1086,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
|
|||
cctl->setPredicate(cas->cc, cas->getPredicate());
|
||||
}
|
||||
|
||||
if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
|
||||
if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
|
||||
// CAS is crazy. Its 2nd source is a double reg, and the 3rd source
|
||||
// should be set to the high part of the double reg or bad things will
|
||||
// happen elsewhere in the universe.
|
||||
|
|
@ -1079,6 +1096,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
|
|||
bld.setPosition(cas, false);
|
||||
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
|
||||
cas->setSrc(1, dreg);
|
||||
cas->setSrc(2, dreg);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -1093,6 +1111,32 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
|
|||
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
|
||||
}
|
||||
|
||||
// Emit a 64-bit (TYPE_U64) load from the constant buffer slot
// io.resInfoCBSlot at byte offset io.suInfoBase + off.
//
// ptr, when non-NULL, is used as the load's indirect address after being
// shifted left by 4 into a scratch value; when NULL the load has no
// indirect. Returns the value produced by mkLoadv.
inline Value *
|
||||
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
|
||||
// ptr << 4 scales the index by 16 -- presumably the per-resource entry
// size, matching the fileIndex * 16 offsets callers pass; TODO confirm.
if (ptr)
|
||||
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
|
||||
|
||||
return bld.
|
||||
mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
|
||||
}
|
||||
|
||||
// Emit a 32-bit (TYPE_U32) load from the constant buffer slot
// io.resInfoCBSlot at byte offset io.suInfoBase + off + 8, i.e. 8 bytes
// past the location loadResInfo64() reads for the same off.
//
// ptr, when non-NULL, is used as the load's indirect address after being
// shifted left by 4 into a scratch value. Returns the value produced by
// mkLoadv.
inline Value *
|
||||
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
|
||||
// ptr << 4 scales the index by 16 -- presumably the per-resource entry
// size, matching the fileIndex * 16 offsets callers pass; TODO confirm.
if (ptr)
|
||||
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
|
||||
|
||||
// NOTE(review): the symbol is created as TYPE_U64 while the load itself is
// TYPE_U32 -- confirm whether the symbol type is intentional.
return bld.
|
||||
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
|
||||
}
|
||||
|
||||
inline Value *
|
||||
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
|
||||
{
|
||||
|
|
@ -1786,6 +1830,7 @@ NVC0LoweringPass::visit(Instruction *i)
|
|||
return handleRDSV(i);
|
||||
case OP_WRSV:
|
||||
return handleWRSV(i);
|
||||
case OP_STORE:
|
||||
case OP_LOAD:
|
||||
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
|
||||
if (prog->getType() == Program::TYPE_COMPUTE) {
|
||||
|
|
@ -1820,6 +1865,26 @@ NVC0LoweringPass::visit(Instruction *i)
|
|||
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
|
||||
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
|
||||
i->op = OP_VFETCH;
|
||||
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
|
||||
Value *ind = i->getIndirect(0, 1);
|
||||
Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
|
||||
// XXX come up with a way not to do this for EVERY little access but
|
||||
// rather to batch these up somehow. Unfortunately we've lost the
|
||||
// information about the field width by the time we get here.
|
||||
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
|
||||
Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
|
||||
Value *pred = new_LValue(func, FILE_PREDICATE);
|
||||
if (i->src(0).isIndirect(0)) {
|
||||
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
|
||||
}
|
||||
i->setIndirect(0, 1, NULL);
|
||||
i->setIndirect(0, 0, ptr);
|
||||
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
|
||||
i->setPredicate(CC_NOT_P, pred);
|
||||
if (i->defExists(0)) {
|
||||
bld.mkMov(i->getDef(0), bld.mkImm(0));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OP_ATOM:
|
||||
|
|
@ -1838,6 +1903,9 @@ NVC0LoweringPass::visit(Instruction *i)
|
|||
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
|
||||
handleSurfaceOpNVE4(i->asTex());
|
||||
break;
|
||||
case OP_SUQ:
|
||||
handleSUQ(i);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ protected:
|
|||
bool handleTXQ(TexInstruction *);
|
||||
virtual bool handleManualTXD(TexInstruction *);
|
||||
bool handleTXLQ(TexInstruction *);
|
||||
bool handleSUQ(Instruction *);
|
||||
bool handleATOM(Instruction *);
|
||||
bool handleCasExch(Instruction *, bool needCctl);
|
||||
void handleSurfaceOpNVE4(TexInstruction *);
|
||||
|
|
@ -116,6 +117,8 @@ private:
|
|||
void readTessCoord(LValue *dst, int c);
|
||||
|
||||
Value *loadResInfo32(Value *ptr, uint32_t off);
|
||||
Value *loadResInfo64(Value *ptr, uint32_t off);
|
||||
Value *loadResLength32(Value *ptr, uint32_t off);
|
||||
Value *loadMsInfo32(Value *ptr, uint32_t off);
|
||||
Value *loadTexHandle(Value *ptr, unsigned int slot);
|
||||
|
||||
|
|
|
|||
|
|
@ -336,6 +336,7 @@ private:
|
|||
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
|
||||
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
|
||||
void opnd(Instruction *, ImmediateValue&, int s);
|
||||
void opnd3(Instruction *, ImmediateValue&);
|
||||
|
||||
void unary(Instruction *, const ImmediateValue&);
|
||||
|
||||
|
|
@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
|
|||
else
|
||||
if (i->srcExists(1) && i->src(1).getImmediate(src1))
|
||||
opnd(i, src1, 1);
|
||||
if (i->srcExists(2) && i->src(2).getImmediate(src2))
|
||||
opnd3(i, src2);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -872,6 +875,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
|
||||
{
|
||||
switch (i->op) {
|
||||
case OP_MAD:
|
||||
case OP_FMA:
|
||||
if (imm2.isInteger(0)) {
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(2, NULL);
|
||||
foldCount++;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
{
|
||||
|
|
@ -1202,6 +1223,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
|
||||
}
|
||||
break;
|
||||
case OP_SHR:
|
||||
if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
|
||||
bld.setPosition(i, false);
|
||||
i->op = OP_AND;
|
||||
i->setSrc(0, si->getSrc(0));
|
||||
i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
|
||||
}
|
||||
break;
|
||||
case OP_MUL:
|
||||
int muls;
|
||||
if (isFloatType(si->dType))
|
||||
|
|
@ -2504,6 +2533,12 @@ MemoryOpt::runOpt(BasicBlock *bb)
|
|||
}
|
||||
} else
|
||||
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
|
||||
if (typeSizeof(ldst->dType) == 4 &&
|
||||
ldst->src(1).getFile() == FILE_GPR &&
|
||||
ldst->getSrc(1)->getInsn()->op == OP_NOP) {
|
||||
delete_Instruction(prog, ldst);
|
||||
continue;
|
||||
}
|
||||
isLoad = false;
|
||||
} else {
|
||||
// TODO: maybe have all fixed ops act as barrier ?
|
||||
|
|
@ -3015,7 +3050,7 @@ Instruction::isResultEqual(const Instruction *that) const
|
|||
if (that->srcExists(s))
|
||||
return false;
|
||||
|
||||
if (op == OP_LOAD || op == OP_VFETCH) {
|
||||
if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
|
||||
switch (src(0).getFile()) {
|
||||
case FILE_MEMORY_CONST:
|
||||
case FILE_SHADER_INPUT:
|
||||
|
|
@ -3046,6 +3081,8 @@ GlobalCSE::visit(BasicBlock *bb)
|
|||
ik = phi->getSrc(0)->getInsn();
|
||||
if (!ik)
|
||||
continue; // probably a function input
|
||||
if (ik->defCount(0xff) > 1)
|
||||
continue; // too painful to check if we can really push this forward
|
||||
for (s = 1; phi->srcExists(s); ++s) {
|
||||
if (phi->getSrc(s)->refCount() > 1)
|
||||
break;
|
||||
|
|
@ -3179,10 +3216,10 @@ DeadCodeElim::buryAll(Program *prog)
|
|||
bool
|
||||
DeadCodeElim::visit(BasicBlock *bb)
|
||||
{
|
||||
Instruction *next;
|
||||
Instruction *prev;
|
||||
|
||||
for (Instruction *i = bb->getFirst(); i; i = next) {
|
||||
next = i->next;
|
||||
for (Instruction *i = bb->getExit(); i; i = prev) {
|
||||
prev = i->prev;
|
||||
if (i->isDead()) {
|
||||
++deadCount;
|
||||
delete_Instruction(prog, i);
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ const char *operationStr[OP_LAST + 1] =
|
|||
"subfm",
|
||||
"suclamp",
|
||||
"sueau",
|
||||
"suq",
|
||||
"madsp",
|
||||
"texbar",
|
||||
"dfdx",
|
||||
|
|
|
|||
|
|
@ -1544,6 +1544,9 @@ GCRA::cleanup(const bool success)
|
|||
|
||||
delete[] nodes;
|
||||
nodes = NULL;
|
||||
hi.next = hi.prev = &hi;
|
||||
lo[0].next = lo[0].prev = &lo[0];
|
||||
lo[1].next = lo[1].prev = &lo[1];
|
||||
}
|
||||
|
||||
Symbol *
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ const uint8_t Target::operationSrcNr[] =
|
|||
1, 1, 1, // TEX, TXB, TXL,
|
||||
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
|
||||
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
|
||||
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
|
||||
3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
|
||||
0, // TEXBAR
|
||||
1, 1, // DFDX, DFDY
|
||||
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
|
||||
|
|
@ -109,8 +109,8 @@ const OpClass Target::operationClass[] =
|
|||
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
|
||||
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
|
||||
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
|
||||
// SUBFM, SUCLAMP, SUEAU, MADSP
|
||||
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
|
||||
// SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
|
||||
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
|
||||
// TEXBAR
|
||||
OPCLASS_OTHER,
|
||||
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
|
||||
|
|
|
|||
|
|
@ -266,7 +266,9 @@ nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
|
|||
int i;
|
||||
|
||||
for (i = 0; i < num_buffers; ++i) {
|
||||
#ifndef NDEBUG
|
||||
assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
|
||||
#endif
|
||||
memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
|
||||
dec->bsp_ptr += num_bytes[i];
|
||||
str_bsp->w0[0] += num_bytes[i];
|
||||
|
|
|
|||
|
|
@ -184,6 +184,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -369,7 +369,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
|||
NOUVEAU_ERR("shader translation failed: %i\n", ret);
|
||||
goto out;
|
||||
}
|
||||
FREE(info->bin.syms);
|
||||
|
||||
prog->code = info->bin.code;
|
||||
prog->code_size = info->bin.codeSize;
|
||||
|
|
@ -403,10 +402,13 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
|||
break;
|
||||
}
|
||||
prog->gp.vert_count = info->prop.gp.maxVertices;
|
||||
} else
|
||||
}
|
||||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
prog->cp.syms = info->bin.syms;
|
||||
prog->cp.num_syms = info->bin.numSyms;
|
||||
} else {
|
||||
FREE(info->bin.syms);
|
||||
}
|
||||
|
||||
if (prog->pipe.stream_output.num_outputs)
|
||||
|
|
@ -507,6 +509,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
|
|||
FREE(p->interps);
|
||||
FREE(p->so);
|
||||
|
||||
if (type == PIPE_SHADER_COMPUTE)
|
||||
FREE(p->cp.syms);
|
||||
|
||||
memset(p, 0, sizeof(*p));
|
||||
|
||||
p->pipe = pipe;
|
||||
|
|
|
|||
|
|
@ -227,6 +227,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -594,6 +594,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
|
|||
PUSH_DATA (push, nv50->rt_array_mode);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_clear_buffer_push(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size,
|
||||
const void *data, int data_size)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv04_resource *buf = nv04_resource(res);
|
||||
unsigned count = (size + 3) / 4;
|
||||
unsigned xcoord = offset & 0xff;
|
||||
unsigned tmp, i;
|
||||
|
||||
if (data_size == 1) {
|
||||
tmp = *(unsigned char *)data;
|
||||
tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
|
||||
data = &tmp;
|
||||
data_size = 4;
|
||||
} else if (data_size == 2) {
|
||||
tmp = *(unsigned short *)data;
|
||||
tmp = (tmp << 16) | tmp;
|
||||
data = &tmp;
|
||||
data_size = 4;
|
||||
}
|
||||
|
||||
unsigned data_words = data_size / 4;
|
||||
|
||||
nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
nouveau_pushbuf_bufctx(push, nv50->bufctx);
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
offset &= ~0xff;
|
||||
|
||||
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
|
||||
PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
|
||||
PUSH_DATA (push, 262144);
|
||||
PUSH_DATA (push, 65536);
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
|
||||
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
|
||||
PUSH_DATA (push, size);
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, xcoord);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
while (count) {
|
||||
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
|
||||
unsigned nr = nr_data * data_words;
|
||||
|
||||
BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
|
||||
for (i = 0; i < nr_data; i++)
|
||||
PUSH_DATAp(push, data, data_words);
|
||||
|
||||
count -= nr;
|
||||
}
|
||||
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_clear_buffer(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
|
|
@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
|||
|
||||
assert(size % data_size == 0);
|
||||
|
||||
if (offset & 0xff) {
|
||||
unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
|
||||
assert(fixup_size % data_size == 0);
|
||||
nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
|
||||
offset += fixup_size;
|
||||
size -= fixup_size;
|
||||
if (!size)
|
||||
return;
|
||||
}
|
||||
|
||||
elements = size / data_size;
|
||||
height = (elements + 8191) / 8192;
|
||||
width = elements / height;
|
||||
if (height > 1)
|
||||
width &= ~0xff;
|
||||
assert(width > 0);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
|
||||
PUSH_DATAf(push, color.f[0]);
|
||||
|
|
@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
|||
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
|
||||
PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
|
||||
PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
|
||||
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
|
||||
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
|
||||
PUSH_DATA (push, height);
|
||||
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
|
@ -694,25 +783,20 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
|||
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
|
||||
PUSH_DATA (push, 0x3c);
|
||||
|
||||
if (width * height != elements) {
|
||||
offset += width * height * data_size;
|
||||
width = elements - width * height;
|
||||
height = 1;
|
||||
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
|
||||
PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
|
||||
PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
|
||||
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
|
||||
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
|
||||
PUSH_DATA (push, height);
|
||||
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
|
||||
PUSH_DATA (push, 0x3c);
|
||||
}
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
|
||||
PUSH_DATA (push, nv50->cond_condmode);
|
||||
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
|
||||
if (width * height != elements) {
|
||||
offset += width * height * data_size;
|
||||
width = elements - width * height;
|
||||
nv50_clear_buffer_push(pipe, res, offset, width * data_size,
|
||||
data, data_size);
|
||||
}
|
||||
|
||||
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -491,3 +491,52 @@ daic_runout:
|
|||
daic_runout_check:
|
||||
branz annul $r7 #daic_runout
|
||||
bra annul #daic_restore
|
||||
|
||||
/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
|
||||
*
|
||||
* This is a combination macro for all of our query buffer object needs.
|
||||
* It has the option to clamp results to a configurable amount, as well as
|
||||
* to write out one or two words.
|
||||
*
|
||||
* We use the query engine to write out the values, and expect the query
|
||||
* address to point to the right place.
|
||||
*
|
||||
* arg = clamp value (0 means unclamped). clamped means just 1 written value.
|
||||
* parm[0] = LSB of end value
|
||||
* parm[1] = MSB of end value
|
||||
* parm[2] = LSB of start value
|
||||
* parm[3] = MSB of start value
|
||||
* parm[4] = desired sequence
|
||||
* parm[5] = actual sequence
|
||||
*/
|
||||
.section #mme9097_query_buffer_write
|
||||
/* Fetch the six parameters listed above: $r2/$r3 = end value (low/high), */
/* $r4/$r5 = start value (low/high), $r6 = desired and $r7 = actual sequence. */
parm $r2
|
||||
parm $r3
|
||||
parm $r4
|
||||
parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */
|
||||
parm $r6
|
||||
parm $r7
|
||||
mov $r6 (sub $r7 $r6) /* actual - desired */
|
||||
mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
|
||||
braz annul $r6 #qbw_ready
|
||||
/* Sequence not reached yet: leave without sending any data. */
exit
|
||||
qbw_ready:
|
||||
/* Low word of (end - start). */
mov $r2 (sub $r2 $r4)
|
||||
/* $r1 is presumably the macro argument (the clamp value); zero skips the */
/* clamping below. NOTE(review): this branch has no "annul", so the sbb that */
/* follows presumably still runs in its delay slot and $r3 receives the high */
/* word either way -- confirm against the MME documentation. */
braz $r1 #qbw_postclamp
|
||||
mov $r3 (sbb $r3 $r5)
|
||||
/* Any non-zero high word forces the clamp. */
branz annul $r3 #qbw_clamp
|
||||
/* clamp - low word; a borrow means the low word exceeds the clamp. */
mov $r4 (sub $r1 $r2)
|
||||
mov $r4 (sbb 0x0 0x0)
|
||||
braz annul $r4 #qbw_postclamp
|
||||
qbw_clamp:
|
||||
/* Replace the result with the clamp value. */
mov $r2 $r1
|
||||
qbw_postclamp:
|
||||
/* Emit the (possibly clamped) low word. */
send $r2
|
||||
mov $r4 0x1000
|
||||
/* A non-zero clamp means only one value is written (see header), so the */
/* high word is skipped. */
branz annul $r1 #qbw_done
|
||||
/* NOTE(review): extrinsrt appears to place the low 16 bits of $r4 (0x1000) */
/* at bit 16, i.e. 0x10000000; presumably the value that triggers the query */
/* write -- confirm. */
send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
|
||||
maddr 0x16c2 /* QUERY_SEQUENCE */
|
||||
/* Emit the high word of the difference. */
send $r3
|
||||
qbw_done:
|
||||
exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
|
||||
nop
|
||||
|
|
|
|||
|
|
@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
|
|||
0xfffef837,
|
||||
0xfffdc027,
|
||||
};
|
||||
|
||||
/* Opcode words for the query-buffer-write macro; presumably the assembled
 * form of the #mme9097_query_buffer_write assembly section, so regenerate
 * it rather than editing by hand (TODO confirm the build step).
 * NOTE(review): the label comments below give instruction indices (e.g.
 * 0x000a = tenth word) but do not sit next to the words they refer to.
 */
uint32_t mme9097_query_buffer_write[] = {
|
||||
0x00000201,
|
||||
0x00000301,
|
||||
/* 0x000a: qbw_ready */
|
||||
0x00000401,
|
||||
0x05b08551,
|
||||
/* 0x0011: qbw_clamp */
|
||||
/* 0x0012: qbw_postclamp */
|
||||
0x00000601,
|
||||
0x00000701,
|
||||
/* 0x0018: qbw_done */
|
||||
0x0005be10,
|
||||
0x00060610,
|
||||
0x0000b027,
|
||||
0x00000091,
|
||||
0x00051210,
|
||||
0x0001c807,
|
||||
0x00075b10,
|
||||
0x00011837,
|
||||
0x00048c10,
|
||||
0x00060410,
|
||||
0x0000a027,
|
||||
0x00000a11,
|
||||
0x00001041,
|
||||
0x04000411,
|
||||
0x00010837,
|
||||
0x84010042,
|
||||
0x05b08021,
|
||||
0x00001841,
|
||||
0x840100c2,
|
||||
0x00000011,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ static void
|
|||
nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
int i, s;
|
||||
|
||||
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
|
||||
|
|
@ -90,6 +91,9 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
|
|||
}
|
||||
}
|
||||
}
|
||||
if (flags & PIPE_BARRIER_SHADER_BUFFER) {
|
||||
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -122,6 +126,10 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
|
|||
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
|
||||
}
|
||||
|
||||
for (s = 0; s < 6; ++s)
|
||||
for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
|
||||
pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
|
||||
|
||||
for (i = 0; i < nvc0->num_tfbbufs; ++i)
|
||||
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
|
||||
|
||||
|
|
@ -180,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
|||
int ref)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
|
||||
unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
|
||||
unsigned s, i;
|
||||
|
||||
if (bind & PIPE_BIND_RENDER_TARGET) {
|
||||
if (res->bind & PIPE_BIND_RENDER_TARGET) {
|
||||
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
|
||||
if (nvc0->framebuffer.cbufs[i] &&
|
||||
nvc0->framebuffer.cbufs[i]->texture == res) {
|
||||
|
|
@ -194,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (nvc0->framebuffer.zsbuf &&
|
||||
nvc0->framebuffer.zsbuf->texture == res) {
|
||||
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
|
||||
|
|
@ -204,12 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
if (bind & (PIPE_BIND_VERTEX_BUFFER |
|
||||
PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER |
|
||||
PIPE_BIND_SAMPLER_VIEW)) {
|
||||
if (res->target == PIPE_BUFFER) {
|
||||
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
|
||||
if (nvc0->vtxbuf[i].buffer == res) {
|
||||
nvc0->dirty |= NVC0_NEW_ARRAYS;
|
||||
|
|
@ -253,6 +255,18 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
|
||||
if (nvc0->buffers[s][i].buffer == res) {
|
||||
nvc0->buffers_dirty[s] |= 1 << i;
|
||||
nvc0->dirty |= NVC0_NEW_BUFFERS;
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ref;
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
#define NVC0_NEW_SURFACES (1 << 23)
|
||||
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
|
||||
#define NVC0_NEW_TESSFACTOR (1 << 25)
|
||||
#define NVC0_NEW_BUFFERS (1 << 26)
|
||||
|
||||
#define NVC0_NEW_CP_PROGRAM (1 << 0)
|
||||
#define NVC0_NEW_CP_SURFACES (1 << 1)
|
||||
|
|
@ -73,9 +74,10 @@
|
|||
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
|
||||
#define NVC0_BIND_TFB 244
|
||||
#define NVC0_BIND_SUF 245
|
||||
#define NVC0_BIND_SCREEN 246
|
||||
#define NVC0_BIND_TLS 247
|
||||
#define NVC0_BIND_3D_COUNT 248
|
||||
#define NVC0_BIND_BUF 246
|
||||
#define NVC0_BIND_SCREEN 247
|
||||
#define NVC0_BIND_TLS 249
|
||||
#define NVC0_BIND_3D_COUNT 250
|
||||
|
||||
/* compute bufctx (during launch_grid) */
|
||||
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
|
||||
|
|
@ -187,10 +189,15 @@ struct nvc0_context {
|
|||
|
||||
struct nvc0_blitctx *blit;
|
||||
|
||||
/* NOTE: some of these surfaces may reference buffers */
|
||||
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
|
||||
uint16_t surfaces_dirty[2];
|
||||
uint16_t surfaces_valid[2];
|
||||
|
||||
struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS];
|
||||
uint32_t buffers_dirty[6];
|
||||
uint32_t buffers_valid[6];
|
||||
|
||||
struct util_dynarray global_residents;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -33,4 +33,6 @@
|
|||
|
||||
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
|
||||
|
||||
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
|
||||
|
||||
#endif /* __NVC0_MACROS_H__ */
|
||||
|
|
|
|||
|
|
@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
|
|||
}
|
||||
info->io.resInfoCBSlot = 15;
|
||||
info->io.sampleInfoBase = 256 + 128;
|
||||
info->io.suInfoBase = 512;
|
||||
info->io.msInfoCBSlot = 15;
|
||||
info->io.msInfoBase = 0; /* TODO */
|
||||
}
|
||||
|
|
@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
|
|||
}
|
||||
*/
|
||||
if (info->io.globalAccess)
|
||||
prog->hdr[0] |= 1 << 26;
|
||||
if (info->io.globalAccess & 0x2)
|
||||
prog->hdr[0] |= 1 << 16;
|
||||
if (info->io.fp64)
|
||||
prog->hdr[0] |= 1 << 27;
|
||||
|
|
|
|||
|
|
@ -73,6 +73,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_get_query_result_resource(struct pipe_context *pipe,
|
||||
struct pipe_query *pq,
|
||||
boolean wait,
|
||||
enum pipe_query_value_type result_type,
|
||||
int index,
|
||||
struct pipe_resource *resource,
|
||||
unsigned offset)
|
||||
{
|
||||
struct nvc0_query *q = nvc0_query(pq);
|
||||
if (!q->funcs->get_query_result_resource) {
|
||||
assert(!"Unexpected lack of get_query_result_resource");
|
||||
return;
|
||||
}
|
||||
q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
|
||||
index, resource, offset);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_render_condition(struct pipe_context *pipe,
|
||||
struct pipe_query *pq,
|
||||
|
|
@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe,
|
|||
}
|
||||
|
||||
if (wait)
|
||||
nvc0_hw_query_fifo_wait(push, q);
|
||||
nvc0_hw_query_fifo_wait(nvc0, q);
|
||||
|
||||
PUSH_SPACE(push, 7);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
|
|
@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
|
|||
pipe->begin_query = nvc0_begin_query;
|
||||
pipe->end_query = nvc0_end_query;
|
||||
pipe->get_query_result = nvc0_get_query_result;
|
||||
pipe->get_query_result_resource = nvc0_get_query_result_resource;
|
||||
pipe->render_condition = nvc0_render_condition;
|
||||
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,13 @@ struct nvc0_query_funcs {
|
|||
void (*end_query)(struct nvc0_context *, struct nvc0_query *);
|
||||
boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
|
||||
boolean, union pipe_query_result *);
|
||||
void (*get_query_result_resource)(struct nvc0_context *nvc0,
|
||||
struct nvc0_query *q,
|
||||
boolean wait,
|
||||
enum pipe_query_value_type result_type,
|
||||
int index,
|
||||
struct pipe_resource *resource,
|
||||
unsigned offset);
|
||||
};
|
||||
|
||||
struct nvc0_query {
|
||||
|
|
|
|||
|
|
@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
|
||||
struct nvc0_query *q,
|
||||
boolean wait,
|
||||
enum pipe_query_value_type result_type,
|
||||
int index,
|
||||
struct pipe_resource *resource,
|
||||
unsigned offset)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_hw_query *hq = nvc0_hw_query(q);
|
||||
struct nv04_resource *buf = nv04_resource(resource);
|
||||
unsigned stride;
|
||||
|
||||
assert(!hq->funcs || !hq->funcs->get_query_result);
|
||||
|
||||
if (index == -1) {
|
||||
/* TODO: Use a macro to write the availability of the query */
|
||||
if (hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_update(nvc0->screen->base.client, q);
|
||||
uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
|
||||
nvc0->base.push_cb(&nvc0->base, buf, offset,
|
||||
result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
|
||||
ready);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If the fence guarding this query has not been emitted, that makes a lot
|
||||
* of the following logic more complicated.
|
||||
*/
|
||||
if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
|
||||
nouveau_fence_emit(hq->fence);
|
||||
|
||||
/* We either need to compute a 32- or 64-bit difference between 2 values,
|
||||
* and then store the result as either a 32- or 64-bit value. As such let's
|
||||
* treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
|
||||
* ones), and have one macro that clamps result to i32, u32, or just
|
||||
* outputs the difference (no need to worry about 64-bit clamping).
|
||||
*/
|
||||
if (hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_update(nvc0->screen->base.client, q);
|
||||
|
||||
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_fifo_wait(nvc0, q);
|
||||
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
|
||||
PUSH_DATA(push, 0x00000001);
|
||||
else if (result_type == PIPE_QUERY_TYPE_I32)
|
||||
PUSH_DATA(push, 0x7fffffff);
|
||||
else if (result_type == PIPE_QUERY_TYPE_U32)
|
||||
PUSH_DATA(push, 0xffffffff);
|
||||
else
|
||||
PUSH_DATA(push, 0x00000000);
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
stride = 2;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
stride = 12;
|
||||
break;
|
||||
default:
|
||||
assert(index == 0);
|
||||
stride = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (hq->is64bit) {
|
||||
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
|
||||
8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
|
||||
8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
} else {
|
||||
nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
|
||||
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
PUSH_DATA(push, 0);
|
||||
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
|
||||
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
PUSH_DATA(push, 0);
|
||||
}
|
||||
|
||||
if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
|
||||
PUSH_DATA(push, 0);
|
||||
PUSH_DATA(push, 0);
|
||||
} else if (hq->is64bit) {
|
||||
PUSH_DATA(push, hq->fence->sequence);
|
||||
nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
|
||||
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
} else {
|
||||
PUSH_DATA(push, hq->sequence);
|
||||
nouveau_pushbuf_data(push, hq->bo, hq->offset,
|
||||
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
|
||||
}
|
||||
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Callback table wiring the nvc0_hw_* implementations into the generic
 * nvc0_query_funcs interface.
 */
static const struct nvc0_query_funcs hw_query_funcs = {
|
||||
.destroy_query = nvc0_hw_destroy_query,
|
||||
.begin_query = nvc0_hw_begin_query,
|
||||
.end_query = nvc0_hw_end_query,
|
||||
.get_query_result = nvc0_hw_get_query_result,
|
||||
.get_query_result_resource = nvc0_hw_get_query_result_resource,
|
||||
};
|
||||
|
||||
struct nvc0_query *
|
||||
|
|
@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
|
|||
}
|
||||
|
||||
void
|
||||
nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
|
||||
nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_hw_query *hq = nvc0_hw_query(q);
|
||||
unsigned offset = hq->offset;
|
||||
|
||||
|
|
@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
|
|||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->sequence);
|
||||
if (hq->is64bit) {
|
||||
PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
|
||||
PUSH_DATA (push, nvc0->screen->fence.bo->offset);
|
||||
PUSH_DATA (push, hq->fence->sequence);
|
||||
} else {
|
||||
PUSH_DATAh(push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->sequence);
|
||||
}
|
||||
PUSH_DATA (push, (1 << 12) |
|
||||
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,6 @@ void
|
|||
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
|
||||
unsigned);
|
||||
void
|
||||
nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
|
||||
nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 256;
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 1; /* 256 for binding as RT, but that's not possible in GL */
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 16;
|
||||
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
|
||||
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
@ -189,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
return 1;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
|
||||
|
|
@ -212,10 +215,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -322,8 +327,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return NVC0_MAX_BUFFERS;
|
||||
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
|
||||
return 16; /* would be 32 in linked (OpenGL-style) mode */
|
||||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
|
|
@ -676,8 +682,9 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
push->rsvd_kick = 5;
|
||||
|
||||
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_SHADER_BUFFER |
|
||||
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER;
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
|
||||
screen->base.sysmem_bindings |=
|
||||
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
|
||||
|
||||
|
|
@ -891,9 +898,9 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
/* TIC and TSC entries for each unit (nve4+ only) */
|
||||
/* auxiliary constants (6 user clip planes, base instance id) */
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 512);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
|
||||
PUSH_DATA (push, 1024);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
|
||||
PUSH_DATA (push, (15 << 4) | 1);
|
||||
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
|
||||
|
|
@ -913,8 +920,8 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 256);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATAf(push, 0.0f);
|
||||
|
|
@ -922,8 +929,8 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
PUSH_DATAf(push, 0.0f);
|
||||
PUSH_DATAf(push, 0.0f);
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
|
||||
|
||||
if (screen->base.drm->version >= 0x01000101) {
|
||||
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
|
||||
|
|
@ -953,8 +960,12 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
PUSH_DATA (push, screen->tls->size);
|
||||
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
/* Reduce likelihood of collision with real buffers by placing the hole at
|
||||
* the top of the 4G area. This will have to be dealt with for real
|
||||
* eventually by blocking off that area from the VM.
|
||||
*/
|
||||
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0xff << 24);
|
||||
|
||||
if (screen->eng3d->oclass < GM107_3D_CLASS) {
|
||||
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
|
||||
|
|
@ -1039,6 +1050,7 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
|
||||
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
|
||||
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
|
||||
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@
|
|||
|
||||
#define NVC0_MAX_VIEWPORTS 16
|
||||
|
||||
#define NVC0_MAX_BUFFERS 32
|
||||
|
||||
|
||||
struct nvc0_context;
|
||||
|
||||
|
|
|
|||
|
|
@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
|
|||
continue;
|
||||
|
||||
if (!targ->clean)
|
||||
nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
|
||||
nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
|
||||
nouveau_pushbuf_space(push, 0, 0, 1);
|
||||
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
|
|||
|
|
@ -1243,11 +1243,50 @@ nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
|
|||
unsigned start_slot, unsigned count,
|
||||
struct pipe_image_view **views)
|
||||
{
|
||||
#if 0
|
||||
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
|
||||
}
|
||||
|
||||
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
|
||||
#endif
|
||||
static void
|
||||
nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_shader_buffer *pbuffers)
|
||||
{
|
||||
const unsigned end = start + nr;
|
||||
const unsigned mask = ((1 << nr) - 1) << start;
|
||||
unsigned i;
|
||||
|
||||
assert(t < 5);
|
||||
|
||||
if (pbuffers) {
|
||||
for (i = start; i < end; ++i) {
|
||||
const unsigned p = i - start;
|
||||
if (pbuffers[p].buffer)
|
||||
nvc0->buffers_valid[t] |= (1 << i);
|
||||
else
|
||||
nvc0->buffers_valid[t] &= ~(1 << i);
|
||||
nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset;
|
||||
nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size;
|
||||
pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer);
|
||||
}
|
||||
} else {
|
||||
for (i = start; i < end; ++i)
|
||||
pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL);
|
||||
nvc0->buffers_valid[t] &= ~mask;
|
||||
}
|
||||
nvc0->buffers_dirty[t] |= mask;
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_set_shader_buffers(struct pipe_context *pipe,
|
||||
unsigned shader,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_shader_buffer *buffers)
|
||||
{
|
||||
const unsigned s = nvc0_shader_stage(shader);
|
||||
nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
|
||||
|
||||
nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
@ -1377,6 +1416,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
|
|||
pipe->set_global_binding = nvc0_set_global_bindings;
|
||||
pipe->set_compute_resources = nvc0_set_compute_resources;
|
||||
pipe->set_shader_images = nvc0_set_shader_images;
|
||||
pipe->set_shader_buffers = nvc0_set_shader_buffers;
|
||||
|
||||
nvc0->sample_mask = ~0;
|
||||
nvc0->min_samples = 1;
|
||||
|
|
|
|||
|
|
@ -183,9 +183,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
|
|||
|
||||
ms = 1 << ms_mode;
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 512);
|
||||
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
|
||||
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
|
||||
PUSH_DATA (push, 1024);
|
||||
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
|
||||
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
|
||||
PUSH_DATA (push, 256 + 128);
|
||||
for (i = 0; i < ms; i++) {
|
||||
|
|
@ -317,9 +317,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
|
|||
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 512);
|
||||
PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
|
||||
PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
|
||||
PUSH_DATA (push, 1024);
|
||||
PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
|
||||
PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
|
||||
PUSH_DATA (push, 256);
|
||||
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
|
||||
|
|
@ -470,6 +470,39 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_validate_buffers(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
int i, s;
|
||||
|
||||
for (s = 0; s < 5; s++) {
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 1024);
|
||||
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
|
||||
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
|
||||
PUSH_DATA (push, 512);
|
||||
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
|
||||
if (nvc0->buffers[s][i].buffer) {
|
||||
struct nv04_resource *res =
|
||||
nv04_resource(nvc0->buffers[s][i].buffer);
|
||||
PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
|
||||
PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
|
||||
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
|
||||
PUSH_DATA (push, 0);
|
||||
BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
|
||||
} else {
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
|
||||
{
|
||||
|
|
@ -663,6 +696,7 @@ static struct state_validate {
|
|||
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
|
||||
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
|
||||
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
|
||||
{ nvc0_validate_buffers, NVC0_NEW_BUFFERS },
|
||||
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
|
||||
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
|
||||
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
|
||||
|
|
|
|||
|
|
@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
|
|||
}
|
||||
|
||||
static void
|
||||
nvc0_clear_buffer_cpu(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size,
|
||||
const void *data, int data_size)
|
||||
nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size,
|
||||
const void *data, int data_size)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nv04_resource *buf = nv04_resource(res);
|
||||
struct pipe_transfer *pt;
|
||||
struct pipe_box box;
|
||||
unsigned elements, i;
|
||||
unsigned i;
|
||||
|
||||
elements = size / data_size;
|
||||
nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
nouveau_pushbuf_bufctx(push, nvc0->bufctx);
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
u_box_1d(offset, size, &box);
|
||||
unsigned count = (size + 3) / 4;
|
||||
unsigned data_words = data_size / 4;
|
||||
|
||||
uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
|
||||
&box, &pt);
|
||||
while (count) {
|
||||
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
|
||||
unsigned nr = nr_data * data_words;
|
||||
|
||||
for (i = 0; i < elements; ++i)
|
||||
memcpy(&map[i*data_size], data, data_size);
|
||||
if (!PUSH_SPACE(push, nr + 9))
|
||||
break;
|
||||
|
||||
buf->vtbl->transfer_unmap(pipe, pt);
|
||||
BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, MIN2(size, nr * 4));
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
|
||||
PUSH_DATA (push, 0x100111);
|
||||
|
||||
/* must not be interrupted (trap on QUERY fence, 0x50 works however) */
|
||||
BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
|
||||
for (i = 0; i < nr_data; i++)
|
||||
PUSH_DATAp(push, data, data_words);
|
||||
|
||||
count -= nr;
|
||||
offset += nr * 4;
|
||||
size -= nr * 4;
|
||||
}
|
||||
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size,
|
||||
const void *data, int data_size)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nv04_resource *buf = nv04_resource(res);
|
||||
unsigned i;
|
||||
|
||||
nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
nouveau_pushbuf_bufctx(push, nvc0->bufctx);
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
unsigned count = (size + 3) / 4;
|
||||
unsigned data_words = data_size / 4;
|
||||
|
||||
while (count) {
|
||||
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
|
||||
unsigned nr = nr_data * data_words;
|
||||
|
||||
if (!PUSH_SPACE(push, nr + 10))
|
||||
break;
|
||||
|
||||
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, MIN2(size, nr * 4));
|
||||
PUSH_DATA (push, 1);
|
||||
/* must not be interrupted (trap on QUERY fence, 0x50 works however) */
|
||||
BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
|
||||
PUSH_DATA (push, 0x1001);
|
||||
for (i = 0; i < nr_data; i++)
|
||||
PUSH_DATAp(push, data, data_words);
|
||||
|
||||
count -= nr;
|
||||
offset += nr * 4;
|
||||
size -= nr * 4;
|
||||
}
|
||||
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_clear_buffer_push(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned offset, unsigned size,
|
||||
const void *data, int data_size)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
unsigned tmp;
|
||||
|
||||
if (data_size == 1) {
|
||||
tmp = *(unsigned char *)data;
|
||||
tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
|
||||
data = &tmp;
|
||||
data_size = 4;
|
||||
} else if (data_size == 2) {
|
||||
tmp = *(unsigned short *)data;
|
||||
tmp = (tmp << 16) | tmp;
|
||||
data = &tmp;
|
||||
data_size = 4;
|
||||
}
|
||||
|
||||
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
|
||||
nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
|
||||
else
|
||||
nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
|
|||
memcpy(&color.ui, data, 16);
|
||||
break;
|
||||
case 12:
|
||||
/* This doesn't work, RGB32 is not a valid RT format.
|
||||
* dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
|
||||
* memcpy(&color.ui, data, 12);
|
||||
* memset(&color.ui[3], 0, 4);
|
||||
/* RGB32 is not a valid RT format. This will be handled by the pushbuf
|
||||
* uploader.
|
||||
*/
|
||||
break;
|
||||
case 8:
|
||||
|
|
@ -437,14 +540,26 @@ nvc0_clear_buffer(struct pipe_context *pipe,
|
|||
assert(size % data_size == 0);
|
||||
|
||||
if (data_size == 12) {
|
||||
/* TODO: Find a way to do this with the GPU! */
|
||||
nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
|
||||
nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (offset & 0xff) {
|
||||
unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
|
||||
assert(fixup_size % data_size == 0);
|
||||
nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
|
||||
offset += fixup_size;
|
||||
size -= fixup_size;
|
||||
if (!size)
|
||||
return;
|
||||
}
|
||||
|
||||
elements = size / data_size;
|
||||
height = (elements + 16383) / 16384;
|
||||
width = elements / height;
|
||||
if (height > 1)
|
||||
width &= ~0xff;
|
||||
assert(width > 0);
|
||||
|
||||
if (!PUSH_SPACE(push, 40))
|
||||
return;
|
||||
|
|
@ -465,7 +580,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
|
|||
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
PUSH_DATA (push, width * data_size);
|
||||
PUSH_DATA (push, align(width * data_size, 0x100));
|
||||
PUSH_DATA (push, height);
|
||||
PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
|
||||
PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
|
||||
|
|
@ -480,24 +595,20 @@ nvc0_clear_buffer(struct pipe_context *pipe,
|
|||
|
||||
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
|
||||
|
||||
if (buf->mm) {
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
|
||||
}
|
||||
|
||||
if (width * height != elements) {
|
||||
offset += width * height * data_size;
|
||||
width = elements - width * height;
|
||||
height = 1;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
|
||||
PUSH_DATAh(push, buf->address + offset);
|
||||
PUSH_DATA (push, buf->address + offset);
|
||||
PUSH_DATA (push, width * data_size);
|
||||
PUSH_DATA (push, height);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
|
||||
nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
|
||||
data, data_size);
|
||||
}
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
|
||||
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
|
||||
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
|
||||
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -515,12 +515,12 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
|
|||
return;
|
||||
address = nvc0->screen->uniform_bo->offset + (5 << 16);
|
||||
|
||||
for (s = 0; s < 5; ++s, address += (1 << 9)) {
|
||||
for (s = 0; s < 5; ++s, address += (1 << 10)) {
|
||||
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
|
||||
if (!dirty)
|
||||
continue;
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 512);
|
||||
PUSH_DATA (push, 1024);
|
||||
PUSH_DATAh(push, address);
|
||||
PUSH_DATA (push, address);
|
||||
do {
|
||||
|
|
|
|||
|
|
@ -334,7 +334,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
|
|||
b = ve->pipe.vertex_buffer_index;
|
||||
vb = &nvc0->vtxbuf[b];
|
||||
|
||||
if (!vb->buffer) {
|
||||
if (nvc0->vbo_user & (1 << b)) {
|
||||
if (!(nvc0->constant_vbos & (1 << b))) {
|
||||
if (ve->pipe.instance_divisor) {
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
|
||||
|
|
@ -352,13 +352,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
|
|||
|
||||
if (unlikely(ve->pipe.instance_divisor)) {
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
|
||||
PUSH_DATA (push, (1 << 12) | vb->stride);
|
||||
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
|
||||
PUSH_DATAh(push, res->address + offset);
|
||||
PUSH_DATA (push, res->address + offset);
|
||||
PUSH_DATA (push, ve->pipe.instance_divisor);
|
||||
} else {
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
|
||||
PUSH_DATA (push, (1 << 12) | vb->stride);
|
||||
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
|
||||
PUSH_DATAh(push, res->address + offset);
|
||||
PUSH_DATA (push, res->address + offset);
|
||||
}
|
||||
|
|
@ -382,7 +382,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
|
|||
unsigned b;
|
||||
const uint32_t mask = nvc0->vbo_user;
|
||||
|
||||
PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
|
||||
PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
|
||||
for (b = 0; b < nvc0->num_vtxbufs; ++b) {
|
||||
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
|
||||
struct nv04_resource *buf;
|
||||
|
|
@ -395,6 +395,10 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
|
|||
}
|
||||
/* address/value set in nvc0_update_user_vbufs_shared */
|
||||
continue;
|
||||
} else if (!vb->buffer) {
|
||||
/* there can be holes in the vertex buffer lists */
|
||||
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
|
||||
continue;
|
||||
}
|
||||
buf = nv04_resource(vb->buffer);
|
||||
offset = vb->buffer_offset;
|
||||
|
|
@ -410,6 +414,12 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
|
|||
|
||||
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
|
||||
}
|
||||
/* If there are more elements than buffers, we might not have unset
|
||||
* fetching on the later elements.
|
||||
*/
|
||||
for (; b < nvc0->vertex->num_elements; ++b)
|
||||
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
|
||||
|
||||
if (nvc0->vbo_user)
|
||||
nvc0_update_user_vbufs_shared(nvc0);
|
||||
}
|
||||
|
|
@ -680,7 +690,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
|
|||
|
||||
if (count & 1) {
|
||||
count--;
|
||||
PUSH_SPACE(push, 1);
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
|
||||
PUSH_DATA (push, *map++);
|
||||
}
|
||||
|
|
@ -779,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
|
|||
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
PUSH_SPACE(push, 2);
|
||||
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
|
||||
nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
|
||||
nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
|
||||
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
|
||||
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
|
||||
|
||||
|
|
@ -811,6 +821,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
|
|||
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
|
||||
uint32_t offset = buf->offset + info->indirect_offset;
|
||||
|
||||
PUSH_SPACE(push, 7);
|
||||
|
||||
/* must make FIFO wait for engines idle before continuing to process */
|
||||
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
|
||||
(buf_count && buf_count->fence_wr &&
|
||||
|
|
@ -951,6 +963,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
if (info->mode == PIPE_PRIM_PATCHES &&
|
||||
nvc0->state.patch_vertices != info->vertices_per_patch) {
|
||||
nvc0->state.patch_vertices = info->vertices_per_patch;
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
|
||||
}
|
||||
|
||||
|
|
@ -958,6 +971,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
nvc0_state_validate(nvc0, ~0, 8);
|
||||
|
||||
if (nvc0->vertprog->vp.need_draw_parameters) {
|
||||
PUSH_SPACE(push, 9);
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, 512);
|
||||
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
|
||||
|
|
@ -979,6 +993,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
}
|
||||
|
||||
if (nvc0->cb_dirty) {
|
||||
PUSH_SPACE(push, 1);
|
||||
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
|
||||
nvc0->cb_dirty = false;
|
||||
}
|
||||
|
|
@ -987,6 +1002,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
if (!nvc0->textures_coherent[s])
|
||||
continue;
|
||||
|
||||
PUSH_SPACE(push, nvc0->num_textures[s] * 2);
|
||||
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
if (!(nvc0->textures_coherent[s] & (1 << i)))
|
||||
|
|
|
|||
|
|
@ -210,6 +210,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
return 0;
|
||||
|
||||
/* SWTCL-only features. */
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ void *evergreen_create_compute_state(
|
|||
}
|
||||
}
|
||||
#else
|
||||
memset(&shader->binary, 0, sizeof(shader->binary));
|
||||
radeon_shader_binary_init(&shader->binary);
|
||||
radeon_elf_read(code, header->num_bytes, &shader->binary);
|
||||
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
|
||||
|
||||
|
|
@ -245,13 +245,31 @@ void *evergreen_create_compute_state(
|
|||
return shader;
|
||||
}
|
||||
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
|
||||
{
|
||||
struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
|
||||
struct r600_pipe_compute *shader = state;
|
||||
|
||||
if (!shader)
|
||||
return;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#if HAVE_LLVM < 0x0306
|
||||
for (unsigned i = 0; i < shader->num_kernels; i++) {
|
||||
struct r600_kernel *kernel = &shader->kernels[i];
|
||||
LLVMDisposeModule(module);
|
||||
}
|
||||
FREE(shader->kernels);
|
||||
LLVMContextDispose(shader->llvm_ctx);
|
||||
#else
|
||||
radeon_shader_binary_clean(&shader->binary);
|
||||
r600_destroy_shader(&shader->bc);
|
||||
|
||||
/* TODO destroy shader->code_bo, shader->const_bo
|
||||
* we'll need something like r600_buffer_free */
|
||||
#endif
|
||||
#endif
|
||||
FREE(shader);
|
||||
}
|
||||
|
||||
|
|
@ -349,7 +367,7 @@ static void evergreen_emit_direct_dispatch(
|
|||
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
|
||||
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
|
||||
unsigned num_waves;
|
||||
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
|
||||
unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
|
||||
unsigned wave_divisor = (16 * num_pipes);
|
||||
int group_size = 1;
|
||||
int grid_size = 1;
|
||||
|
|
@ -723,7 +741,7 @@ static void evergreen_set_global_binding(
|
|||
* command stream by the start_cs_cmd atom. However, since the SET_CONTEXT_REG
|
||||
* packet requires that the shader type bit be set, we must initialize all
|
||||
* context registers needed for compute in this function. The registers
|
||||
* intialized by the start_cs_cmd atom can be found in evereen_state.c in the
|
||||
* initialized by the start_cs_cmd atom can be found in evergreen_state.c in the
|
||||
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
|
||||
* on the GPU family.
|
||||
*/
|
||||
|
|
@ -733,7 +751,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
int num_threads;
|
||||
int num_stack_entries;
|
||||
|
||||
/* since all required registers are initialised in the
|
||||
/* since all required registers are initialized in the
|
||||
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
|
||||
*/
|
||||
r600_init_command_buffer(cb, 256);
|
||||
|
|
@ -818,7 +836,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
* R_008E28_SQ_STATIC_THREAD_MGMT3
|
||||
*/
|
||||
|
||||
/* XXX: We may need to adjust the thread and stack resouce
|
||||
/* XXX: We may need to adjust the thread and stack resource
|
||||
* values for 3D/compute interop */
|
||||
|
||||
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
|
||||
|
|
|
|||
|
|
@ -772,7 +772,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
if (util_format_get_blocksize(pipe_format) >= 16)
|
||||
non_disp_tiling = 1;
|
||||
}
|
||||
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
|
||||
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
|
||||
|
||||
if (state->target == PIPE_TEXTURE_1D_ARRAY) {
|
||||
height = 1;
|
||||
|
|
@ -986,7 +986,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
|
|||
unsigned block_size =
|
||||
align(util_format_get_blocksize(pipe_buffer->format), 4);
|
||||
unsigned pitch_alignment =
|
||||
MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size);
|
||||
MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
|
||||
unsigned pitch = align(pipe_buffer->width0, pitch_alignment);
|
||||
|
||||
/* XXX: This is copied from evergreen_init_color_surface(). I don't
|
||||
|
|
@ -1098,7 +1098,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
|
|||
if (util_format_get_blocksize(surf->base.format) >= 16)
|
||||
non_disp_tiling = 1;
|
||||
}
|
||||
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
|
||||
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
|
||||
desc = util_format_description(surf->base.format);
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
|
||||
|
|
@ -1253,7 +1253,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
|
|||
macro_aspect = eg_macro_tile_aspect(macro_aspect);
|
||||
bankw = eg_bank_wh(bankw);
|
||||
bankh = eg_bank_wh(bankh);
|
||||
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
|
||||
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
|
||||
offset >>= 8;
|
||||
|
||||
surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
|
||||
|
|
@ -3467,7 +3467,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
|
|||
sub_cmd = EG_DMA_COPY_TILED;
|
||||
lbpp = util_logbase2(bpp);
|
||||
pitch_tile_max = ((pitch / bpp) / 8) - 1;
|
||||
nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks);
|
||||
nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);
|
||||
|
||||
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
|
||||
/* T2L */
|
||||
|
|
@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context *rctx)
|
|||
unsigned id = 1;
|
||||
unsigned i;
|
||||
/* !!!
|
||||
* To avoid GPU lockup registers must be emited in a specific order
|
||||
* To avoid GPU lockup registers must be emitted in a specific order
|
||||
* (no kidding ...). The order below is important and have been
|
||||
* partialy infered from analyzing fglrx command stream.
|
||||
* partially inferred from analyzing fglrx command stream.
|
||||
*
|
||||
* Don't reorder atom without carefully checking the effect (GPU lockup
|
||||
* or piglit regression).
|
||||
|
|
@ -3793,7 +3793,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
|
|||
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
|
||||
uint32_t values[16];
|
||||
unsigned num_waves;
|
||||
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
|
||||
unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
|
||||
unsigned wave_divisor = (16 * num_pipes);
|
||||
|
||||
*num_patches = 1;
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue