Merge remote-tracking branch 'mesa-public/master' into vulkan

Jason Ekstrand 2016-02-05 15:21:11 -08:00
commit 9401516113
248 changed files with 7167 additions and 2008 deletions


@ -5,6 +5,7 @@
(c-file-style . "stroustrup")
(fill-column . 78)
(eval . (progn
(c-set-offset 'case-label '0)
(c-set-offset 'innamespace '0)
(c-set-offset 'inline-open '0)))
)


@ -6,7 +6,7 @@
# - Select Git and fill in the Git clone URL
# - Setup a Git hook as explained in
# https://github.com/appveyor/webhooks#installing-git-hook
# - Check 'Settings > General > Skip branches without appveyor'
# - Check 'Settings > General > Skip branches without appveyor.yml'
# - Check 'Settings > General > Rolling builds'
# - Setup the global or project notifications to your liking
#
@ -24,7 +24,14 @@ branches:
except:
- /^travis.*$/
clone_depth: 5
# Don't download the full Mesa history to speed up cloning. However the clone
# depth must not be too small, otherwise builds might fail when lots of patches
# are committed in succession, because the desired commit is not found on the
# truncated history.
#
# See also:
# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
clone_depth: 100
cache:
- win_flex_bison-2.4.5.zip


@ -2161,7 +2161,12 @@ gallium_require_drm_loader() {
fi
}
dnl This is for Glamor. Skip this if OpenGL is disabled.
require_egl_drm() {
if test "x$enable_opengl" = xno; then
return 0
fi
case "$with_egl_platforms" in
*drm*)
;;


@ -135,7 +135,7 @@ GL 4.2, GLSL 4.20:
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
GL_ARB_shader_atomic_counters DONE (i965)
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_texture_storage DONE (all drivers)
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size DONE (i965)
GL_ARB_shader_storage_buffer_object DONE (i965)
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40:
- specified transform/feedback layout in progress
- input/output block locations DONE
GL_ARB_multi_bind DONE (all drivers)
GL_ARB_query_buffer_object not started
GL_ARB_query_buffer_object DONE (nvc0)
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)


@ -96,6 +96,7 @@ glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
"130". Mesa will not really implement all the features of the given language version
if it's higher than what's normally reported. (for developers only)
<li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a>
<li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
</ul>


@ -48,7 +48,10 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_compute_shader on i965</li>
<li>GL_ARB_copy_image on r600</li>
<li>GL_ARB_indirect_parameters on nvc0</li>
<li>GL_ARB_query_buffer_object on nvc0</li>
<li>GL_ARB_shader_atomic_counters on nvc0</li>
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
<li>GL_ARB_shader_storage_buffer_object on nvc0</li>
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
@ -58,6 +61,8 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
<li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li>
<li>GL_ATI_meminfo on r600, radeonsi</li>
<li>GL_NVX_gpu_memory_info on r600, radeonsi</li>
<li>New OSMesaCreateContextAttribs() function (for creating core profile
contexts)</li>
</ul>


@ -227,6 +227,7 @@ typedef struct _RGNDATA {
#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157)
#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160)
#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164)
#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168)
/********************************************************
* Bitmasks *


@ -69,6 +69,8 @@ typedef struct ID3DPresentVtbl
HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow);
HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride);
HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This, HWND hWnd, int *width, int *height, int *depth);
/* Available since version 1.1 */
BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This);
} ID3DPresentVtbl;
struct ID3DPresent
@ -96,6 +98,7 @@ struct ID3DPresent
#define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
#define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowSize(p,a,b,c,d)
#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)
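GetWindowOccluded only exists from present interface version 1.1 onward, so callers are expected to version-check before using the new vtbl entry. A minimal sketch, assuming the minor version was already obtained (e.g. via the present group's version query); the 'minor' variable is hypothetical:

    /* skip the call entirely on pre-1.1 interfaces */
    if (minor >= 1 && ID3DPresent_GetWindowOccluded(present)) {
        /* window occluded: skip rendering and report S_PRESENT_OCCLUDED */
    }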
typedef struct ID3DPresentGroupVtbl
{

src/compiler/.gitignore (new file)

@ -0,0 +1 @@
glsl_compiler


@ -220,9 +220,11 @@ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
$(MKDIR_GEN)
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
$(MKDIR_GEN)
$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y


@ -1,4 +1,3 @@
glsl_compiler
glsl_lexer.cpp
glsl_parser.cpp
glsl_parser.h


@ -291,6 +291,10 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
if (!state->is_version(120, 0))
return false;
/* ESSL does not allow implicit conversions */
if (state->es_shader)
return false;
/* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
*
* "There are no implicit array or structure conversions. For


@ -661,7 +661,7 @@ private:
BA1(roundEven)
BA1(ceil)
BA1(fract)
B2(mod)
BA2(mod)
BA1(modf)
BA2(min)
BA2(max)
@ -1242,23 +1242,23 @@ builtin_builder::create_builtins()
FD(fract)
add_function("mod",
_mod(glsl_type::float_type, glsl_type::float_type),
_mod(glsl_type::vec2_type, glsl_type::float_type),
_mod(glsl_type::vec3_type, glsl_type::float_type),
_mod(glsl_type::vec4_type, glsl_type::float_type),
_mod(always_available, glsl_type::float_type, glsl_type::float_type),
_mod(always_available, glsl_type::vec2_type, glsl_type::float_type),
_mod(always_available, glsl_type::vec3_type, glsl_type::float_type),
_mod(always_available, glsl_type::vec4_type, glsl_type::float_type),
_mod(glsl_type::vec2_type, glsl_type::vec2_type),
_mod(glsl_type::vec3_type, glsl_type::vec3_type),
_mod(glsl_type::vec4_type, glsl_type::vec4_type),
_mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type),
_mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type),
_mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type),
_mod(glsl_type::double_type, glsl_type::double_type),
_mod(glsl_type::dvec2_type, glsl_type::double_type),
_mod(glsl_type::dvec3_type, glsl_type::double_type),
_mod(glsl_type::dvec4_type, glsl_type::double_type),
_mod(fp64, glsl_type::double_type, glsl_type::double_type),
_mod(fp64, glsl_type::dvec2_type, glsl_type::double_type),
_mod(fp64, glsl_type::dvec3_type, glsl_type::double_type),
_mod(fp64, glsl_type::dvec4_type, glsl_type::double_type),
_mod(glsl_type::dvec2_type, glsl_type::dvec2_type),
_mod(glsl_type::dvec3_type, glsl_type::dvec3_type),
_mod(glsl_type::dvec4_type, glsl_type::dvec4_type),
_mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),
_mod(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
_mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
NULL);
FD(modf)
@ -3452,9 +3452,10 @@ UNOPA(ceil, ir_unop_ceil)
UNOPA(fract, ir_unop_fract)
ir_function_signature *
builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
builtin_builder::_mod(builtin_available_predicate avail,
const glsl_type *x_type, const glsl_type *y_type)
{
return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
return binop(avail, ir_binop_mod, x_type, x_type, y_type);
}
ir_function_signature *


@ -328,6 +328,11 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
this->fields[this->num_fields].image_read_only = 0;
this->fields[this->num_fields].image_write_only = 0;
this->fields[this->num_fields].image_coherent = 0;
this->fields[this->num_fields].image_volatile = 0;
this->fields[this->num_fields].image_restrict = 0;
this->num_fields++;
}
@ -1201,7 +1206,12 @@ builtin_variable_generator::generate_varyings()
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
if (state->stage != MESA_SHADER_FRAGMENT) {
add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
if (!state->es_shader ||
state->stage == MESA_SHADER_VERTEX ||
(state->stage == MESA_SHADER_GEOMETRY &&
state->OES_geometry_point_size_enable)) {
add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
}
}
if (state->is_version(130, 0)) {


@ -2386,6 +2386,13 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
if (extensions->ARB_blend_func_extended)
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
if (version >= 310) {
if (extensions->OES_geometry_shader) {
add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
add_builtin_define(parser, "GL_OES_geometry_shader", 1);
}
}
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);


@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
/* OES extensions go here, sorted alphabetically.
*/
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
@ -1867,59 +1868,76 @@ do_common_optimization(exec_list *ir, bool linked,
const struct gl_shader_compiler_options *options,
bool native_integers)
{
const bool debug = false;
GLboolean progress = GL_FALSE;
progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
#define OPT(PASS, ...) do { \
if (debug) { \
fprintf(stderr, "START GLSL optimization %s\n", #PASS); \
const bool opt_progress = PASS(__VA_ARGS__); \
progress = opt_progress || progress; \
if (opt_progress) \
_mesa_print_ir(stderr, ir, NULL); \
fprintf(stderr, "GLSL optimization %s: %s progress\n", \
#PASS, opt_progress ? "made" : "no"); \
} else { \
progress = PASS(__VA_ARGS__) || progress; \
} \
} while (false)
OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
if (linked) {
progress = do_function_inlining(ir) || progress;
progress = do_dead_functions(ir) || progress;
progress = do_structure_splitting(ir) || progress;
OPT(do_function_inlining, ir);
OPT(do_dead_functions, ir);
OPT(do_structure_splitting, ir);
}
progress = do_if_simplification(ir) || progress;
progress = opt_flatten_nested_if_blocks(ir) || progress;
progress = opt_conditional_discard(ir) || progress;
progress = do_copy_propagation(ir) || progress;
progress = do_copy_propagation_elements(ir) || progress;
OPT(do_if_simplification, ir);
OPT(opt_flatten_nested_if_blocks, ir);
OPT(opt_conditional_discard, ir);
OPT(do_copy_propagation, ir);
OPT(do_copy_propagation_elements, ir);
if (options->OptimizeForAOS && !linked)
progress = opt_flip_matrices(ir) || progress;
OPT(opt_flip_matrices, ir);
if (linked && options->OptimizeForAOS) {
progress = do_vectorize(ir) || progress;
OPT(do_vectorize, ir);
}
if (linked)
progress = do_dead_code(ir, uniform_locations_assigned) || progress;
OPT(do_dead_code, ir, uniform_locations_assigned);
else
progress = do_dead_code_unlinked(ir) || progress;
progress = do_dead_code_local(ir) || progress;
progress = do_tree_grafting(ir) || progress;
progress = do_constant_propagation(ir) || progress;
OPT(do_dead_code_unlinked, ir);
OPT(do_dead_code_local, ir);
OPT(do_tree_grafting, ir);
OPT(do_constant_propagation, ir);
if (linked)
progress = do_constant_variable(ir) || progress;
OPT(do_constant_variable, ir);
else
progress = do_constant_variable_unlinked(ir) || progress;
progress = do_constant_folding(ir) || progress;
progress = do_minmax_prune(ir) || progress;
progress = do_rebalance_tree(ir) || progress;
progress = do_algebraic(ir, native_integers, options) || progress;
progress = do_lower_jumps(ir) || progress;
progress = do_vec_index_to_swizzle(ir) || progress;
progress = lower_vector_insert(ir, false) || progress;
progress = do_swizzle_swizzle(ir) || progress;
progress = do_noop_swizzle(ir) || progress;
OPT(do_constant_variable_unlinked, ir);
OPT(do_constant_folding, ir);
OPT(do_minmax_prune, ir);
OPT(do_rebalance_tree, ir);
OPT(do_algebraic, ir, native_integers, options);
OPT(do_lower_jumps, ir);
OPT(do_vec_index_to_swizzle, ir);
OPT(lower_vector_insert, ir, false);
OPT(do_swizzle_swizzle, ir);
OPT(do_noop_swizzle, ir);
progress = optimize_split_arrays(ir, linked) || progress;
progress = optimize_redundant_jumps(ir) || progress;
OPT(optimize_split_arrays, ir, linked);
OPT(optimize_redundant_jumps, ir);
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
progress = set_loop_controls(ir, ls) || progress;
progress = unroll_loops(ir, ls, options) || progress;
OPT(set_loop_controls, ir, ls);
OPT(unroll_loops, ir, ls, options);
}
delete ls;
#undef OPT
return progress;
}


@ -591,6 +591,8 @@ struct _mesa_glsl_parse_state {
*/
bool OES_EGL_image_external_enable;
bool OES_EGL_image_external_warn;
bool OES_geometry_point_size_enable;
bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
bool OES_standard_derivatives_enable;


@ -471,10 +471,11 @@ private:
*/
class parcel_out_uniform_storage : public program_resource_visitor {
public:
parcel_out_uniform_storage(struct string_to_uint_map *map,
parcel_out_uniform_storage(struct gl_shader_program *prog,
struct string_to_uint_map *map,
struct gl_uniform_storage *uniforms,
union gl_constant_value *values)
: map(map), uniforms(uniforms), values(values)
: prog(prog), map(map), uniforms(uniforms), values(values)
{
}
@ -492,8 +493,7 @@ public:
memset(this->targets, 0, sizeof(this->targets));
}
void set_and_process(struct gl_shader_program *prog,
ir_variable *var)
void set_and_process(ir_variable *var)
{
current_var = var;
field_counter = 0;
@ -643,6 +643,16 @@ private:
uniform->opaque[shader_type].index = this->next_image;
uniform->opaque[shader_type].active = true;
/* Set image access qualifiers */
const GLenum access =
(current_var->data.image_read_only ? GL_READ_ONLY :
current_var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);
for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
prog->_LinkedShaders[shader_type]->
ImageAccess[this->next_image + j] = access;
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
@ -844,6 +854,11 @@ private:
this->values += values_for_type(type);
}
/**
* Current program being processed.
*/
struct gl_shader_program *prog;
struct string_to_uint_map *map;
struct gl_uniform_storage *uniforms;
@ -1007,40 +1022,6 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
}
}
static void
link_set_image_access_qualifiers(struct gl_shader_program *prog,
gl_shader *sh, unsigned shader_stage,
ir_variable *var, const glsl_type *type,
char **name, size_t name_length)
{
/* Handle arrays of arrays */
if (type->is_array() && type->fields.array->is_array()) {
for (unsigned i = 0; i < type->length; i++) {
size_t new_length = name_length;
/* Append the subscript to the current variable name */
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
link_set_image_access_qualifiers(prog, sh, shader_stage, var,
type->fields.array, name,
new_length);
}
} else {
unsigned id = 0;
bool found = prog->UniformHash->get(id, *name);
assert(found);
(void) found;
const gl_uniform_storage *storage = &prog->UniformStorage[id];
const unsigned index = storage->opaque[shader_stage].index;
const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);
for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
sh->ImageAccess[index + j] = access;
}
}
/**
* Combine the hidden uniform hash map with the uniform hash map so that the
* hidden uniforms will be given indicies at the end of the uniform storage
@ -1148,7 +1129,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
union gl_constant_value *data_end = &data[num_data_slots];
#endif
parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
@ -1163,7 +1144,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
var->data.mode != ir_var_shader_storage))
continue;
parcel.set_and_process(prog, var);
parcel.set_and_process(var);
}
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
@ -1301,29 +1282,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
prog->NumHiddenUniforms = hidden_uniforms;
prog->UniformStorage = uniforms;
/**
* Scan the program for image uniforms and store image unit access
* information into the gl_shader data structure.
*/
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
gl_shader *sh = prog->_LinkedShaders[i];
if (sh == NULL)
continue;
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *var = node->as_variable();
if (var && var->data.mode == ir_var_uniform &&
var->type->contains_image()) {
char *name_copy = ralloc_strdup(NULL, var->name);
link_set_image_access_qualifiers(prog, sh, i, var, var->type,
&name_copy, strlen(var->name));
ralloc_free(name_copy);
}
}
}
link_set_uniform_initializers(prog, boolean_true);
return;


@ -967,11 +967,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
return;
}
if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
bool needs_flat_qualifier = consumer_var == NULL &&
(producer_var->type->contains_integer() ||
producer_var->type->contains_double());
if (needs_flat_qualifier ||
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type varying cannot possibly affect rendering.
* Also, this variable is non-flat and is (or contains) an integer.
* Also, this variable is non-flat and is (or contains) an integer
* or a double.
* If the consumer stage is unknown, don't modify the interpolation
* type as it could affect rendering later with separate shaders.
*


@ -4633,8 +4633,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
&prog->NumShaderStorageBlocks,
&prog->SsboInterfaceBlockIndex);
/* FINISHME: Assign fragment shader output locations. */
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;


@ -327,6 +327,7 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
unsigned *const_offset,
bool *row_major,
int *matrix_columns,
const glsl_struct_field **struct_field,
unsigned packing)
{
*offset = new(mem_ctx) ir_constant(0u);
@ -442,8 +443,11 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
if (strcmp(struct_type->fields.structure[i].name,
deref_record->field) == 0)
deref_record->field) == 0) {
if (struct_field)
*struct_field = &struct_type->fields.structure[i];
break;
}
if (packing == GLSL_INTERFACE_PACKING_STD430)
intra_struct_offset += type->std430_size(field_row_major);


@ -57,6 +57,7 @@ public:
void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
ir_rvalue **offset, unsigned *const_offset,
bool *row_major, int *matrix_columns,
const glsl_struct_field **struct_field,
unsigned packing);
};


@ -142,7 +142,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns, packing);
&row_major, &matrix_columns, NULL, packing);
/* Now that we've calculated the offset to the start of the
* dereference, walk over the type and emit loads into a temporary.
@ -210,7 +210,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns, packing);
&row_major, &matrix_columns, NULL, packing);
deref = new(mem_ctx) ir_dereference_variable(store_var);
@ -370,7 +370,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns, packing);
&row_major, &matrix_columns, NULL, packing);
assert(offset);
assert(!row_major);


@ -45,7 +45,7 @@ class lower_ubo_reference_visitor :
public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_shader *shader)
: shader(shader)
: shader(shader), struct_field(NULL), variable(NULL)
{
}
@ -60,6 +60,7 @@ public:
bool *row_major,
int *matrix_columns,
unsigned packing);
uint32_t ssbo_access_params();
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
@ -104,6 +105,8 @@ public:
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
const struct glsl_struct_field *struct_field;
ir_variable *variable;
ir_rvalue *uniform_block;
bool progress;
};
@ -288,8 +291,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
*const_offset = ubo_var->Offset;
this->struct_field = NULL;
setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
matrix_columns, packing);
matrix_columns, &this->struct_field, packing);
}
void
@ -317,6 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
this->buffer_access_type =
var->is_in_shader_storage_block() ?
ssbo_load_access : ubo_load_access;
this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
@ -370,6 +375,24 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
return state->ARB_shader_storage_buffer_object_enable;
}
uint32_t
lower_ubo_reference_visitor::ssbo_access_params()
{
assert(variable);
if (variable->is_interface_instance()) {
assert(struct_field);
return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
(struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
(struct_field->image_volatile ? ACCESS_VOLATILE : 0));
} else {
return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
(variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
(variable->data.image_volatile ? ACCESS_VOLATILE : 0));
}
}
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_rvalue *deref,
@ -394,6 +417,10 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
sig_params.push_tail(writemask_ref);
ir_variable *access_ref = new(mem_ctx)
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
sig_params.push_tail(access_ref);
ir_function_signature *sig = new(mem_ctx)
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
assert(sig);
@ -408,6 +435,7 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
call_params.push_tail(offset->clone(mem_ctx, NULL));
call_params.push_tail(deref->clone(mem_ctx, NULL));
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
@ -426,6 +454,10 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
sig_params.push_tail(offset_ref);
ir_variable *access_ref = new(mem_ctx)
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
sig_params.push_tail(access_ref);
ir_function_signature *sig =
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
assert(sig);
@ -444,6 +476,7 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
exec_list call_params;
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
call_params.push_tail(offset->clone(mem_ctx, NULL));
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
@ -499,6 +532,7 @@ lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_store_access;
this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
@ -678,6 +712,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
this->buffer_access_type = ssbo_unsized_array_length_access;
this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to calculate the length.
@ -910,6 +945,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_atomic_access;
this->variable = var;
setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,


@ -361,11 +361,12 @@ tree_grafting_basic_block(ir_instruction *bb_first,
if (!lhs_var)
continue;
if (lhs_var->data.mode == ir_var_function_out ||
lhs_var->data.mode == ir_var_function_inout ||
lhs_var->data.mode == ir_var_shader_out ||
lhs_var->data.mode == ir_var_shader_storage)
continue;
if (lhs_var->data.mode == ir_var_function_out ||
lhs_var->data.mode == ir_var_function_inout ||
lhs_var->data.mode == ir_var_shader_out ||
lhs_var->data.mode == ir_var_shader_storage ||
lhs_var->data.mode == ir_var_shader_shared)
continue;
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);


@ -164,6 +164,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
this->fields.structure[i].image_read_only = fields[i].image_read_only;
this->fields.structure[i].image_write_only = fields[i].image_write_only;
this->fields.structure[i].image_coherent = fields[i].image_coherent;
this->fields.structure[i].image_volatile = fields[i].image_volatile;
this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
}
@ -1330,6 +1335,13 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
if (this == desired)
return true;
/* ESSL does not allow implicit conversions. If there is no state, we're
* doing intra-stage function linking where these checks have already been
* done.
*/
if (state && state->es_shader)
return false;
/* There is no conversion among matrix types. */
if (this->matrix_columns > 1 || desired->matrix_columns > 1)
return false;


@ -885,7 +885,8 @@ struct glsl_struct_field {
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
precision(GLSL_PRECISION_NONE)
precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0),
image_coherent(0), image_volatile(0), image_restrict(0)
{
/* empty */
}


@ -139,7 +139,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_2x16);
nir_ssa_def *word =
nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
@ -154,7 +154,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_4x8);
nir_ssa_def *byte =
nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),


@ -238,15 +238,15 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
dst = (src0.x & 0xffff) | (src0.y >> 16);
unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
dst.x = (src0.x & 0xffff) | (src0.y >> 16);
""")
unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
dst = (src0.x << 0) |
(src0.y << 8) |
(src0.z << 16) |
(src0.w << 24);
unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
dst.x = (src0.x << 0) |
(src0.y << 8) |
(src0.z << 16) |
(src0.w << 24);
""")
# Lowered floating point unpacking operations.
@ -562,12 +562,12 @@ dst.y = src1.x;
""")
# Byte extraction
binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
# Word extraction
binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
def triop(name, ty, const_expr):


@ -248,19 +248,19 @@ optimizations = [
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
(('extract_ibyte', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8),
(('extract_i8', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
(('extract_ubyte', a, b),
(('extract_u8', a, b),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
(('extract_iword', a, b),
(('extract_i16', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
(('extract_uword', a, b),
(('extract_u16', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
@ -285,30 +285,30 @@ optimizations = [
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
('extract_uword', 'v', 1), 0, 0)),
('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
('extract_ubyte', 'v', 1),
('extract_ubyte', 'v', 2),
('extract_ubyte', 'v', 3))),
('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
('extract_u8', 'v', 1),
('extract_u8', 'v', 2),
('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
('extract_iword', 'v', 1), 0, 0)),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
('extract_ibyte', 'v', 1),
('extract_ibyte', 'v', 2),
('extract_ibyte', 'v', 3))),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
('extract_i8', 'v', 1),
('extract_i8', 'v', 2),
('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]


@ -544,6 +544,16 @@ enum gl_frag_depth_layout
FRAG_DEPTH_LAYOUT_UNCHANGED
};
/**
* \brief Buffer access qualifiers
*/
enum gl_buffer_access_qualifier
{
ACCESS_COHERENT = 1,
ACCESS_RESTRICT = 2,
ACCESS_VOLATILE = 4,
};
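These qualifiers form a bitmask, so a declaration such as "coherent volatile buffer" can be encoded as a simple OR of flags; for example:

    unsigned access = ACCESS_COHERENT | ACCESS_VOLATILE;
    if (access & ACCESS_VOLATILE) {
        /* the back-end must not cache this access in registers */
    }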
#ifdef __cplusplus
} /* extern "C" */
#endif


@ -85,7 +85,7 @@ endif
# virgl
ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
SUBDIRS += winsys/virgl/drm drivers/virgl
SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
endif
# vmwgfx


@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
* XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
/*
* Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
* directly, without any (x86 or generic) intrinsics.
* Albeit the rounding mode cannot be specified (and is undefined,
* though in practice on x86 seems to do nearest-even but it may
* be dependent on instruction set support), so is essentially
* useless.
*/
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
/* XXX: not really supported by backends */
/*
* XXX: not really supported by backends.
* Even if they would now, rounding mode cannot be specified and
* is undefined.
*/
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");


@ -256,6 +256,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
}
/**
* Un-interleave vector.
* This will return a vector consisting of every second element
* (depending on lo_hi, beginning at 0 or 1).
* The returned vector size (elems and width) will only be half
* that of the source vector.
*/
LLVMValueRef
lp_build_uninterleave1(struct gallivm_state *gallivm,
unsigned num_elems,
LLVMValueRef a,
unsigned lo_hi)
{
LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
unsigned i;
assert(num_elems <= LP_MAX_VECTOR_LENGTH);
for (i = 0; i < num_elems / 2; ++i)
elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
shuffle = LLVMConstVector(elems, num_elems / 2);
return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
}
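For reference, a scalar model of the shuffle this helper builds (a sketch, not the gallivm code path):

    /* out receives every second element of in, starting at lo_hi (0 or 1) */
    static void uninterleave1(int *out, const int *in, unsigned n, unsigned lo_hi) {
        for (unsigned i = 0; i < n / 2; i++)
            out[i] = in[2 * i + lo_hi];
    }
    /* in = {a0, b0, a1, b1}, lo_hi = 1  ->  out = {b0, b1} */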
/**
* Interleave vector elements.
*


@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
LLVMValueRef b,
unsigned lo_hi);
LLVMValueRef
lp_build_uninterleave1(struct gallivm_state *gallivm,
unsigned num_elems,
LLVMValueRef a,
unsigned lo_hi);
void
lp_build_unpack2(struct gallivm_state *gallivm,


@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
/* Ignore deprecated instructions */
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:


@ -45,8 +45,10 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
@ -530,6 +532,77 @@ static struct lp_build_tgsi_action log_action = {
log_emit /* emit */
};
/* TGSI_OPCODE_PK2H */
static void
pk2h_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
/* src0.x */
emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
0, TGSI_CHAN_X);
/* src0.y */
emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
0, TGSI_CHAN_Y);
}
static void
pk2h_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_type f16i_t;
LLVMValueRef lo, hi, res;
f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
/* maybe some interleave doubling vector width would be useful... */
lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
emit_data->output[emit_data->chan] = res;
}
static struct lp_build_tgsi_action pk2h_action = {
pk2h_fetch_args, /* fetch_args */
pk2h_emit /* emit */
};
/* TGSI_OPCODE_UP2H */
static void
up2h_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMContextRef context = gallivm->context;
LLVMValueRef lo, hi, res[2], arg;
unsigned nr = bld_base->base.type.length;
LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
res[0] = lp_build_half_to_float(gallivm, lo);
res[1] = lp_build_half_to_float(gallivm, hi);
emit_data->output[0] = emit_data->output[2] = res[0];
emit_data->output[1] = emit_data->output[3] = res[1];
}
static struct lp_build_tgsi_action up2h_action = {
scalar_unary_fetch_args, /* fetch_args */
up2h_emit /* emit */
};
/* TGSI_OPCODE_LRP */
static void
@ -1032,10 +1105,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;


@ -226,14 +226,9 @@ pipe_freedreno_create_screen(int fd)
struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
struct virgl_winsys *vws;
struct pipe_screen *screen;
vws = virgl_drm_winsys_create(fd);
if (!vws)
return NULL;
screen = virgl_create_screen(vws);
screen = virgl_drm_screen_create(fd);
return screen ? debug_screen_wrap(screen) : NULL;
}


@ -58,6 +58,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
#include "util/u_half.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@ -3057,6 +3058,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
}
}
static void
exec_pk2h(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
unsigned chan;
union tgsi_exec_channel arg[2], dst;
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
(util_float_to_half(arg[1].f[chan]) << 16);
}
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
}
}
}
static void
exec_up2h(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
unsigned chan;
union tgsi_exec_channel arg, dst[2];
fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
}
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
}
}
}
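A worked value check for the two interpreter helpers above, reusing the util_half conversions they call (a sketch; the bit patterns follow from IEEE half precision):

    #include <assert.h>
    #include <stdint.h>
    #include "util/u_half.h"

    static void check_pk2h_up2h(void) {
        /* PK2H of (1.0, 2.0): 0x3c00 in the low half, 0x4000 in the high */
        uint32_t packed = util_float_to_half(1.0f) |
                          ((uint32_t)util_float_to_half(2.0f) << 16);
        assert(packed == 0x40003c00);
        /* UP2H recovers both values exactly */
        assert(util_half_to_float(packed & 0xffff) == 1.0f);
        assert(util_half_to_float(packed >> 16) == 2.0f);
    }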
static void
exec_scs(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
@ -4339,7 +4379,7 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
assert (0);
exec_pk2h(mach, inst);
break;
case TGSI_OPCODE_PK2US:
@ -4425,7 +4465,7 @@ exec_instruction(
break;
case TGSI_OPCODE_UP2H:
assert (0);
exec_up2h(mach, inst);
break;
case TGSI_OPCODE_UP2US:


@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
@ -426,6 +426,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_UP2H:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:


@ -377,6 +377,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->reads_position = TRUE;
else if (semName == TGSI_SEMANTIC_FACE)
info->uses_frontface = TRUE;
else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
info->reads_samplemask = TRUE;
}
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte) semName;


@ -81,6 +81,7 @@ struct tgsi_shader_info
ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
boolean reads_samplemask; /**< does fragment shader read sample mask? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_samplemask; /**< does fragment shader write sample mask? */


@ -195,4 +195,16 @@ u_box_minify_2d(struct pipe_box *dst,
dst->height = MAX2(src->height >> l, 1);
}
static inline void
u_box_minify_3d(struct pipe_box *dst,
const struct pipe_box *src, unsigned l)
{
dst->x = src->x >> l;
dst->y = src->y >> l;
dst->z = src->z >> l;
dst->width = MAX2(src->width >> l, 1);
dst->height = MAX2(src->height >> l, 1);
dst->depth = MAX2(src->depth >> l, 1);
}
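A quick usage sketch, assuming the u_box_3d() initializer from earlier in this header:

    struct pipe_box level0, level2;
    u_box_3d(0, 0, 0, 37, 9, 5, &level0);
    u_box_minify_3d(&level2, &level0, 2);
    /* level2 is 9x2x1: each dimension shifted right by 2, clamped to >= 1 */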
#endif


@ -52,7 +52,7 @@
#include <machine/cpu.h>
#endif
#if defined(PIPE_OS_FREEBSD)
#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif


@ -313,7 +313,7 @@ def _parse_channels(fields, layout, colorspace, swizzles):
return channels
def parse(filename):
'''Parse the format descrition in CSV format in terms of the
'''Parse the format description in CSV format in terms of the
Channel and Format classes above.'''
stream = open(filename)


@ -74,7 +74,11 @@ util_float_to_half(float f)
f32.ui &= round_mask;
f32.f *= magic.f;
f32.ui -= round_mask;
/*
* XXX: The magic mul relies on denorms being available, otherwise
* all f16 denorms get flushed to zero - hence when this is used
* for tgsi_exec in softpipe we won't get f16 denorms.
*/
/*
* Clamp to max finite value if overflowed.
* OpenGL has completely undefined rounding behavior for float to
@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
/* Adjust */
f32.f *= magic.f;
/* XXX: The magic mul relies on denorms being available */
/* Inf / NaN */
if (f32.f >= infnan.f)


@ -49,6 +49,13 @@ enum VS_OUTPUT
VS_O_VTEX = 0
};
const int vl_zscan_normal_16[] =
{
/* Zig-Zag scan pattern */
0, 1, 4, 8, 5, 2, 3, 6,
9,12,13,10, 7,11,14,15
};
const int vl_zscan_linear[] =
{
/* Linear scan pattern */


@ -64,6 +64,7 @@ struct vl_zscan_buffer
struct pipe_surface *dst;
};
extern const int vl_zscan_normal_16[];
extern const int vl_zscan_linear[];
extern const int vl_zscan_normal[];
extern const int vl_zscan_alternate[];


@ -325,6 +325,11 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call
will not block and the return value will be TRUE if the query has
completed or FALSE otherwise.
``get_query_result_resource`` is used to store the result of a query into
a resource without synchronizing with the CPU. This write will optionally
wait for the query to complete, and will optionally write whether the value
is available instead of the value itself.
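A hedged sketch of the caller's side, assuming the hook's parameters match its p_context.h declaration (query, wait flag, result value type, result index, destination resource, byte offset); the variable names are illustrative:

    /* store a 64-bit query result into 'buf' at offset 0, without waiting */
    ctx->get_query_result_resource(ctx, query, FALSE,
                                   PIPE_QUERY_TYPE_U64, 0, buf, 0);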
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which


@ -138,6 +138,10 @@ The integer capabilities:
* ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
alignment for pipe_sampler_view::u.buf.first_element, in bytes.
If a driver does not support first/last_element, it should return 0.
* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
When this is the case it should be assumed that the swizzle parameters
in the sampler view have no effect.
* ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
If true, the hardware cannot replace arbitrary shader inputs with sprite
coordinates and hence the inputs that are desired to be replaceable must
@ -164,7 +168,7 @@ The integer capabilities:
view it is intended to be used with, or herein undefined results may occur
for permutational swizzles.
* ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
a buffer sampler view, in bytes.
a buffer sampler view, in texels.
* ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
since they are linked) a driver can support. Returning 0 is equivalent
to returning 1 because every driver has to support at least a single
@ -306,6 +310,15 @@ The integer capabilities:
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
is supported.
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
pipe_context::create_surface supports reinterpreting a texture as a surface
of a format with different block width/height (but same block size in bits).
For example, a compressed texture image can be interpreted as a
non-compressed surface whose texels are the same number of bits as the
compressed blocks, and vice versa. The width and height of the surface is
adjusted appropriately.
* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Driver supports
context::get_query_result_resource callback.
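State trackers discover these capabilities through pipe_screen::get_param(); a minimal sketch:

    /* only use query-to-buffer result writes if the driver advertises them */
    if (screen->get_param(screen, PIPE_CAP_QUERY_BUFFER_OBJECT)) {
        /* pipe_context::get_query_result_resource() is available */
    }
    if (screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY)) {
        /* restrict PIPE_BUFFER sampler views to R/RG/RGB/RGBA formats */
    }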
.. _pipe_capf:


@ -2372,6 +2372,23 @@ programs.
the program. Results are unspecified if any of the remaining
threads terminates or never reaches an executed BARRIER instruction.
.. opcode:: MEMBAR - Memory barrier
``MEMBAR type``
This opcode waits for the completion of all memory accesses based on
the type passed in. The type is an immediate bitfield with the following
meaning:
Bit 0: Shader storage buffers
Bit 1: Atomic buffers
Bit 2: Images
Bit 3: Shared memory
Bit 4: Thread group
These may be passed in any combination. An implementation is free not to
distinguish between them as it sees fit; however, they map to all the
possibilities made available by GLSL.
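For example, a barrier covering shader storage buffers and images sets bits 0 and 2 in the immediate; a sketch with locally defined masks mirroring the bit assignments above (the names are illustrative, not part of the interface):

    #define MEMBAR_SHADER_BUFFER (1 << 0)
    #define MEMBAR_ATOMIC_BUFFER (1 << 1)
    #define MEMBAR_IMAGE         (1 << 2)
    #define MEMBAR_SHARED        (1 << 3)
    #define MEMBAR_THREAD_GROUP  (1 << 4)

    unsigned membar_type = MEMBAR_SHADER_BUFFER | MEMBAR_IMAGE;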
.. _atomopcodes:


@ -152,6 +152,9 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
struct fd_ringbuffer *ring = ctx->ring;
const uint32_t *buf = (const void *)string;
/* max packet size is 0x3fff dwords: */
len = MIN2(len, 0x3fff * 4);
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
while (len >= 4) {
OUT_RING(ring, *buf);


@ -165,6 +165,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_SM3:
@ -183,6 +184,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
if (is_a3xx(screen)) return 16;
if (is_a4xx(screen)) return 32;
@ -248,6 +251,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@ -296,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);


@ -556,6 +556,10 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
}
}
/* NOTE: this creates the "TGSI" style fragface (ie. input slot
* VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face
* we can just use the value from hw directly (since it is boolean)
*/
static struct ir3_instruction *
create_frag_face(struct ir3_compile *ctx, unsigned comp)
{
@ -1224,7 +1228,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
ctx->vertex_id = create_input(ctx->block, 0);
ctx->vertex_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
ctx->vertex_id);
}
@ -1232,7 +1236,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
ctx->instance_id = create_input(ctx->block, 0);
ctx->instance_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
@ -1244,6 +1248,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
}
break;
case nir_intrinsic_load_front_face:
if (!ctx->frag_face) {
ctx->so->frag_face = true;
ctx->frag_face = create_input(b, 0);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
}
dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
@ -1349,6 +1361,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
@ -1392,7 +1405,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
ddy = get_src(ctx, &tex->src[i].src);
break;
default:
compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
compile_error(ctx, "Unhandled NIR tex src type: %d\n",
tex->src[i].src_type);
return;
}
@ -1417,6 +1430,21 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
if (!has_off) {
/* could still have a constant offset: */
if (tex->const_offset[0] || tex->const_offset[1] ||
tex->const_offset[2] || tex->const_offset[3]) {
off = const_off;
off[0] = create_immed(b, tex->const_offset[0]);
off[1] = create_immed(b, tex->const_offset[1]);
off[2] = create_immed(b, tex->const_offset[2]);
off[3] = create_immed(b, tex->const_offset[3]);
has_off = true;
}
}
/* scale up integer coords for TXF based on the LOD */
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
@ -2053,6 +2081,9 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
break;
case VARYING_SLOT_CLIP_VERTEX:
/* handled entirely in nir_lower_clip: */
return;
default:
if (slot >= VARYING_SLOT_VAR0)
break;
@ -2135,11 +2166,17 @@ emit_instructions(struct ir3_compile *ctx)
setup_output(ctx, var);
}
/* Setup variables (which should only be arrays): */
/* Setup global variables (which should only be arrays): */
nir_foreach_variable(var, &ctx->s->globals) {
declare_var(ctx, var);
}
/* Setup local variables (which should only be arrays): */
/* NOTE: need to do something more clever when we support >1 fxn */
nir_foreach_variable(var, &fxn->locals) {
declare_var(ctx, var);
}
/* And emit the body: */
ctx->impl = fxn;
emit_function(ctx, fxn);


@ -262,6 +262,9 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:


@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return true;
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1;
case PIPE_CAP_TGSI_TEXCOORD:
@ -486,6 +488,9 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:


@ -308,17 +308,4 @@ void
lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
#ifdef PIPE_ARCH_SSE
#include <emmintrin.h>
#include "util/u_sse.h"
static inline __m128i
lp_plane_to_m128i(const struct lp_rast_plane *plane)
{
return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
(int32_t)plane->dcdy, (int32_t)plane->eo);
}
#endif
#endif


@ -239,7 +239,7 @@ sign_bits4(const __m128i *cstep, int cdiff)
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
@ -250,26 +250,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
unsigned nr = 0;
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
/* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
__m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
__m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
__m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
__m128i c;
__m128i dcdx;
__m128i dcdy;
__m128i rej4;
__m128i dcdx2;
__m128i dcdx3;
__m128i c, dcdx, dcdy, rej4;
__m128i dcdx_neg_mask, dcdy_neg_mask;
__m128i dcdx2, dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
transpose4_epi32(&p0, &p1, &p2, &zero,
&c, &dcdx, &dcdy, &rej4);
&c, &unused, &dcdx, &dcdy);
/* recalc eo - easier than trying to load as scalars / shuffle... */
dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
_mm_and_si128(dcdx_neg_mask, dcdx));
/* Adjust dcdx;
*/
@ -349,32 +352,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
/* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
__m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
__m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
__m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
__m128i c;
__m128i dcdx;
__m128i dcdy;
__m128i c, dcdx, dcdy;
__m128i dcdx2, dcdx3;
__m128i dcdx2;
__m128i dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
transpose4_epi32(&p0, &p1, &p2, &zero,
&c, &dcdx, &dcdy, &unused);
&c, &unused, &dcdx, &dcdy);
/* Adjust dcdx;
*/

View file

@ -311,6 +311,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */

View file

@ -168,6 +168,21 @@ struct lp_setup_context
const float (*v2)[4]);
};
static inline void
scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
struct u_rect *scissor)
{
/* left */
scis_planes[0] = (bbox->x0 < scissor->x0);
/* right */
scis_planes[1] = (bbox->x1 > scissor->x1);
/* top */
scis_planes[2] = (bbox->y0 < scissor->y0);
/* bottom */
scis_planes[3] = (bbox->y1 > scissor->y1);
}
void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
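
A minimal standalone sketch of the idea behind scissor_planes_needed() above — a scissor edge only costs an extra plane when the primitive's bounding box actually crosses it (plain C with simplified types; not the driver code):

#include <stdio.h>

struct rect { int x0, y0, x1, y1; };

/* Count how many extra scissor planes a primitive needs: one per
 * scissor edge that its bounding box crosses.  A bbox fully inside
 * the scissor needs none. */
int planes_needed(const struct rect *bbox, const struct rect *scissor)
{
   int nr = 0;
   nr += bbox->x0 < scissor->x0;   /* left   */
   nr += bbox->x1 > scissor->x1;   /* right  */
   nr += bbox->y0 < scissor->y0;   /* top    */
   nr += bbox->y1 > scissor->y1;   /* bottom */
   return nr;
}

int main(void)
{
   struct rect scissor  = { 0, 0, 639, 479 };
   struct rect inside   = { 10, 10, 20, 20 };   /* 0 extra planes */
   struct rect crossing = { -5, 10, 20, 500 };  /* crosses left and bottom: 2 */
   printf("%d %d\n", planes_needed(&inside, &scissor),
          planes_needed(&crossing, &scissor));
   return 0;
}

The line and triangle setup paths below apply the same test twice: once to size the plane allocation and once to decide which scissor planes to actually emit.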

View file

@ -336,13 +336,6 @@ try_setup_line( struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
if (setup->scissor_test) {
nr_planes = 8;
}
else {
nr_planes = 4;
}
dx = v1[0][0] - v2[0][0];
dy = v1[0][1] - v2[0][1];
area = (dx * dx + dy * dy);
@ -591,6 +584,18 @@ try_setup_line( struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
nr_planes = 4;
/*
* Determine how many scissor planes we need, that is, drop a scissor
* edge when the bounding box of the tri is fully on its inner side.
*/
if (setup->scissor_test) {
/* why not just use draw_regions */
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
}
line = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@ -708,30 +713,46 @@ try_setup_line( struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
* (Or only store the c value together with a bit indicating which
* scissor edge this is, so rasterization would treat them differently
* (easier to evaluate) to ordinary planes.)
*/
if (nr_planes == 8) {
const struct u_rect *scissor =
&setup->scissors[viewport_index];
if (nr_planes > 4) {
/* why not just use draw_regions */
struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[4];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
plane[4].dcdx = -1 << 8;
plane[4].dcdy = 0;
plane[4].c = (1-scissor->x0) << 8;
plane[4].eo = 1 << 8;
plane[5].dcdx = 1 << 8;
plane[5].dcdy = 0;
plane[5].c = (scissor->x1+1) << 8;
plane[5].eo = 0;
plane[6].dcdx = 0;
plane[6].dcdy = 1 << 8;
plane[6].c = (1-scissor->y0) << 8;
plane[6].eo = 1 << 8;
plane[7].dcdx = 0;
plane[7].dcdy = -1 << 8;
plane[7].c = (scissor->y1+1) << 8;
plane[7].eo = 0;
if (s_planes[0]) {
plane_s->dcdx = -1 << 8;
plane_s->dcdy = 0;
plane_s->c = (1-scissor->x0) << 8;
plane_s->eo = 1 << 8;
plane_s++;
}
if (s_planes[1]) {
plane_s->dcdx = 1 << 8;
plane_s->dcdy = 0;
plane_s->c = (scissor->x1+1) << 8;
plane_s->eo = 0 << 8;
plane_s++;
}
if (s_planes[2]) {
plane_s->dcdx = 0;
plane_s->dcdy = 1 << 8;
plane_s->c = (1-scissor->y0) << 8;
plane_s->eo = 1 << 8;
plane_s++;
}
if (s_planes[3]) {
plane_s->dcdx = 0;
plane_s->dcdy = -1 << 8;
plane_s->c = (scissor->y1+1) << 8;
plane_s->eo = 0;
plane_s++;
}
assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);

View file

@ -302,13 +302,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
if (setup->scissor_test) {
nr_planes = 7;
}
else {
nr_planes = 3;
}
/* Bounding rectangle (in pixels) */
{
/* Yes, this is necessary to accurately calculate bounding boxes
@ -347,6 +340,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
nr_planes = 3;
/*
* Determine how many scissor planes we need, that is, drop a scissor
* edge when the bounding box of the tri is fully on its inner side.
*/
if (setup->scissor_test) {
/* why not just use draw_regions */
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
}
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@ -367,13 +372,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Setup parameter interpolants:
*/
setup->setup.variant->jit_function( v0,
v1,
v2,
frontfacing,
GET_A0(&tri->inputs),
GET_DADX(&tri->inputs),
GET_DADY(&tri->inputs) );
setup->setup.variant->jit_function(v0, v1, v2,
frontfacing,
GET_A0(&tri->inputs),
GET_DADX(&tri->inputs),
GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
@ -383,9 +386,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
(const float (*)[4])GET_A0(&tri->inputs),
(const float (*)[4])GET_DADX(&tri->inputs),
(const float (*)[4])GET_DADY(&tri->inputs));
(const float (*)[4])GET_A0(&tri->inputs),
(const float (*)[4])GET_DADX(&tri->inputs),
(const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
@ -672,29 +675,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
* (Or only store the c value together with a bit indicating which
* scissor edge this is, so rasterization would treat them differently
* (easier to evaluate) to ordinary planes.)
*/
if (nr_planes == 7) {
const struct u_rect *scissor = &setup->scissors[viewport_index];
if (nr_planes > 3) {
/* why not just use draw_regions */
struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[3];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
plane[3].dcdx = -1 << 8;
plane[3].dcdy = 0;
plane[3].c = (1-scissor->x0) << 8;
plane[3].eo = 1 << 8;
plane[4].dcdx = 1 << 8;
plane[4].dcdy = 0;
plane[4].c = (scissor->x1+1) << 8;
plane[4].eo = 0;
plane[5].dcdx = 0;
plane[5].dcdy = 1 << 8;
plane[5].c = (1-scissor->y0) << 8;
plane[5].eo = 1 << 8;
plane[6].dcdx = 0;
plane[6].dcdy = -1 << 8;
plane[6].c = (scissor->y1+1) << 8;
plane[6].eo = 0;
if (s_planes[0]) {
plane_s->dcdx = -1 << 8;
plane_s->dcdy = 0;
plane_s->c = (1-scissor->x0) << 8;
plane_s->eo = 1 << 8;
plane_s++;
}
if (s_planes[1]) {
plane_s->dcdx = 1 << 8;
plane_s->dcdy = 0;
plane_s->c = (scissor->x1+1) << 8;
plane_s->eo = 0 << 8;
plane_s++;
}
if (s_planes[2]) {
plane_s->dcdx = 0;
plane_s->dcdy = 1 << 8;
plane_s->c = (1-scissor->y0) << 8;
plane_s->eo = 1 << 8;
plane_s++;
}
if (s_planes[3]) {
plane_s->dcdx = 0;
plane_s->dcdy = -1 << 8;
plane_s->c = (scissor->y1+1) << 8;
plane_s->eo = 0;
plane_s++;
}
assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
@ -984,17 +1004,16 @@ calc_fixed_position(struct lp_setup_context *setup,
* Both should be acceptable, I think.
*/
#if defined(PIPE_ARCH_SSE)
__m128d v0r, v1r, v2r;
__m128 v0r, v1r;
__m128 vxy0xy2, vxy1xy0;
__m128i vxy0xy2i, vxy1xy0i;
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
v0r = _mm_load_sd((const double *)v0[0]);
v1r = _mm_load_sd((const double *)v1[0]);
v2r = _mm_load_sd((const double *)v2[0]);
vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);

View file

@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
case TYPE_S64:
case TYPE_U64:
return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast<float>(i);
case TYPE_F64:

View file

@ -132,6 +132,7 @@ enum operation
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,

View file

@ -1947,10 +1947,16 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
case OP_CVT:
if (insn->def(0).getFile() == FILE_PREDICATE ||
insn->src(0).getFile() == FILE_PREDICATE)
emitMOV(insn);
else
emitCVT(insn);
break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;

View file

@ -673,7 +673,12 @@ CodeEmitterGM107::emitMOV()
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) {
switch (insn->src(0).getFile()) {
case FILE_GPR:
emitInsn(0x5c980000);
if (insn->def(0).getFile() == FILE_PREDICATE) {
emitInsn(0x5b6a0000);
emitGPR (0x08);
} else {
emitInsn(0x5c980000);
}
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
@ -684,18 +689,32 @@ CodeEmitterGM107::emitMOV()
emitInsn(0x38980000);
emitIMMD(0x14, 19, insn->src(0));
break;
case FILE_PREDICATE:
emitInsn(0x50880000);
emitPRED(0x0c, insn->src(0));
emitPRED(0x1d);
emitPRED(0x27);
break;
default:
assert(!"bad src file");
break;
}
emitField(0x27, 4, insn->lanes);
if (insn->def(0).getFile() != FILE_PREDICATE &&
insn->src(0).getFile() != FILE_PREDICATE)
emitField(0x27, 4, insn->lanes);
} else {
emitInsn (0x01000000);
emitIMMD (0x14, 32, insn->src(0));
emitField(0x0c, 4, insn->lanes);
}
emitGPR(0x00, insn->def(0));
if (insn->def(0).getFile() == FILE_PREDICATE) {
emitPRED(0x27);
emitPRED(0x03, insn->def(0));
emitPRED(0x00);
} else {
emitGPR(0x00, insn->def(0));
}
}
void
@ -2684,11 +2703,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitRAM();
break;
case OP_MOV:
if (insn->def(0).getFile() == FILE_GPR &&
insn->src(0).getFile() != FILE_PREDICATE)
emitMOV();
else
assert(!"R2P/P2R");
emitMOV();
break;
case OP_RDSV:
emitS2R();
@ -2700,7 +2715,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_CEIL:
case OP_TRUNC:
case OP_CVT:
if (isFloatType(insn->dType)) {
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
insn->src(0).getFile() == FILE_PREDICATE)) {
emitMOV();
} else if (isFloatType(insn->dType)) {
if (isFloatType(insn->sType))
emitF2F();
else

View file

@ -2021,8 +2021,10 @@ CodeEmitterNVC0::emitATOM(const Instruction *i)
code[0] |= 63 << 20;
}
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
srcId(i->src(2), 32 + 17);
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
code[1] |= (SDATA(i->src(1)).id + 1) << 17;
}
}
void
@ -2433,10 +2435,16 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
case OP_CVT:
if (insn->def(0).getFile() == FILE_PREDICATE ||
insn->src(0).getFile() == FILE_PREDICATE)
emitMOV(insn);
else
emitCVT(insn);
break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;

View file

@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
@ -213,6 +214,12 @@ public:
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
nv50_ir::CacheMode getCacheMode() const {
if (!insn->Instruction.Memory)
return nv50_ir::CACHE_CA;
return translateCacheMode(insn->Memory.Qualifier);
}
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
//case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
}
}
static nv50_ir::CacheMode translateCacheMode(uint qualifier)
{
if (qualifier & TGSI_MEMORY_VOLATILE)
return nv50_ir::CACHE_CV;
if (qualifier & TGSI_MEMORY_COHERENT)
return nv50_ir::CACHE_CG;
return nv50_ir::CACHE_CA;
}
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
} else
if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= 0x2;
}
}
@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
/*
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
if (src.getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
@ -2222,6 +2239,28 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
Value *off = fetchSrc(1, c);
Symbol *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
} else {
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
ld->cache = tgsi.getCacheMode();
if (tgsi.getSrc(0).isIndirect(0))
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
}
return;
}
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@ -2298,6 +2337,30 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
Symbol *sym;
Value *off;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
off = fetchSrc(0, 0);
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
st->cache = tgsi.getCacheMode();
if (tgsi.getDst(0).isIndirect(0))
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
}
return;
}
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@ -2359,6 +2422,37 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
Instruction *insn;
Value *off = fetchSrc(1, c), *off2 = NULL;
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
else
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
if (tgsi.getSrc(0).isIndirect(0))
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
else
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
insn->setIndirect(0, 0, off);
if (off2)
insn->setIndirect(0, 1, off2);
insn->subOp = subOp;
}
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
return;
}
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
@ -3103,6 +3197,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
case TGSI_OPCODE_MEMBAR:
geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
geni->fixed = 1;
if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
else
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
@ -3115,6 +3217,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
case TGSI_OPCODE_RESQ:
geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
if (tgsi.getSrc(0).isIndirect(0))
geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {

View file

@ -1022,11 +1022,22 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
return true;
}
bool
NVC0LoweringPass::handleSUQ(Instruction *suq)
{
suq->op = OP_MOV;
suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
suq->getSrc(0)->reg.fileIndex * 16));
suq->setIndirect(0, 0, NULL);
suq->setIndirect(0, 1, NULL);
return true;
}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@ -1037,16 +1048,22 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
assert(base->reg.size == 8);
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
assert(base->reg.size == 8);
atom->setIndirect(0, 0, base);
return true;
}
Value *base =
base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
atom->setIndirect(0, 1, NULL);
atom->setIndirect(0, 0, base);
return true;
@ -1069,7 +1086,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
cctl->setPredicate(cas->cc, cas->getPredicate());
}
if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
// CAS is crazy. Its 2nd source is a double reg, and the 3rd source
// should be set to the high part of the double reg or bad things will
// happen elsewhere in the universe.
@ -1079,6 +1096,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
bld.setPosition(cas, false);
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
cas->setSrc(1, dreg);
cas->setSrc(2, dreg);
}
return true;
@ -1093,6 +1111,32 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
inline Value *
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
return bld.
mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
}
inline Value *
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
}
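
These two helpers read from a 16-byte per-buffer record in the driver constant buffer (the indirect index is shifted left by 4, i.e. scaled by the record size). The layout implied by the offsets — a 64-bit address at +0 and a 32-bit size at +8 — sketches out as follows; the field names are invented for illustration:

#include <stdint.h>

/* Hypothetical view of one slot in the suInfo area: loadResInfo64
 * fetches 'address' (offset + 0), loadResLength32 fetches 'size'
 * (offset + 8); the last word is padding. */
struct buf_info {
   uint64_t address;  /* GPU virtual address of the buffer         */
   uint32_t size;     /* buffer length in bytes, for bounds checks */
   uint32_t pad;      /* written as 0 by the state validation code */
};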
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
@ -1786,6 +1830,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
@ -1820,6 +1865,26 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
Value *ind = i->getIndirect(0, 1);
Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
// XXX come up with a way not to do this for EVERY little access but
// rather to batch these up somehow. Unfortunately we've lost the
// information about the field width by the time we get here.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);
if (i->defExists(0)) {
bld.mkMov(i->getDef(0), bld.mkImm(0));
}
}
break;
case OP_ATOM:
@ -1838,6 +1903,9 @@ NVC0LoweringPass::visit(Instruction *i)
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
break;
case OP_SUQ:
handleSUQ(i);
break;
default:
break;
}
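
The FILE_MEMORY_GLOBAL case above makes every buffer access bounds-checked: the buffer's address and length come from the 16-byte records sketched earlier, the operation is predicated off when the end of the access lies past the length, and a skipped load yields 0. A simplified plain-C model of that behavior (hypothetical helper, not the IR builder API; the real pass also folds in the indirect offset):

#include <stdint.h>
#include <string.h>

struct buffer { uint8_t *base; uint32_t length; };

/* Model of the lowered load: proceed only when the whole access fits
 * (the OP_SET + CC_NOT_P predicate pair above), otherwise return 0
 * (the mkMov(def, 0) fallback). */
uint32_t checked_load32(const struct buffer *buf, uint32_t offset)
{
   uint32_t v = 0;
   if (offset + sizeof(v) <= buf->length)
      memcpy(&v, buf->base + offset, sizeof(v));
   return v;
}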

View file

@ -101,6 +101,7 @@ protected:
bool handleTXQ(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
bool handleSUQ(Instruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
@ -116,6 +117,8 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
Value *loadResInfo64(Value *ptr, uint32_t off);
Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);

View file

@ -336,6 +336,7 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
}
return true;
}
@ -872,6 +875,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
}
}
void
ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
{
switch (i->op) {
case OP_MAD:
case OP_FMA:
if (imm2.isInteger(0)) {
i->op = OP_MUL;
i->setSrc(2, NULL);
foldCount++;
return;
}
break;
default:
return;
}
}
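
opnd3 is a new hook for folding on an instruction's third source; its only case so far rewrites a MAD/FMA whose addend is the immediate 0 into a plain MUL (a*b + 0 == a*b, at least for the integer case). A sketch of the transform with stand-in types, not the nv50 IR classes:

enum op { OP_MAD, OP_FMA, OP_MUL };

struct insn { enum op op; int nsrc; };

/* Drop a known-zero addend: MAD/FMA (a, b, 0) -> MUL (a, b). */
void fold_opnd3(struct insn *i, int imm2)
{
   if ((i->op == OP_MAD || i->op == OP_FMA) && imm2 == 0) {
      i->op = OP_MUL;   /* the real pass also does setSrc(2, NULL) */
      i->nsrc = 2;
   }
}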
void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
@ -1202,6 +1223,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
}
break;
case OP_SHR:
if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
bld.setPosition(i, false);
i->op = OP_AND;
i->setSrc(0, si->getSrc(0));
i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
}
break;
case OP_MUL:
int muls;
if (isFloatType(si->dType))
@ -2504,6 +2533,12 @@ MemoryOpt::runOpt(BasicBlock *bb)
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
if (typeSizeof(ldst->dType) == 4 &&
ldst->src(1).getFile() == FILE_GPR &&
ldst->getSrc(1)->getInsn()->op == OP_NOP) {
delete_Instruction(prog, ldst);
continue;
}
isLoad = false;
} else {
// TODO: maybe have all fixed ops act as barrier ?
@ -3015,7 +3050,7 @@ Instruction::isResultEqual(const Instruction *that) const
if (that->srcExists(s))
return false;
if (op == OP_LOAD || op == OP_VFETCH) {
if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
@ -3046,6 +3081,8 @@ GlobalCSE::visit(BasicBlock *bb)
ik = phi->getSrc(0)->getInsn();
if (!ik)
continue; // probably a function input
if (ik->defCount(0xff) > 1)
continue; // too painful to check if we can really push this forward
for (s = 1; phi->srcExists(s); ++s) {
if (phi->getSrc(s)->refCount() > 1)
break;
@ -3179,10 +3216,10 @@ DeadCodeElim::buryAll(Program *prog)
bool
DeadCodeElim::visit(BasicBlock *bb)
{
Instruction *next;
Instruction *prev;
for (Instruction *i = bb->getFirst(); i; i = next) {
next = i->next;
for (Instruction *i = bb->getExit(); i; i = prev) {
prev = i->prev;
if (i->isDead()) {
++deadCount;
delete_Instruction(prog, i);
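
One of the folds above rests on a small bit identity: the new OP_SHR sub-case of OP_SHL turns (x >> c) << c into a single AND, since the shift pair just clears the low c bits. A standalone check of that identity (plain C):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t x = 0xdeadbeefu;
   for (uint32_t c = 0; c < 32; ++c)
      /* shifting right then left by c clears the low c bits, which
       * is exactly an AND with ~((1 << c) - 1) */
      assert(((x >> c) << c) == (x & ~((1u << c) - 1u)));
   return 0;
}

Separately, the DCE walk now iterates from the block exit backwards: a dead user is buried before its producer, so by the time the producer is visited it has lost its last use and dies too, letting whole dead chains disappear in a single visit.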

View file

@ -161,6 +161,7 @@ const char *operationStr[OP_LAST + 1] =
"subfm",
"suclamp",
"sueau",
"suq",
"madsp",
"texbar",
"dfdx",

View file

@ -1544,6 +1544,9 @@ GCRA::cleanup(const bool success)
delete[] nodes;
nodes = NULL;
hi.next = hi.prev = &hi;
lo[0].next = lo[0].prev = &lo[0];
lo[1].next = lo[1].prev = &lo[1];
}
Symbol *

View file

@ -46,7 +46,7 @@ const uint8_t Target::operationSrcNr[] =
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
@ -109,8 +109,8 @@ const OpClass Target::operationClass[] =
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
// SUBFM, SUCLAMP, SUEAU, MADSP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP

View file

@ -266,7 +266,9 @@ nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
int i;
for (i = 0; i < num_buffers; ++i) {
#ifndef NDEBUG
assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
#endif
memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
dec->bsp_ptr += num_bytes[i];
str_bsp->w0[0] += num_bytes[i];

View file

@ -184,6 +184,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:

View file

@ -369,7 +369,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
@ -403,10 +402,13 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
} else
}
if (prog->type == PIPE_SHADER_COMPUTE) {
prog->cp.syms = info->bin.syms;
prog->cp.num_syms = info->bin.numSyms;
} else {
FREE(info->bin.syms);
}
if (prog->pipe.stream_output.num_outputs)
@ -507,6 +509,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->interps);
FREE(p->so);
if (type == PIPE_SHADER_COMPUTE)
FREE(p->cp.syms);
memset(p, 0, sizeof(*p));
p->pipe = pipe;

View file

@ -227,6 +227,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:

View file

@ -594,6 +594,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
PUSH_DATA (push, nv50->rt_array_mode);
}
static void
nv50_clear_buffer_push(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
const void *data, int data_size)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
unsigned count = (size + 3) / 4;
unsigned xcoord = offset & 0xff;
unsigned tmp, i;
if (data_size == 1) {
tmp = *(unsigned char *)data;
tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
data = &tmp;
data_size = 4;
} else if (data_size == 2) {
tmp = *(unsigned short *)data;
tmp = (tmp << 16) | tmp;
data = &tmp;
data_size = 4;
}
unsigned data_words = data_size / 4;
nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
nouveau_pushbuf_bufctx(push, nv50->bufctx);
nouveau_pushbuf_validate(push);
offset &= ~0xff;
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
PUSH_DATA (push, 65536);
PUSH_DATA (push, 1);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, size);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, xcoord);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
while (count) {
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
unsigned nr = nr_data * data_words;
BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
for (i = 0; i < nr_data; i++)
PUSH_DATAp(push, data, data_words);
count -= nr;
}
if (buf->mm) {
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
}
nouveau_bufctx_reset(nv50->bufctx, 0);
}
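
The 8- and 16-bit cases at the top widen the clear pattern by replication so the SIFC loop below always emits whole 32-bit words; a standalone check of the replication (plain C):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t b = 0xab;                       /* 1-byte pattern */
   b = (b << 24) | (b << 16) | (b << 8) | b;
   assert(b == 0xababababu);

   uint32_t h = 0x1234;                     /* 2-byte pattern */
   h = (h << 16) | h;
   assert(h == 0x12341234u);
   return 0;
}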
static void
nv50_clear_buffer(struct pipe_context *pipe,
struct pipe_resource *res,
@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
if (offset & 0xff) {
unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
assert(fixup_size % data_size == 0);
nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
offset += fixup_size;
size -= fixup_size;
if (!size)
return;
}
elements = size / data_size;
height = (elements + 8191) / 8192;
width = elements / height;
if (height > 1)
width &= ~0xff;
assert(width > 0);
BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
PUSH_DATAf(push, color.f[0]);
@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
PUSH_DATA (push, height);
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@ -694,25 +783,20 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
height = 1;
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
PUSH_DATA (push, height);
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
}
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
if (buf->mm) {
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
}
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
nv50_clear_buffer_push(pipe, res, offset, width * data_size,
data, data_size);
}
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}

View file

@ -491,3 +491,52 @@ daic_runout:
daic_runout_check:
branz annul $r7 #daic_runout
bra annul #daic_restore
/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
*
* This is a combination macro for all of our query buffer object needs.
* It has the option to clamp results to a configurable amount, as well as
* to write out one or two words.
*
* We use the query engine to write out the values, and expect the query
* address to point to the right place.
*
* arg = clamp value (0 means unclamped); a clamped query writes just one value.
* parm[0] = LSB of end value
* parm[1] = MSB of end value
* parm[2] = LSB of start value
* parm[3] = MSB of start value
* parm[4] = desired sequence
* parm[5] = actual sequence
*/
.section #mme9097_query_buffer_write
parm $r2
parm $r3
parm $r4
parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */
parm $r6
parm $r7
mov $r6 (sub $r7 $r6) /* actual - desired */
mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
braz annul $r6 #qbw_ready
exit
qbw_ready:
mov $r2 (sub $r2 $r4)
braz $r1 #qbw_postclamp
mov $r3 (sbb $r3 $r5)
branz annul $r3 #qbw_clamp
mov $r4 (sub $r1 $r2)
mov $r4 (sbb 0x0 0x0)
braz annul $r4 #qbw_postclamp
qbw_clamp:
mov $r2 $r1
qbw_postclamp:
send $r2
mov $r4 0x1000
branz annul $r1 #qbw_done
send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
maddr 0x16c2 /* QUERY_SEQUENCE */
send $r3
qbw_done:
exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
nop
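
MME listings are terse, so here is a rough C model of what the macro computes — control flow only, not the method encoding, and the function name is invented:

#include <stdint.h>

/* Returns the number of 32-bit QUERY_SEQUENCE writes the macro would
 * trigger.  clamp == 0 means unclamped: write low and high words;
 * nonzero: write a single word, capped at 'clamp'. */
static int query_buffer_write(uint32_t clamp,
                              uint64_t end, uint64_t start,
                              uint32_t desired_seq, uint32_t actual_seq,
                              uint32_t out[2])
{
   if (actual_seq < desired_seq)   /* borrow above: result not landed yet */
      return 0;

   uint64_t diff = end - start;
   if (clamp) {
      out[0] = diff > clamp ? clamp : (uint32_t)diff;
      return 1;
   }
   out[0] = (uint32_t)diff;
   out[1] = (uint32_t)(diff >> 32);
   return 2;
}

int main(void)
{
   uint32_t out[2] = { 0, 0 };
   /* unclamped 64-bit result, sequence already reached: two words */
   int n = query_buffer_write(0, 100000, 64, 7, 9, out);
   return (n == 2 && out[0] == 99936) ? 0 : 1;
}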

View file

@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
0xfffef837,
0xfffdc027,
};
uint32_t mme9097_query_buffer_write[] = {
0x00000201,
0x00000301,
/* 0x000a: qbw_ready */
0x00000401,
0x05b08551,
/* 0x0011: qbw_clamp */
/* 0x0012: qbw_postclamp */
0x00000601,
0x00000701,
/* 0x0018: qbw_done */
0x0005be10,
0x00060610,
0x0000b027,
0x00000091,
0x00051210,
0x0001c807,
0x00075b10,
0x00011837,
0x00048c10,
0x00060410,
0x0000a027,
0x00000a11,
0x00001041,
0x04000411,
0x00010837,
0x84010042,
0x05b08021,
0x00001841,
0x840100c2,
0x00000011,
};

View file

@ -56,6 +56,7 @@ static void
nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
int i, s;
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
@ -90,6 +91,9 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
}
if (flags & PIPE_BARRIER_SHADER_BUFFER) {
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
}
}
static void
@ -122,6 +126,10 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
}
for (s = 0; s < 6; ++s)
for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
@ -180,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
if (bind & PIPE_BIND_RENDER_TARGET) {
if (res->bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@ -194,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@ -204,12 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
if (bind & (PIPE_BIND_VERTEX_BUFFER |
PIPE_BIND_INDEX_BUFFER |
PIPE_BIND_CONSTANT_BUFFER |
PIPE_BIND_STREAM_OUTPUT |
PIPE_BIND_COMMAND_ARGS_BUFFER |
PIPE_BIND_SAMPLER_VIEW)) {
if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
@ -253,6 +255,18 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
for (s = 0; s < 5; ++s) {
for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
if (nvc0->buffers[s][i].buffer == res) {
nvc0->buffers_dirty[s] |= 1 << i;
nvc0->dirty |= NVC0_NEW_BUFFERS;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
if (!--ref)
return ref;
}
}
}
}
return ref;

View file

@ -56,6 +56,7 @@
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
#define NVC0_NEW_TESSFACTOR (1 << 25)
#define NVC0_NEW_BUFFERS (1 << 26)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@ -73,9 +74,10 @@
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
#define NVC0_BIND_SUF 245
#define NVC0_BIND_SCREEN 246
#define NVC0_BIND_TLS 247
#define NVC0_BIND_3D_COUNT 248
#define NVC0_BIND_BUF 246
#define NVC0_BIND_SCREEN 247
#define NVC0_BIND_TLS 249
#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
@ -187,10 +189,15 @@ struct nvc0_context {
struct nvc0_blitctx *blit;
/* NOTE: some of these surfaces may reference buffers */
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
uint16_t surfaces_dirty[2];
uint16_t surfaces_valid[2];
struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS];
uint32_t buffers_dirty[6];
uint32_t buffers_valid[6];
struct util_dynarray global_residents;
};

View file

@ -33,4 +33,6 @@
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
#endif /* __NVC0_MACROS_H__ */

View file

@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
*/
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 26;
if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;

View file

@ -73,6 +73,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
}
static void
nvc0_get_query_result_resource(struct pipe_context *pipe,
struct pipe_query *pq,
boolean wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset)
{
struct nvc0_query *q = nvc0_query(pq);
if (!q->funcs->get_query_result_resource) {
assert(!"Unexpected lack of get_query_result_resource");
return;
}
q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
index, resource, offset);
}
static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq,
@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe,
}
if (wait)
nvc0_hw_query_fifo_wait(push, q);
nvc0_hw_query_fifo_wait(nvc0, q);
PUSH_SPACE(push, 7);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->begin_query = nvc0_begin_query;
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->get_query_result_resource = nvc0_get_query_result_resource;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}

View file

@ -14,6 +14,13 @@ struct nvc0_query_funcs {
void (*end_query)(struct nvc0_context *, struct nvc0_query *);
boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
boolean, union pipe_query_result *);
void (*get_query_result_resource)(struct nvc0_context *nvc0,
struct nvc0_query *q,
boolean wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset);
};
struct nvc0_query {

View file

@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
return true;
}
static void
nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
struct nvc0_query *q,
boolean wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
struct nv04_resource *buf = nv04_resource(resource);
unsigned stride;
assert(!hq->funcs || !hq->funcs->get_query_result);
if (index == -1) {
/* TODO: Use a macro to write the availability of the query */
if (hq->state != NVC0_HW_QUERY_STATE_READY)
nvc0_hw_query_update(nvc0->screen->base.client, q);
uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
nvc0->base.push_cb(&nvc0->base, buf, offset,
result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
ready);
return;
}
/* If the fence guarding this query has not been emitted yet, emit it now;
* an unemitted fence would make a lot of the following logic more
* complicated.
*/
if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
nouveau_fence_emit(hq->fence);
/* We either need to compute a 32- or 64-bit difference between two values,
* and then store the result as either a 32- or 64-bit value. As such, let's
* treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
* ones), and have one macro that clamps the result to i32, u32, or just
* outputs the difference (no need to worry about 64-bit clamping).
*/
if (hq->state != NVC0_HW_QUERY_STATE_READY)
nvc0_hw_query_update(nvc0->screen->base.client, q);
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
nvc0_hw_query_fifo_wait(nvc0, q);
nouveau_pushbuf_space(push, 16, 2, 0);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
PUSH_DATA(push, 0x00000001);
else if (result_type == PIPE_QUERY_TYPE_I32)
PUSH_DATA(push, 0x7fffffff);
else if (result_type == PIPE_QUERY_TYPE_U32)
PUSH_DATA(push, 0xffffffff);
else
PUSH_DATA(push, 0x00000000);
switch (q->type) {
case PIPE_QUERY_SO_STATISTICS:
stride = 2;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
stride = 12;
break;
default:
assert(index == 0);
stride = 1;
break;
}
if (hq->is64bit) {
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
} else {
nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
PUSH_DATA(push, 0);
nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
PUSH_DATA(push, 0);
}
if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
PUSH_DATA(push, 0);
PUSH_DATA(push, 0);
} else if (hq->is64bit) {
PUSH_DATA(push, hq->fence->sequence);
nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
} else {
PUSH_DATA(push, hq->sequence);
nouveau_pushbuf_data(push, hq->bo, hq->offset,
4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
}
if (buf->mm) {
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
}
}
static const struct nvc0_query_funcs hw_query_funcs = {
.destroy_query = nvc0_hw_destroy_query,
.begin_query = nvc0_hw_begin_query,
.end_query = nvc0_hw_end_query,
.get_query_result = nvc0_hw_get_query_result,
.get_query_result_resource = nvc0_hw_get_query_result_resource,
};
struct nvc0_query *
@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
}
void
nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
unsigned offset = hq->offset;
@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
PUSH_SPACE(push, 5);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
PUSH_DATAh(push, hq->bo->offset + offset);
PUSH_DATA (push, hq->bo->offset + offset);
PUSH_DATA (push, hq->sequence);
if (hq->is64bit) {
PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
PUSH_DATA (push, nvc0->screen->fence.bo->offset);
PUSH_DATA (push, hq->fence->sequence);
} else {
PUSH_DATAh(push, hq->bo->offset + offset);
PUSH_DATA (push, hq->bo->offset + offset);
PUSH_DATA (push, hq->sequence);
}
PUSH_DATA (push, (1 << 12) |
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

View file

@ -51,6 +51,6 @@ void
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
unsigned);
void
nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
#endif

View file

@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1; /* 256 for binding as RT, but that's not possible in GL */
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 16;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
@ -189,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@ -212,10 +215,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -322,8 +327,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return NVC0_MAX_BUFFERS;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@ -676,8 +682,9 @@ nvc0_screen_create(struct nouveau_device *dev)
push->rsvd_kick = 5;
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
PIPE_BIND_SHADER_BUFFER |
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
PIPE_BIND_COMMAND_ARGS_BUFFER;
PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
@ -891,9 +898,9 @@ nvc0_screen_create(struct nouveau_device *dev)
/* TIC and TSC entries for each unit (nve4+ only) */
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
PUSH_DATA (push, 1024);
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
@ -913,8 +920,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@ -922,8 +929,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
@ -953,8 +960,12 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
/* Reduce the likelihood of collision with real buffers by placing the hole
* at the top of the 4G area. This will have to be dealt with properly
* eventually by blocking that area off from the VM.
*/
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0xff << 24);
if (screen->eng3d->oclass < GM107_3D_CLASS) {
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
@ -1039,6 +1050,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);

View file

@ -22,6 +22,8 @@
#define NVC0_MAX_VIEWPORTS 16
#define NVC0_MAX_BUFFERS 32
struct nvc0_context;

View file

@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
continue;
if (!targ->clean)
nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);

View file

@ -1243,11 +1243,50 @@ nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start_slot, unsigned count,
struct pipe_image_view **views)
{
#if 0
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
#endif
}
static void
nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
unsigned start, unsigned nr,
struct pipe_shader_buffer *pbuffers)
{
const unsigned end = start + nr;
const unsigned mask = ((1 << nr) - 1) << start;
unsigned i;
assert(t < 5);
if (pbuffers) {
for (i = start; i < end; ++i) {
const unsigned p = i - start;
if (pbuffers[p].buffer)
nvc0->buffers_valid[t] |= (1 << i);
else
nvc0->buffers_valid[t] &= ~(1 << i);
nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset;
nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size;
pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer);
}
} else {
for (i = start; i < end; ++i)
pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL);
nvc0->buffers_valid[t] &= ~mask;
}
nvc0->buffers_dirty[t] |= mask;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
}
static void
nvc0_set_shader_buffers(struct pipe_context *pipe,
unsigned shader,
unsigned start, unsigned nr,
struct pipe_shader_buffer *buffers)
{
const unsigned s = nvc0_shader_stage(shader);
nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
}
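
The dirty/valid bookkeeping relies on a contiguous slot mask; a quick standalone check of the expression used above (as written, the mask expression needs nr < 32 to be well-defined C):

#include <assert.h>

int main(void)
{
   unsigned start = 2, nr = 3;
   unsigned mask = ((1u << nr) - 1u) << start;
   assert(mask == 0x1c);   /* 0b11100: slots 2, 3 and 4 */
   return 0;
}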
static inline void
@ -1377,6 +1416,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
pipe->set_shader_images = nvc0_set_shader_images;
pipe->set_shader_buffers = nvc0_set_shader_buffers;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;

View file

@ -183,9 +183,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
PUSH_DATA (push, 1024);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
@ -317,9 +317,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
PUSH_DATA (push, 1024);
PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
@ -470,6 +470,39 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
}
static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
int i, s;
for (s = 0; s < 5; s++) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
PUSH_DATA (push, 512);
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
if (nvc0->buffers[s][i].buffer) {
struct nv04_resource *res =
nv04_resource(nvc0->buffers[s][i].buffer);
PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
} else {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
}
}
}
}
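
For readers following the address math: the validation loop above writes each stage's shader-buffer descriptors into the driver constant buffer at uniform_bo->offset + (5 << 16) + (s << 10), so each of the five stages owns a 1 KiB slice (grown from 512 bytes elsewhere in this series) with the four-word buffer descriptors starting at byte 512 of the slice. A minimal sketch, assuming a made-up uniform_bo offset:

#include <stdio.h>

int main(void)
{
   const unsigned long long uniform_bo_offset = 0x100000;   /* hypothetical */
   for (int s = 0; s < 5; s++) {
      unsigned long long base =
         uniform_bo_offset + (5u << 16) + ((unsigned)s << 10);
      printf("stage %d: slice at 0x%llx, buffer descriptors at 0x%llx\n",
             s, base, base + 512);
   }
   return 0;
}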
static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
@ -663,6 +696,7 @@ static struct state_validate {
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
{ nvc0_validate_buffers, NVC0_NEW_BUFFERS },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },

View file

@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
}
static void
nvc0_clear_buffer_cpu(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
const void *data, int data_size)
nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
const void *data, int data_size)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
struct pipe_transfer *pt;
struct pipe_box box;
unsigned elements, i;
unsigned i;
elements = size / data_size;
nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
nouveau_pushbuf_bufctx(push, nvc0->bufctx);
nouveau_pushbuf_validate(push);
u_box_1d(offset, size, &box);
unsigned count = (size + 3) / 4;
unsigned data_words = data_size / 4;
uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
&box, &pt);
while (count) {
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
unsigned nr = nr_data * data_words;
for (i = 0; i < elements; ++i)
memcpy(&map[i*data_size], data, data_size);
if (!PUSH_SPACE(push, nr + 9))
break;
buf->vtbl->transfer_unmap(pipe, pt);
BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
PUSH_DATA (push, MIN2(size, nr * 4));
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
PUSH_DATA (push, 0x100111);
/* must not be interrupted (trap on QUERY fence, 0x50 works however) */
BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
for (i = 0; i < nr_data; i++)
PUSH_DATAp(push, data, data_words);
count -= nr;
offset += nr * 4;
size -= nr * 4;
}
if (buf->mm) {
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
}
nouveau_bufctx_reset(nvc0->bufctx, 0);
}
static void
nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
const void *data, int data_size)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
unsigned i;
nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
nouveau_pushbuf_bufctx(push, nvc0->bufctx);
nouveau_pushbuf_validate(push);
unsigned count = (size + 3) / 4;
unsigned data_words = data_size / 4;
while (count) {
unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
unsigned nr = nr_data * data_words;
if (!PUSH_SPACE(push, nr + 10))
break;
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, MIN2(size, nr * 4));
PUSH_DATA (push, 1);
/* must not be interrupted (trap on QUERY fence, 0x50 works however) */
BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
PUSH_DATA (push, 0x1001);
for (i = 0; i < nr_data; i++)
PUSH_DATAp(push, data, data_words);
count -= nr;
offset += nr * 4;
size -= nr * 4;
}
if (buf->mm) {
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
}
nouveau_bufctx_reset(nvc0->bufctx, 0);
}
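
Both push-clear paths slice the upload the same way: at most NV04_PFIFO_MAX_PACKET_LEN data words per packet, trimmed down to a whole number of clear-pattern repeats so that, for example, a 16-byte RGBA32 pattern never straddles a packet boundary. A rough standalone sketch of that accounting (the word limit is copied here as a local constant; the real code takes it from the nouveau headers):

#include <stdio.h>

#define MAX_PACKET_LEN 2047   /* local stand-in for NV04_PFIFO_MAX_PACKET_LEN */

int main(void)
{
   unsigned size = 70000, data_size = 16;     /* hypothetical RGBA32 clear */
   unsigned count = (size + 3) / 4;           /* dwords to write */
   unsigned data_words = data_size / 4;       /* dwords per pattern repeat */

   while (count) {
      unsigned cap = count < MAX_PACKET_LEN ? count : MAX_PACKET_LEN;
      unsigned nr_data = cap / data_words;    /* whole repeats only */
      unsigned nr = nr_data * data_words;
      if (!nr)
         break;                               /* guard for the sketch only */
      printf("packet: %u dwords (%u repeats)\n", nr, nr_data);
      count -= nr;
   }
   return 0;
}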
static void
nvc0_clear_buffer_push(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
const void *data, int data_size)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned tmp;
if (data_size == 1) {
tmp = *(unsigned char *)data;
tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
data = &tmp;
data_size = 4;
} else if (data_size == 2) {
tmp = *(unsigned short *)data;
tmp = (tmp << 16) | tmp;
data = &tmp;
data_size = 4;
}
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
else
nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
}
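
The dispatcher above widens sub-dword clear values before choosing a path, so the M2MF/P2MF uploads can always emit whole dwords. A small sketch with the same semantics as the tmp handling in nvc0_clear_buffer_push():

#include <assert.h>
#include <stdint.h>

static uint32_t widen_clear_value(const void *data, int data_size)
{
   uint32_t tmp;
   if (data_size == 1) {
      tmp = *(const uint8_t *)data;
      return (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
   }
   if (data_size == 2) {
      tmp = *(const uint16_t *)data;
      return (tmp << 16) | tmp;
   }
   return *(const uint32_t *)data;
}

int main(void)
{
   uint8_t  b = 0xab;
   uint16_t h = 0x1234;
   assert(widen_clear_value(&b, 1) == 0xabababab);
   assert(widen_clear_value(&h, 2) == 0x12341234);
   return 0;
}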
static void
@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
memcpy(&color.ui, data, 16);
break;
case 12:
/* This doesn't work, RGB32 is not a valid RT format.
* dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
* memcpy(&color.ui, data, 12);
* memset(&color.ui[3], 0, 4);
/* RGB32 is not a valid RT format. This will be handled by the pushbuf
* uploader.
*/
break;
case 8:
@ -437,14 +540,26 @@ nvc0_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
if (data_size == 12) {
/* TODO: Find a way to do this with the GPU! */
nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
return;
}
if (offset & 0xff) {
unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
assert(fixup_size % data_size == 0);
nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
offset += fixup_size;
size -= fixup_size;
if (!size)
return;
}
elements = size / data_size;
height = (elements + 16383) / 16384;
width = elements / height;
if (height > 1)
width &= ~0xff;
assert(width > 0);
if (!PUSH_SPACE(push, 40))
return;
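
Two fixups land here: leading bytes below the next 256-byte boundary are cleared through the push path first, and the remaining elements are folded into a 2D linear render target whose width is rounded down to a multiple of 256 elements whenever more than one row is used, keeping the pitch aligned. A worked sketch with hypothetical sizes:

#include <stdio.h>

int main(void)
{
   unsigned offset = 0x130, size = 400000, data_size = 4;   /* hypothetical */

   if (offset & 0xff) {
      unsigned aligned = (offset + 0xff) & ~0xffu;   /* align(offset, 0x100) */
      unsigned fixup = aligned - offset < size ? aligned - offset : size;
      printf("push-clear %u bytes at 0x%x\n", fixup, offset);
      offset += fixup;
      size -= fixup;
   }

   unsigned elements = size / data_size;
   unsigned height = (elements + 16383) / 16384;
   unsigned width = elements / height;
   if (height > 1)
      width &= ~0xffu;
   printf("RT clear %ux%u, %u elements left for the push path\n",
          width, height, elements - width * height);
   return 0;
}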
@ -465,7 +580,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, width * data_size);
PUSH_DATA (push, align(width * data_size, 0x100));
PUSH_DATA (push, height);
PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
@ -480,24 +595,20 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
if (buf->mm) {
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
}
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
height = 1;
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, width * data_size);
PUSH_DATA (push, height);
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
data, data_size);
}
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}

View file

@ -515,12 +515,12 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
return;
address = nvc0->screen->uniform_bo->offset + (5 << 16);
for (s = 0; s < 5; ++s, address += (1 << 9)) {
for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATA (push, 1024);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
do {

View file

@ -334,7 +334,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
b = ve->pipe.vertex_buffer_index;
vb = &nvc0->vtxbuf[b];
if (!vb->buffer) {
if (nvc0->vbo_user & (1 << b)) {
if (!(nvc0->constant_vbos & (1 << b))) {
if (ve->pipe.instance_divisor) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
@ -352,13 +352,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
if (unlikely(ve->pipe.instance_divisor)) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
PUSH_DATA (push, (1 << 12) | vb->stride);
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
PUSH_DATA (push, (1 << 12) | vb->stride);
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
@ -382,7 +382,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
unsigned b;
const uint32_t mask = nvc0->vbo_user;
PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
for (b = 0; b < nvc0->num_vtxbufs; ++b) {
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
struct nv04_resource *buf;
@ -395,6 +395,10 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
}
/* address/value set in nvc0_update_user_vbufs_shared */
continue;
} else if (!vb->buffer) {
/* there can be holes in the vertex buffer lists */
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
continue;
}
buf = nv04_resource(vb->buffer);
offset = vb->buffer_offset;
@ -410,6 +414,12 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
}
/* If there are more elements than buffers, we might not have unset
* fetching on the later elements.
*/
for (; b < nvc0->vertex->num_elements; ++b)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
if (nvc0->vbo_user)
nvc0_update_user_vbufs_shared(nvc0);
}
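
The two added loops enforce a single rule: any vertex-element slot without a backing buffer, whether a hole in the middle of the list or a trailing element past num_vtxbufs, must have its VERTEX_ARRAY_FETCH explicitly turned off so enables left over from previous state do not survive. A toy model of that rule (hypothetical helper, not driver code):

#include <stdbool.h>
#include <stdio.h>

static void validate_fetch(const bool *has_buffer, unsigned num_vtxbufs,
                           unsigned num_elements)
{
   unsigned b;
   for (b = 0; b < num_vtxbufs; ++b) {
      if (!has_buffer[b])
         printf("element %u: disable fetch (hole)\n", b);
      else
         printf("element %u: program fetch\n", b);
   }
   /* elements beyond the bound buffers also need fetch unset */
   for (; b < num_elements; ++b)
      printf("element %u: disable fetch (trailing)\n", b);
}

int main(void)
{
   bool has_buffer[4] = { true, false, true, true };
   validate_fetch(has_buffer, 4, 6);
   return 0;
}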
@ -680,7 +690,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
if (count & 1) {
count--;
PUSH_SPACE(push, 1);
PUSH_SPACE(push, 2);
BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
PUSH_DATA (push, *map++);
}
@ -779,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 2);
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
@ -811,6 +821,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
PUSH_SPACE(push, 7);
/* must make FIFO wait for engines idle before continuing to process */
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
(buf_count && buf_count->fence_wr &&
@ -951,6 +963,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (info->mode == PIPE_PRIM_PATCHES &&
nvc0->state.patch_vertices != info->vertices_per_patch) {
nvc0->state.patch_vertices = info->vertices_per_patch;
PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
}
@ -958,6 +971,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_state_validate(nvc0, ~0, 8);
if (nvc0->vertprog->vp.need_draw_parameters) {
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
@ -979,6 +993,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
if (nvc0->cb_dirty) {
PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
nvc0->cb_dirty = false;
}
@ -987,6 +1002,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nvc0->textures_coherent[s])
continue;
PUSH_SPACE(push, nvc0->num_textures[s] * 2);
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
if (!(nvc0->textures_coherent[s] & (1 << i)))

View file

@ -210,6 +210,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* SWTCL-only features. */

View file

@ -225,7 +225,7 @@ void *evergreen_create_compute_state(
}
}
#else
memset(&shader->binary, 0, sizeof(shader->binary));
radeon_shader_binary_init(&shader->binary);
radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
@ -245,13 +245,31 @@ void *evergreen_create_compute_state(
return shader;
}
void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
{
struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
struct r600_context *ctx = (struct r600_context *)ctx_;
COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
struct r600_pipe_compute *shader = state;
if (!shader)
return;
#ifdef HAVE_OPENCL
#if HAVE_LLVM < 0x0306
for (unsigned i = 0; i < shader->num_kernels; i++) {
struct r600_kernel *kernel = &shader->kernels[i];
LLVMDisposeModule(kernel->llvm_module);
}
FREE(shader->kernels);
LLVMContextDispose(shader->llvm_ctx);
#else
radeon_shader_binary_clean(&shader->binary);
r600_destroy_shader(&shader->bc);
/* TODO destroy shader->code_bo, shader->const_bo
* we'll need something like r600_buffer_free */
#endif
#endif
FREE(shader);
}
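
The memset-to-radeon_shader_binary_init() switch above looks cosmetic, but it keeps construction and destruction symmetric: whichever helper owns the fields also frees them in radeon_shader_binary_clean(). A generic sketch of the pattern with stand-in names (not the real radeon structs):

#include <stdlib.h>
#include <string.h>

struct binary {                     /* stand-in for radeon_shader_binary */
   unsigned char *code;
   unsigned code_size;
};

static void binary_init(struct binary *b)
{
   memset(b, 0, sizeof(*b));        /* today init is just a clear ... */
}

static void binary_clean(struct binary *b)
{
   free(b->code);                   /* ... but clean owns the frees, so the
                                     * pair stays correct as fields grow */
   memset(b, 0, sizeof(*b));
}

int main(void)
{
   struct binary b;
   binary_init(&b);
   b.code = malloc(16);
   b.code_size = 16;
   binary_clean(&b);
   return 0;
}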
@ -349,7 +367,7 @@ static void evergreen_emit_direct_dispatch(
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
@ -723,7 +741,7 @@ static void evergreen_set_global_binding(
* command stream by the start_cs_cmd atom. However, since the SET_CONTEXT_REG
* packet requires that the shader type bit be set, we must initialize all
* context registers needed for compute in this function. The registers
* intialized by the start_cs_cmd atom can be found in evereen_state.c in the
* initialized by the start_cs_cmd atom can be found in evergreen_state.c in the
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
* on the GPU family.
*/
@ -733,7 +751,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
int num_threads;
int num_stack_entries;
/* since all required registers are initialised in the
/* since all required registers are initialized in the
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
r600_init_command_buffer(cb, 256);
@ -818,7 +836,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
* R_008E28_SQ_STATIC_THREAD_MGMT3
*/
/* XXX: We may need to adjust the thread and stack resouce
/* XXX: We may need to adjust the thread and stack resource
* values for 3D/compute interop */
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);

View file

@ -772,7 +772,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
if (util_format_get_blocksize(pipe_format) >= 16)
non_disp_tiling = 1;
}
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
if (state->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@ -986,7 +986,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
unsigned block_size =
align(util_format_get_blocksize(pipe_buffer->format), 4);
unsigned pitch_alignment =
MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size);
MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
unsigned pitch = align(pipe_buffer->width0, pitch_alignment);
/* XXX: This is copied from evergreen_init_color_surface(). I don't
@ -1098,7 +1098,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
if (util_format_get_blocksize(surf->base.format) >= 16)
non_disp_tiling = 1;
}
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
desc = util_format_description(surf->base.format);
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
@ -1253,7 +1253,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
bankh = eg_bank_wh(bankh);
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
offset >>= 8;
surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
@ -3467,7 +3467,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
sub_cmd = EG_DMA_COPY_TILED;
lbpp = util_logbase2(bpp);
pitch_tile_max = ((pitch / bpp) / 8) - 1;
nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks);
nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
/* T2L */
@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context *rctx)
unsigned id = 1;
unsigned i;
/* !!!
* To avoid GPU lockup registers must be emited in a specific order
* To avoid GPU lockup registers must be emitted in a specific order
* (no kidding ...). The order below is important and has been
* partialy infered from analyzing fglrx command stream.
* partially inferred from analyzing fglrx command stream.
*
* Don't reorder atom without carefully checking the effect (GPU lockup
* or piglit regression).
@ -3793,7 +3793,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
uint32_t values[16];
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
*num_patches = 1;

Some files were not shown because too many files have changed in this diff