mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
Merge remote-tracking branch 'mesa-public/master' into vulkan
This commit is contained in:
commit
ef56cf7738
95 changed files with 1400 additions and 3937 deletions
|
|
@ -231,15 +231,17 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
|
|||
_mesa_glsl_error(&loc, state, "unsized array index must be constant");
|
||||
}
|
||||
} else if (array->type->fields.array->is_interface()
|
||||
&& array->variable_referenced()->data.mode == ir_var_uniform
|
||||
&& (array->variable_referenced()->data.mode == ir_var_uniform ||
|
||||
array->variable_referenced()->data.mode == ir_var_shader_storage)
|
||||
&& !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
|
||||
/* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says:
|
||||
/* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says:
|
||||
*
|
||||
* "All indexes used to index a uniform block array must be
|
||||
* constant integral expressions."
|
||||
* "All indices used to index a uniform or shader storage block
|
||||
* array must be constant integral expressions."
|
||||
*/
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"uniform block array index must be constant");
|
||||
_mesa_glsl_error(&loc, state, "%s block array index must be constant",
|
||||
array->variable_referenced()->data.mode
|
||||
== ir_var_uniform ? "uniform" : "shader storage");
|
||||
} else {
|
||||
/* whole_variable_referenced can return NULL if the array is a
|
||||
* member of a structure. In this case it is safe to not update
|
||||
|
|
|
|||
|
|
@ -5766,6 +5766,10 @@ ast_process_structure_or_interface_block(exec_list *instructions,
|
|||
|
||||
const struct ast_type_qualifier *const qual =
|
||||
& decl_list->type->qualifier;
|
||||
|
||||
if (qual->flags.q.explicit_binding)
|
||||
validate_binding_qualifier(state, &loc, decl_type, qual);
|
||||
|
||||
if (qual->flags.q.std140 ||
|
||||
qual->flags.q.std430 ||
|
||||
qual->flags.q.packed ||
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
|
|||
base_type(GLSL_TYPE_SUBROUTINE),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
sampler_type(0), interface_packing(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
vector_elements(1), matrix_columns(1),
|
||||
length(0)
|
||||
{
|
||||
mtx_lock(&glsl_type::mutex);
|
||||
|
|
@ -214,7 +214,6 @@ glsl_type::glsl_type(const char *subroutine_name) :
|
|||
init_ralloc_type_ctx();
|
||||
assert(subroutine_name != NULL);
|
||||
this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
|
||||
this->vector_elements = 1;
|
||||
mtx_unlock(&glsl_type::mutex);
|
||||
}
|
||||
|
||||
|
|
@ -1615,6 +1614,7 @@ glsl_type::std430_base_alignment(bool row_major) const
|
|||
base_alignment = MAX2(base_alignment,
|
||||
field_type->std430_base_alignment(field_row_major));
|
||||
}
|
||||
assert(base_alignment > 0);
|
||||
return base_alignment;
|
||||
}
|
||||
assert(!"not reached");
|
||||
|
|
|
|||
|
|
@ -1131,15 +1131,15 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
const unsigned num_data_slots = uniform_size.num_values;
|
||||
const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
|
||||
|
||||
/* assign hidden uniforms a slot id */
|
||||
hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
|
||||
delete hiddenUniforms;
|
||||
|
||||
/* On the outside chance that there were no uniforms, bail out.
|
||||
*/
|
||||
if (num_uniforms == 0)
|
||||
return;
|
||||
|
||||
/* assign hidden uniforms a slot id */
|
||||
hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
|
||||
delete hiddenUniforms;
|
||||
|
||||
struct gl_uniform_storage *uniforms =
|
||||
rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
|
||||
union gl_constant_value *data =
|
||||
|
|
|
|||
|
|
@ -3133,6 +3133,60 @@ check_explicit_uniform_locations(struct gl_context *ctx,
|
|||
delete uniform_map;
|
||||
}
|
||||
|
||||
static bool
|
||||
should_add_buffer_variable(struct gl_shader_program *shProg,
|
||||
GLenum type, const char *name)
|
||||
{
|
||||
bool found_interface = false;
|
||||
const char *block_name = NULL;
|
||||
|
||||
/* These rules only apply to buffer variables. So we return
|
||||
* true for the rest of types.
|
||||
*/
|
||||
if (type != GL_BUFFER_VARIABLE)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
|
||||
block_name = shProg->UniformBlocks[i].Name;
|
||||
if (strncmp(block_name, name, strlen(block_name)) == 0) {
|
||||
found_interface = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We remove the interface name from the buffer variable name,
|
||||
* including the dot that follows it.
|
||||
*/
|
||||
if (found_interface)
|
||||
name = name + strlen(block_name) + 1;
|
||||
|
||||
/* From: ARB_program_interface_query extension:
|
||||
*
|
||||
* "For an active shader storage block member declared as an array, an
|
||||
* entry will be generated only for the first array element, regardless
|
||||
* of its type. For arrays of aggregate types, the enumeration rules are
|
||||
* applied recursively for the single enumerated array element.
|
||||
*/
|
||||
const char *first_dot = strchr(name, '.');
|
||||
const char *first_square_bracket = strchr(name, '[');
|
||||
|
||||
/* The buffer variable is on top level and it is not an array */
|
||||
if (!first_square_bracket) {
|
||||
return true;
|
||||
/* The shader storage block member is a struct, then generate the entry */
|
||||
} else if (first_dot && first_dot < first_square_bracket) {
|
||||
return true;
|
||||
} else {
|
||||
/* Shader storage block member is an array, only generate an entry for the
|
||||
* first array element.
|
||||
*/
|
||||
if (strncmp(first_square_bracket, "[0]", 3) == 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
add_program_resource(struct gl_shader_program *prog, GLenum type,
|
||||
const void *data, uint8_t stages)
|
||||
|
|
@ -3412,6 +3466,10 @@ build_program_resource_list(struct gl_shader_program *shProg)
|
|||
|
||||
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
|
||||
GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
|
||||
if (!should_add_buffer_variable(shProg, type,
|
||||
shProg->UniformStorage[i].name))
|
||||
continue;
|
||||
|
||||
if (!add_program_resource(shProg, type,
|
||||
&shProg->UniformStorage[i], stageref))
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -754,6 +754,12 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
|
|||
add(base_offset,
|
||||
new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
|
||||
if (is_write) {
|
||||
/* If the component is not in the writemask, then don't
|
||||
* store any value.
|
||||
*/
|
||||
if (!((1 << i) & write_mask))
|
||||
continue;
|
||||
|
||||
base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
|
||||
} else {
|
||||
if (!this->is_shader_storage) {
|
||||
|
|
|
|||
|
|
@ -131,9 +131,13 @@ private:
|
|||
}; /* end of anonymous namespace */
|
||||
|
||||
nir_shader *
|
||||
glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
|
||||
glsl_to_nir(const struct gl_shader_program *shader_prog,
|
||||
gl_shader_stage stage,
|
||||
const nir_shader_compiler_options *options)
|
||||
{
|
||||
nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
|
||||
struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
|
||||
|
||||
nir_shader *shader = nir_shader_create(NULL, stage, options);
|
||||
|
||||
nir_visitor v1(shader, sh);
|
||||
nir_function_visitor v2(&v1);
|
||||
|
|
@ -142,8 +146,26 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
|
|||
|
||||
nir_lower_outputs_to_temporaries(shader);
|
||||
|
||||
shader->gs.vertices_out = sh->Geom.VerticesOut;
|
||||
shader->gs.invocations = sh->Geom.Invocations;
|
||||
/* TODO: Use _mesa_fls instead */
|
||||
unsigned num_textures = 0;
|
||||
for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
|
||||
if (sh->Program->SamplersUsed & (1 << i))
|
||||
num_textures = i;
|
||||
|
||||
shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name);
|
||||
shader->info.num_textures = num_textures;
|
||||
shader->info.num_ubos = sh->NumUniformBlocks;
|
||||
shader->info.num_abos = shader_prog->NumAtomicBuffers;
|
||||
shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks;
|
||||
shader->info.num_images = sh->NumImages;
|
||||
shader->info.inputs_read = sh->Program->InputsRead;
|
||||
shader->info.outputs_written = sh->Program->OutputsWritten;
|
||||
shader->info.system_values_read = sh->Program->SystemValuesRead;
|
||||
shader->info.uses_texture_gather = sh->Program->UsesGather;
|
||||
shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
|
||||
shader->info.separate_shader = shader_prog->SeparateShader;
|
||||
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
|
||||
shader->info.gs.invocations = sh->Geom.Invocations;
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
|
@ -274,6 +296,11 @@ nir_visitor::visit(ir_variable *ir)
|
|||
/* For whatever reason, GLSL IR makes gl_FrontFacing an input */
|
||||
var->data.location = SYSTEM_VALUE_FRONT_FACE;
|
||||
var->data.mode = nir_var_system_value;
|
||||
} else if (shader->stage == MESA_SHADER_GEOMETRY &&
|
||||
ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
|
||||
/* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
|
||||
var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
|
||||
var->data.mode = nir_var_system_value;
|
||||
} else {
|
||||
var->data.mode = nir_var_shader_in;
|
||||
}
|
||||
|
|
@ -799,7 +826,6 @@ nir_visitor::visit(ir_call *ir)
|
|||
instr = nir_intrinsic_instr_create(shader, op);
|
||||
instr->src[2] = evaluate_rvalue(offset);
|
||||
instr->const_index[0] = 0;
|
||||
dest = &instr->dest;
|
||||
} else {
|
||||
instr->const_index[0] = const_offset->value.u[0];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
nir_shader *glsl_to_nir(struct gl_shader *sh,
|
||||
nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog,
|
||||
gl_shader_stage stage,
|
||||
const nir_shader_compiler_options *options);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ nir_shader_create(void *mem_ctx,
|
|||
exec_list_make_empty(&shader->outputs);
|
||||
|
||||
shader->options = options;
|
||||
memset(&shader->info, 0, sizeof(shader->info));
|
||||
|
||||
exec_list_make_empty(&shader->functions);
|
||||
exec_list_make_empty(&shader->registers);
|
||||
|
|
@ -54,9 +55,6 @@ nir_shader_create(void *mem_ctx,
|
|||
|
||||
shader->stage = stage;
|
||||
|
||||
shader->gs.vertices_out = 0;
|
||||
shader->gs.invocations = 0;
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
|
|
@ -1489,10 +1487,11 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
|||
return nir_intrinsic_load_work_group_id;
|
||||
case SYSTEM_VALUE_NUM_WORK_GROUPS:
|
||||
return nir_intrinsic_load_num_work_groups;
|
||||
case SYSTEM_VALUE_PRIMITIVE_ID:
|
||||
return nir_intrinsic_load_primitive_id;
|
||||
/* FINISHME: Add tessellation intrinsics.
|
||||
case SYSTEM_VALUE_TESS_COORD:
|
||||
case SYSTEM_VALUE_VERTICES_IN:
|
||||
case SYSTEM_VALUE_PRIMITIVE_ID:
|
||||
case SYSTEM_VALUE_TESS_LEVEL_OUTER:
|
||||
case SYSTEM_VALUE_TESS_LEVEL_INNER:
|
||||
*/
|
||||
|
|
@ -1529,6 +1528,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
|||
return SYSTEM_VALUE_NUM_WORK_GROUPS;
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
return SYSTEM_VALUE_WORK_GROUP_ID;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
return SYSTEM_VALUE_PRIMITIVE_ID;
|
||||
/* FINISHME: Add tessellation intrinsics.
|
||||
return SYSTEM_VALUE_TESS_COORD;
|
||||
return SYSTEM_VALUE_VERTICES_IN;
|
||||
|
|
|
|||
|
|
@ -1480,6 +1480,45 @@ typedef struct nir_shader_compiler_options {
|
|||
bool native_integers;
|
||||
} nir_shader_compiler_options;
|
||||
|
||||
typedef struct nir_shader_info {
|
||||
const char *name;
|
||||
|
||||
/* Number of textures used by this shader */
|
||||
unsigned num_textures;
|
||||
/* Number of uniform buffers used by this shader */
|
||||
unsigned num_ubos;
|
||||
/* Number of atomic buffers used by this shader */
|
||||
unsigned num_abos;
|
||||
/* Number of shader storage buffers used by this shader */
|
||||
unsigned num_ssbos;
|
||||
/* Number of images used by this shader */
|
||||
unsigned num_images;
|
||||
|
||||
/* Which inputs are actually read */
|
||||
uint64_t inputs_read;
|
||||
/* Which outputs are actually written */
|
||||
uint64_t outputs_written;
|
||||
/* Which system values are actually read */
|
||||
uint64_t system_values_read;
|
||||
|
||||
/* Whether or not this shader ever uses textureGather() */
|
||||
bool uses_texture_gather;
|
||||
|
||||
/* Whether or not this shader uses the gl_ClipDistance output */
|
||||
bool uses_clip_distance_out;
|
||||
|
||||
/* Whether or not separate shader objects were used */
|
||||
bool separate_shader;
|
||||
|
||||
struct {
|
||||
/** The maximum number of vertices the geometry shader might write. */
|
||||
unsigned vertices_out;
|
||||
|
||||
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
|
||||
unsigned invocations;
|
||||
} gs;
|
||||
} nir_shader_info;
|
||||
|
||||
typedef struct nir_shader {
|
||||
/** list of uniforms (nir_variable) */
|
||||
struct exec_list uniforms;
|
||||
|
|
@ -1497,6 +1536,9 @@ typedef struct nir_shader {
|
|||
*/
|
||||
const struct nir_shader_compiler_options *options;
|
||||
|
||||
/** Various bits of compile-time information about a given shader */
|
||||
struct nir_shader_info info;
|
||||
|
||||
/** list of global variables in the shader (nir_variable) */
|
||||
struct exec_list globals;
|
||||
|
||||
|
|
@ -1519,14 +1561,6 @@ typedef struct nir_shader {
|
|||
|
||||
/** The shader stage, such as MESA_SHADER_VERTEX. */
|
||||
gl_shader_stage stage;
|
||||
|
||||
struct {
|
||||
/** The maximum number of vertices the geometry shader might write. */
|
||||
unsigned vertices_out;
|
||||
|
||||
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
|
||||
unsigned invocations;
|
||||
} gs;
|
||||
} nir_shader;
|
||||
|
||||
#define nir_foreach_overload(shader, overload) \
|
||||
|
|
@ -1861,6 +1895,7 @@ void nir_assign_var_locations(struct exec_list *var_list,
|
|||
int (*type_size)(const struct glsl_type *));
|
||||
|
||||
void nir_lower_io(nir_shader *shader,
|
||||
nir_variable_mode mode,
|
||||
int (*type_size)(const struct glsl_type *));
|
||||
void nir_lower_vars_to_ssa(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -194,6 +194,7 @@ SYSTEM_VALUE(instance_id, 1, 0)
|
|||
SYSTEM_VALUE(sample_id, 1, 0)
|
||||
SYSTEM_VALUE(sample_pos, 2, 0)
|
||||
SYSTEM_VALUE(sample_mask_in, 1, 0)
|
||||
SYSTEM_VALUE(primitive_id, 1, 0)
|
||||
SYSTEM_VALUE(invocation_id, 1, 0)
|
||||
SYSTEM_VALUE(local_invocation_id, 3, 0)
|
||||
SYSTEM_VALUE(work_group_id, 3, 0)
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
|
|||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
|
||||
|
||||
nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
|
||||
nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out);
|
||||
|
||||
/* Create: if (vertex_count < max_vertices) and insert it.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ struct lower_io_state {
|
|||
nir_builder builder;
|
||||
void *mem_ctx;
|
||||
int (*type_size)(const struct glsl_type *type);
|
||||
nir_variable_mode mode;
|
||||
};
|
||||
|
||||
void
|
||||
|
|
@ -154,9 +155,17 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
|||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_var &&
|
||||
intrin->intrinsic != nir_intrinsic_store_var)
|
||||
continue;
|
||||
|
||||
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
|
||||
|
||||
if (state->mode != -1 && state->mode != mode)
|
||||
continue;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_var: {
|
||||
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
|
||||
if (mode != nir_var_shader_in && mode != nir_var_uniform)
|
||||
continue;
|
||||
|
||||
|
|
@ -239,12 +248,15 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
|||
}
|
||||
|
||||
static void
|
||||
nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_type *))
|
||||
nir_lower_io_impl(nir_function_impl *impl,
|
||||
nir_variable_mode mode,
|
||||
int (*type_size)(const struct glsl_type *))
|
||||
{
|
||||
struct lower_io_state state;
|
||||
|
||||
nir_builder_init(&state.builder, impl);
|
||||
state.mem_ctx = ralloc_parent(impl);
|
||||
state.mode = mode;
|
||||
state.type_size = type_size;
|
||||
|
||||
nir_foreach_block(impl, nir_lower_io_block, &state);
|
||||
|
|
@ -254,10 +266,11 @@ nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_typ
|
|||
}
|
||||
|
||||
void
|
||||
nir_lower_io(nir_shader *shader, int(*type_size)(const struct glsl_type *))
|
||||
nir_lower_io(nir_shader *shader, nir_variable_mode mode,
|
||||
int (*type_size)(const struct glsl_type *))
|
||||
{
|
||||
nir_foreach_overload(shader, overload) {
|
||||
if (overload->impl)
|
||||
nir_lower_io_impl(overload->impl, type_size);
|
||||
nir_lower_io_impl(overload->impl, mode, type_size);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ convert_block(nir_block *block, void *state)
|
|||
static bool
|
||||
convert_impl(nir_function_impl *impl)
|
||||
{
|
||||
bool progress;
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_block(impl, convert_block, &progress);
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
|
|
|
|||
|
|
@ -58,6 +58,20 @@ remove_phis_block(nir_block *block, void *state)
|
|||
|
||||
nir_foreach_phi_src(phi, src) {
|
||||
assert(src->src.is_ssa);
|
||||
|
||||
/* For phi nodes at the beginning of loops, we may encounter some
|
||||
* sources from backedges that point back to the destination of the
|
||||
* same phi, i.e. something like:
|
||||
*
|
||||
* a = phi(a, b, ...)
|
||||
*
|
||||
* We can safely ignore these sources, since if all of the normal
|
||||
* sources point to the same definition, then that definition must
|
||||
* still dominate the phi node, and the phi will still always take
|
||||
* the value of that definition.
|
||||
*/
|
||||
if (src->src.ssa == &phi->dest.ssa)
|
||||
continue;
|
||||
|
||||
if (def == NULL) {
|
||||
def = src->src.ssa;
|
||||
|
|
@ -72,6 +86,11 @@ remove_phis_block(nir_block *block, void *state)
|
|||
if (!srcs_same)
|
||||
continue;
|
||||
|
||||
/* We must have found at least one definition, since there must be at
|
||||
* least one forward edge.
|
||||
*/
|
||||
assert(def != NULL);
|
||||
|
||||
assert(phi->dest.is_ssa);
|
||||
nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
|
||||
nir_instr_remove(instr);
|
||||
|
|
|
|||
|
|
@ -399,7 +399,7 @@ typedef enum
|
|||
/*@{*/
|
||||
SYSTEM_VALUE_TESS_COORD,
|
||||
SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */
|
||||
SYSTEM_VALUE_PRIMITIVE_ID, /**< (currently not used by GS) */
|
||||
SYSTEM_VALUE_PRIMITIVE_ID,
|
||||
SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
|
||||
SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
|
||||
/*@}*/
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ AM_CFLAGS = \
|
|||
$(EXTRA_DEFINES_XF86VIDMODE) \
|
||||
-D_REENTRANT \
|
||||
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
|
||||
-DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
|
||||
$(DEFINES) \
|
||||
$(LIBDRM_CFLAGS) \
|
||||
$(DRI2PROTO_CFLAGS) \
|
||||
|
|
|
|||
|
|
@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...)
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef GL_LIB_NAME
|
||||
#define GL_LIB_NAME "libGL.so.1"
|
||||
#endif
|
||||
|
||||
#ifndef DEFAULT_DRIVER_DIR
|
||||
/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
|
||||
#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
|
||||
|
|
@ -99,7 +103,7 @@ driOpenDriver(const char *driverName)
|
|||
int len;
|
||||
|
||||
/* Attempt to make sure libGL symbols will be visible to the driver */
|
||||
glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
|
||||
glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);
|
||||
|
||||
libPaths = NULL;
|
||||
if (geteuid() == getuid()) {
|
||||
|
|
|
|||
|
|
@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
|
|||
*/
|
||||
_X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
|
||||
#if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
|
||||
# if defined(USE_MGL_NAMESPACE)
|
||||
__attribute__ ((alias("mglXGetProcAddressARB")));
|
||||
# else
|
||||
__attribute__ ((alias("glXGetProcAddressARB")));
|
||||
# endif
|
||||
#else
|
||||
{
|
||||
return glXGetProcAddressARB(procName);
|
||||
|
|
|
|||
|
|
@ -281,11 +281,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name);
|
|||
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
|
||||
#else
|
||||
# if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
|
||||
* extra expansion means that the name mangling macros in glx_mangle.h will
|
||||
* apply before stringification, so the alias attribute will have a string like
|
||||
* "mglXFoo" instead of "glXFoo". */
|
||||
# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
|
||||
return_type real_func proto_args \
|
||||
__attribute__ ((alias( # aliased_func ) ));
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
|
||||
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
|
||||
GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
|
||||
GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
|
||||
# else
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
return_type real_func proto_args \
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate)
|
|||
const struct mapi_stub *stub;
|
||||
|
||||
#ifdef USE_MGL_NAMESPACE
|
||||
if (name)
|
||||
if (name && name[0] == 'm')
|
||||
name++;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -71,9 +71,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
|||
char *sample_map_str = rzalloc_size(mem_ctx, 1);
|
||||
char *sample_map_expr = rzalloc_size(mem_ctx, 1);
|
||||
char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
|
||||
const char *vs_source;
|
||||
const char *sampler_array_suffix = "";
|
||||
const char *texcoord_type = "vec2";
|
||||
float y_scale;
|
||||
enum blit_msaa_shader shader_index;
|
||||
|
||||
|
|
@ -99,7 +97,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
|||
shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
|
||||
BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
|
||||
sampler_array_suffix = "Array";
|
||||
texcoord_type = "vec3";
|
||||
}
|
||||
|
||||
if (blit->msaa_shaders[shader_index]) {
|
||||
|
|
@ -150,28 +147,37 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
|||
" const int sample_map[%d] = int[%d](%s);\n",
|
||||
samples, samples, sample_map_str);
|
||||
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
|
||||
texcoord_type, sample_number);
|
||||
if (target == GL_TEXTURE_2D_MULTISAMPLE) {
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
|
||||
sample_number);
|
||||
} else {
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
|
||||
sample_number);
|
||||
}
|
||||
|
||||
vs_source = ralloc_asprintf(mem_ctx,
|
||||
static const char vs_source[] =
|
||||
"#version 130\n"
|
||||
"in vec2 position;\n"
|
||||
"in %s textureCoords;\n"
|
||||
"out %s texCoords;\n"
|
||||
"in vec3 textureCoords;\n"
|
||||
"out vec2 texCoords;\n"
|
||||
"flat out int layer;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" texCoords = textureCoords;\n"
|
||||
" texCoords = textureCoords.xy;\n"
|
||||
" layer = int(textureCoords.z);\n"
|
||||
" gl_Position = vec4(position, 0.0, 1.0);\n"
|
||||
"}\n",
|
||||
texcoord_type,
|
||||
texcoord_type);
|
||||
"}\n"
|
||||
;
|
||||
|
||||
fs_source = ralloc_asprintf(mem_ctx,
|
||||
"#version 130\n"
|
||||
"#extension GL_ARB_texture_multisample : enable\n"
|
||||
"uniform sampler2DMS%s texSampler;\n"
|
||||
"uniform float src_width, src_height;\n"
|
||||
"in %s texCoords;\n"
|
||||
"in vec2 texCoords;\n"
|
||||
"flat in int layer;\n"
|
||||
"out vec4 out_color;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
|
|
@ -212,7 +218,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
|||
" out_color = mix(x_0_color, x_1_color, interp.y);\n"
|
||||
"}\n",
|
||||
sampler_array_suffix,
|
||||
texcoord_type,
|
||||
sample_map_expr,
|
||||
y_scale,
|
||||
1.0f / y_scale,
|
||||
|
|
|
|||
|
|
@ -42,10 +42,10 @@
|
|||
#define I830_UPLOAD_STIPPLE 0x4
|
||||
#define I830_UPLOAD_INVARIENT 0x8
|
||||
#define I830_UPLOAD_RASTER_RULES 0x10
|
||||
#define I830_UPLOAD_TEX(i) (0x10<<(i))
|
||||
#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
|
||||
#define I830_UPLOAD_TEX_ALL (0x0f0)
|
||||
#define I830_UPLOAD_TEXBLEND_ALL (0xf00)
|
||||
#define I830_UPLOAD_TEX(i) (0x0100<<(i))
|
||||
#define I830_UPLOAD_TEXBLEND(i) (0x1000<<(i))
|
||||
#define I830_UPLOAD_TEX_ALL (0x0f00)
|
||||
#define I830_UPLOAD_TEXBLEND_ALL (0xf000)
|
||||
|
||||
/* State structure offsets - these will probably disappear.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -115,6 +115,8 @@ enum {
|
|||
I915_RASTER_RULES_SETUP_SIZE,
|
||||
};
|
||||
|
||||
#define I915_TEX_UNITS 8
|
||||
|
||||
#define I915_MAX_CONSTANT 32
|
||||
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
|
||||
|
||||
|
|
@ -194,7 +196,8 @@ struct i915_fragment_program
|
|||
|
||||
/* Helpers for i915_fragprog.c:
|
||||
*/
|
||||
GLuint wpos_tex;
|
||||
uint8_t texcoord_mapping[I915_TEX_UNITS];
|
||||
uint8_t wpos_tex;
|
||||
bool depth_written;
|
||||
|
||||
struct
|
||||
|
|
@ -205,15 +208,6 @@ struct i915_fragment_program
|
|||
GLuint nr_params;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define I915_TEX_UNITS 8
|
||||
|
||||
|
||||
struct i915_hw_state
|
||||
{
|
||||
GLuint Ctx[I915_CTX_SETUP_SIZE];
|
||||
|
|
|
|||
|
|
@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0,
|
|||
-1.0 / (6 * 5 * 4 * 3 * 2 * 1)
|
||||
};
|
||||
|
||||
/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
|
||||
#define TEXCOORD_TEX (0<<7)
|
||||
#define TEXCOORD_VAR (1<<7)
|
||||
|
||||
static unsigned
|
||||
get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
|
||||
{
|
||||
for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (p->texcoord_mapping[i] == texcoord)
|
||||
return i;
|
||||
}
|
||||
|
||||
/* blah */
|
||||
return p->ctx->Const.MaxTextureCoordUnits - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a ureg for the given source register. Will emit
|
||||
* constants, apply swizzling and negation as needed.
|
||||
|
|
@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p,
|
|||
const struct gl_fragment_program *program)
|
||||
{
|
||||
GLuint src;
|
||||
unsigned unit;
|
||||
|
||||
switch (source->File) {
|
||||
|
||||
|
|
@ -119,8 +136,10 @@ src_vector(struct i915_fragment_program *p,
|
|||
case VARYING_SLOT_TEX5:
|
||||
case VARYING_SLOT_TEX6:
|
||||
case VARYING_SLOT_TEX7:
|
||||
unit = get_texcoord_mapping(p, (source->Index -
|
||||
VARYING_SLOT_TEX0) | TEXCOORD_TEX);
|
||||
src = i915_emit_decl(p, REG_TYPE_T,
|
||||
T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
|
||||
T_TEX0 + unit,
|
||||
D0_CHANNEL_ALL);
|
||||
break;
|
||||
|
||||
|
|
@ -132,8 +151,10 @@ src_vector(struct i915_fragment_program *p,
|
|||
case VARYING_SLOT_VAR0 + 5:
|
||||
case VARYING_SLOT_VAR0 + 6:
|
||||
case VARYING_SLOT_VAR0 + 7:
|
||||
unit = get_texcoord_mapping(p, (source->Index -
|
||||
VARYING_SLOT_VAR0) | TEXCOORD_VAR);
|
||||
src = i915_emit_decl(p, REG_TYPE_T,
|
||||
T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
|
||||
T_TEX0 + unit,
|
||||
D0_CHANNEL_ALL);
|
||||
break;
|
||||
|
||||
|
|
@ -1176,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
check_texcoord_mapping(struct i915_fragment_program *p)
|
||||
{
|
||||
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
|
||||
unsigned unit = 0;
|
||||
|
||||
for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputs & VARYING_BIT_TEX(i)) {
|
||||
if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
|
||||
unit++;
|
||||
break;
|
||||
}
|
||||
p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
|
||||
}
|
||||
if (inputs & VARYING_BIT_VAR(i)) {
|
||||
if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
|
||||
unit++;
|
||||
break;
|
||||
}
|
||||
p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
|
||||
}
|
||||
}
|
||||
|
||||
if (unit > p->ctx->Const.MaxTextureCoordUnits)
|
||||
i915_program_error(p, "Too many texcoord units");
|
||||
}
|
||||
|
||||
static void
|
||||
check_wpos(struct i915_fragment_program *p)
|
||||
{
|
||||
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
|
||||
GLint i;
|
||||
unsigned unit = 0;
|
||||
|
||||
p->wpos_tex = -1;
|
||||
|
||||
if ((inputs & VARYING_BIT_POS) == 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
|
||||
continue;
|
||||
else if (inputs & VARYING_BIT_POS) {
|
||||
p->wpos_tex = i;
|
||||
inputs &= ~VARYING_BIT_POS;
|
||||
}
|
||||
unit += !!(inputs & VARYING_BIT_TEX(i));
|
||||
unit += !!(inputs & VARYING_BIT_VAR(i));
|
||||
}
|
||||
|
||||
if (inputs & VARYING_BIT_POS) {
|
||||
if (unit < p->ctx->Const.MaxTextureCoordUnits)
|
||||
p->wpos_tex = unit;
|
||||
else
|
||||
i915_program_error(p, "No free texcoord for wpos value");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1212,6 +1260,7 @@ translate_program(struct i915_fragment_program *p)
|
|||
}
|
||||
|
||||
i915_init_program(i915, p);
|
||||
check_texcoord_mapping(p);
|
||||
check_wpos(p);
|
||||
upload_program(p);
|
||||
fixup_depth_write(p);
|
||||
|
|
@ -1420,22 +1469,24 @@ i915ValidateFragmentProgram(struct i915_context *i915)
|
|||
|
||||
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputsRead & VARYING_BIT_TEX(i)) {
|
||||
int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
|
||||
int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
|
||||
|
||||
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
|
||||
s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
|
||||
|
||||
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
|
||||
}
|
||||
else if (inputsRead & VARYING_BIT_VAR(i)) {
|
||||
if (inputsRead & VARYING_BIT_VAR(i)) {
|
||||
int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
|
||||
int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
|
||||
|
||||
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
|
||||
s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
|
||||
|
||||
EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
|
||||
}
|
||||
else if (i == p->wpos_tex) {
|
||||
if (i == p->wpos_tex) {
|
||||
int wpos_size = 4 * sizeof(float);
|
||||
/* If WPOS is required, duplicate the XYZ position data in an
|
||||
* unused texture coordinate:
|
||||
|
|
|
|||
|
|
@ -658,6 +658,11 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
|
|||
{
|
||||
struct intel_context *intel = intel_context(ctx);
|
||||
|
||||
/* Sync up the state of window system buffers. We need to do this before
|
||||
* we go looking for the buffers.
|
||||
*/
|
||||
intel_prepare_render(intel);
|
||||
|
||||
if (mask & GL_COLOR_BUFFER_BIT) {
|
||||
GLint i;
|
||||
struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ i965_FILES = \
|
|||
brw_nir.h \
|
||||
brw_nir.c \
|
||||
brw_nir_analyze_boolean_resolves.c \
|
||||
brw_nir_uniforms.cpp \
|
||||
brw_object_purgeable.c \
|
||||
brw_packed_float.c \
|
||||
brw_performance_monitor.c \
|
||||
|
|
@ -133,7 +134,6 @@ i965_FILES = \
|
|||
brw_vec4_surface_builder.cpp \
|
||||
brw_vec4_surface_builder.h \
|
||||
brw_vec4_visitor.cpp \
|
||||
brw_vec4_vp.cpp \
|
||||
brw_vec4_vs_visitor.cpp \
|
||||
brw_vs.c \
|
||||
brw_vs.h \
|
||||
|
|
|
|||
|
|
@ -1436,7 +1436,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
|
|||
buffer->cpp, buffer->pitch);
|
||||
}
|
||||
|
||||
intel_miptree_release(&rb->mt);
|
||||
bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
|
||||
buffer->name);
|
||||
if (!bo) {
|
||||
|
|
|
|||
|
|
@ -340,9 +340,6 @@ struct brw_shader {
|
|||
bool compiled_once;
|
||||
};
|
||||
|
||||
/* Note: If adding fields that need anything besides a normal memcmp() for
|
||||
* comparing them, be sure to go fix brw_stage_prog_data_compare().
|
||||
*/
|
||||
struct brw_stage_prog_data {
|
||||
struct {
|
||||
/** size of our binding table. */
|
||||
|
|
@ -384,18 +381,11 @@ struct brw_stage_prog_data {
|
|||
|
||||
/* Pointers to tracked values (only valid once
|
||||
* _mesa_load_state_parameters has been called at runtime).
|
||||
*
|
||||
* These must be the last fields of the struct (see
|
||||
* brw_stage_prog_data_compare()).
|
||||
*/
|
||||
const gl_constant_value **param;
|
||||
const gl_constant_value **pull_param;
|
||||
|
||||
/**
|
||||
* Image metadata passed to the shader as uniforms. This is deliberately
|
||||
* ignored by brw_stage_prog_data_compare() because its contents don't have
|
||||
* any influence on program compilation.
|
||||
*/
|
||||
/** Image metadata passed to the shader as uniforms. */
|
||||
struct brw_image_param *image_param;
|
||||
};
|
||||
|
||||
|
|
@ -449,9 +439,6 @@ struct brw_image_param {
|
|||
* there can be many of these, each in a different GL state
|
||||
* corresponding to a different brw_wm_prog_key struct, with different
|
||||
* compiled programs.
|
||||
*
|
||||
* Note: brw_wm_prog_data_compare() must be updated when adding fields to this
|
||||
* struct!
|
||||
*/
|
||||
struct brw_wm_prog_data {
|
||||
struct brw_stage_prog_data base;
|
||||
|
|
@ -495,9 +482,6 @@ struct brw_wm_prog_data {
|
|||
int urb_setup[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
|
||||
* struct!
|
||||
*/
|
||||
struct brw_cs_prog_data {
|
||||
struct brw_stage_prog_data base;
|
||||
|
||||
|
|
@ -698,9 +682,6 @@ enum shader_dispatch_mode {
|
|||
DISPATCH_MODE_SIMD8 = 3,
|
||||
};
|
||||
|
||||
/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
|
||||
* this struct!
|
||||
*/
|
||||
struct brw_vue_prog_data {
|
||||
struct brw_stage_prog_data base;
|
||||
struct brw_vue_map vue_map;
|
||||
|
|
@ -718,9 +699,6 @@ struct brw_vue_prog_data {
|
|||
};
|
||||
|
||||
|
||||
/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
|
||||
* struct!
|
||||
*/
|
||||
struct brw_vs_prog_data {
|
||||
struct brw_vue_prog_data base;
|
||||
|
||||
|
|
@ -780,9 +758,6 @@ struct brw_vs_prog_data {
|
|||
|
||||
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
|
||||
|
||||
/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
|
||||
* this struct!
|
||||
*/
|
||||
struct brw_gs_prog_data
|
||||
{
|
||||
struct brw_vue_prog_data base;
|
||||
|
|
@ -875,7 +850,6 @@ struct brw_cache_item {
|
|||
};
|
||||
|
||||
|
||||
typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
|
||||
typedef void (*cache_aux_free_func)(const void *aux);
|
||||
|
||||
struct brw_cache {
|
||||
|
|
@ -888,12 +862,6 @@ struct brw_cache {
|
|||
uint32_t next_offset;
|
||||
bool bo_used_by_gpu;
|
||||
|
||||
/**
|
||||
* Optional functions used in determining whether the prog_data for a new
|
||||
* cache item matches an existing cache item (in case there's relevant data
|
||||
* outside of the prog_data). If NULL, a plain memcmp is done.
|
||||
*/
|
||||
cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
|
||||
/** Optional functions for freeing other pointers attached to a prog_data. */
|
||||
cache_aux_free_func aux_free[BRW_MAX_CACHE];
|
||||
};
|
||||
|
|
@ -1554,7 +1522,7 @@ struct brw_context
|
|||
|
||||
int num_atoms[BRW_NUM_PIPELINES];
|
||||
const struct brw_tracked_state render_atoms[60];
|
||||
const struct brw_tracked_state compute_atoms[7];
|
||||
const struct brw_tracked_state compute_atoms[8];
|
||||
|
||||
/* If (INTEL_DEBUG & DEBUG_BATCH) */
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -30,26 +30,23 @@
|
|||
#include "intel_mipmap_tree.h"
|
||||
#include "brw_state.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "brw_nir.h"
|
||||
|
||||
bool
|
||||
brw_cs_prog_data_compare(const void *in_a, const void *in_b)
|
||||
static void
|
||||
assign_cs_binding_table_offsets(const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_cs_prog_data *prog_data)
|
||||
{
|
||||
const struct brw_cs_prog_data *a =
|
||||
(const struct brw_cs_prog_data *)in_a;
|
||||
const struct brw_cs_prog_data *b =
|
||||
(const struct brw_cs_prog_data *)in_b;
|
||||
uint32_t next_binding_table_offset = 0;
|
||||
|
||||
/* Compare the base structure. */
|
||||
if (!brw_stage_prog_data_compare(&a->base, &b->base))
|
||||
return false;
|
||||
/* May not be used if the gl_NumWorkGroups variable is not accessed. */
|
||||
prog_data->binding_table.work_groups_start = next_binding_table_offset;
|
||||
next_binding_table_offset++;
|
||||
|
||||
/* Compare the rest of the structure. */
|
||||
const unsigned offset = sizeof(struct brw_stage_prog_data);
|
||||
if (memcmp(((char *) a) + offset, ((char *) b) + offset,
|
||||
sizeof(struct brw_cs_prog_data) - offset))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
brw_assign_common_binding_table_offsets(MESA_SHADER_COMPUTE, devinfo,
|
||||
shader_prog, prog, &prog_data->base,
|
||||
next_binding_table_offset);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -72,12 +69,14 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
|||
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog,
|
||||
&cp->program.Base, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count = cs->base.num_uniform_components +
|
||||
cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
|
||||
int param_count = cp->program.Base.nir->num_uniforms;
|
||||
|
||||
/* The backend also sometimes adds params for texture size. */
|
||||
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
|
||||
|
|
@ -90,6 +89,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
|||
prog_data.base.nr_params = param_count;
|
||||
prog_data.base.nr_image_params = cs->base.NumImages;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base,
|
||||
&prog_data.base, true);
|
||||
|
||||
if (unlikely(brw->perf_debug)) {
|
||||
start_busy = (brw->batch.last_bo &&
|
||||
drm_intel_bo_busy(brw->batch.last_bo));
|
||||
|
|
|
|||
|
|
@ -36,8 +36,6 @@ struct brw_cs_prog_key {
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
bool brw_cs_prog_data_compare(const void *a, const void *b);
|
||||
|
||||
void
|
||||
brw_upload_cs_prog(struct brw_context *brw);
|
||||
|
||||
|
|
@ -51,8 +49,7 @@ brw_cs_emit(struct brw_context *brw,
|
|||
unsigned *final_assembly_size);
|
||||
|
||||
unsigned
|
||||
brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
|
||||
unsigned dispatch_width);
|
||||
brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -949,20 +949,6 @@ fs_visitor::import_uniforms(fs_visitor *v)
|
|||
this->param_size = v->param_size;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_vec4_uniform_value(unsigned param_offset,
|
||||
const gl_constant_value *values,
|
||||
unsigned n)
|
||||
{
|
||||
static const gl_constant_value zero = { 0 };
|
||||
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
stage_prog_data->param[param_offset + i] = &values[i];
|
||||
|
||||
for (unsigned i = n; i < 4; ++i)
|
||||
stage_prog_data->param[param_offset + i] = &zero;
|
||||
}
|
||||
|
||||
fs_reg *
|
||||
fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
|
||||
bool origin_upper_left)
|
||||
|
|
@ -1416,7 +1402,7 @@ fs_visitor::calculate_urb_setup()
|
|||
int urb_next = 0;
|
||||
/* Figure out where each of the incoming setup attributes lands. */
|
||||
if (devinfo->gen >= 6) {
|
||||
if (_mesa_bitcount_64(prog->InputsRead &
|
||||
if (_mesa_bitcount_64(nir->info.inputs_read &
|
||||
BRW_FS_VARYING_INPUT_MASK) <= 16) {
|
||||
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
|
||||
* first 16 varying inputs, so we can put them wherever we want.
|
||||
|
|
@ -1428,7 +1414,7 @@ fs_visitor::calculate_urb_setup()
|
|||
* a different vertex (or geometry) shader.
|
||||
*/
|
||||
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
|
||||
if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
|
||||
if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
|
||||
BITFIELD64_BIT(i)) {
|
||||
prog_data->urb_setup[i] = urb_next++;
|
||||
}
|
||||
|
|
@ -1442,7 +1428,7 @@ fs_visitor::calculate_urb_setup()
|
|||
struct brw_vue_map prev_stage_vue_map;
|
||||
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
|
||||
key->input_slots_valid,
|
||||
shader_prog->SeparateShader);
|
||||
nir->info.separate_shader);
|
||||
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
|
||||
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
|
||||
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
|
||||
|
|
@ -1452,7 +1438,7 @@ fs_visitor::calculate_urb_setup()
|
|||
* unused.
|
||||
*/
|
||||
if (varying != BRW_VARYING_SLOT_COUNT &&
|
||||
(prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
|
||||
(nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
|
||||
BITFIELD64_BIT(varying))) {
|
||||
prog_data->urb_setup[varying] = slot - first_slot;
|
||||
}
|
||||
|
|
@ -1485,7 +1471,7 @@ fs_visitor::calculate_urb_setup()
|
|||
*
|
||||
* See compile_sf_prog() for more info.
|
||||
*/
|
||||
if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
|
||||
if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
|
||||
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
|
||||
}
|
||||
|
||||
|
|
@ -4537,7 +4523,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
|
|||
fprintf(file, "***m%d***", inst->src[i].reg);
|
||||
break;
|
||||
case ATTR:
|
||||
fprintf(file, "attr%d", inst->src[i].reg + inst->src[i].reg_offset);
|
||||
fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
|
||||
break;
|
||||
case UNIFORM:
|
||||
fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
|
||||
|
|
@ -4668,7 +4654,7 @@ void
|
|||
fs_visitor::setup_payload_gen6()
|
||||
{
|
||||
bool uses_depth =
|
||||
(prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
|
||||
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
||||
unsigned barycentric_interp_modes =
|
||||
(stage == MESA_SHADER_FRAGMENT) ?
|
||||
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
|
||||
|
|
@ -4727,7 +4713,7 @@ fs_visitor::setup_payload_gen6()
|
|||
}
|
||||
|
||||
/* R32: MSAA input coverage mask */
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
|
||||
if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
|
||||
assert(devinfo->gen >= 7);
|
||||
payload.sample_mask_in_reg = payload.num_regs;
|
||||
payload.num_regs++;
|
||||
|
|
@ -4740,7 +4726,7 @@ fs_visitor::setup_payload_gen6()
|
|||
/* R34-: bary for 32-pixel. */
|
||||
/* R58-59: interp W for 32-pixel. */
|
||||
|
||||
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
source_depth_to_render_target = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -4759,9 +4745,9 @@ fs_visitor::setup_cs_payload()
|
|||
|
||||
payload.num_regs = 1;
|
||||
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
|
||||
if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
|
||||
const unsigned local_id_dwords =
|
||||
brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
|
||||
brw_cs_prog_local_id_payload_dwords(dispatch_width);
|
||||
assert((local_id_dwords & 0x7) == 0);
|
||||
const unsigned local_id_regs = local_id_dwords / 8;
|
||||
payload.local_invocation_id_reg = payload.num_regs;
|
||||
|
|
@ -4769,37 +4755,6 @@ fs_visitor::setup_cs_payload()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::assign_fs_binding_table_offsets()
|
||||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
uint32_t next_binding_table_offset = 0;
|
||||
|
||||
/* If there are no color regions, we still perform an FB write to a null
|
||||
* renderbuffer, which we place at surface index 0.
|
||||
*/
|
||||
prog_data->binding_table.render_target_start = next_binding_table_offset;
|
||||
next_binding_table_offset += MAX2(key->nr_color_regions, 1);
|
||||
|
||||
assign_common_binding_table_offsets(next_binding_table_offset);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::assign_cs_binding_table_offsets()
|
||||
{
|
||||
assert(stage == MESA_SHADER_COMPUTE);
|
||||
brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
|
||||
uint32_t next_binding_table_offset = 0;
|
||||
|
||||
/* May not be used if the gl_NumWorkGroups variable is not accessed. */
|
||||
prog_data->binding_table.work_groups_start = next_binding_table_offset;
|
||||
next_binding_table_offset++;
|
||||
|
||||
assign_common_binding_table_offsets(next_binding_table_offset);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::calculate_register_pressure()
|
||||
{
|
||||
|
|
@ -4851,8 +4806,8 @@ fs_visitor::optimize()
|
|||
\
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
|
||||
char filename[64]; \
|
||||
snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
|
||||
stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
|
||||
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
|
||||
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
|
||||
\
|
||||
backend_shader::dump_instructions(filename); \
|
||||
} \
|
||||
|
|
@ -4865,9 +4820,8 @@ fs_visitor::optimize()
|
|||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
|
||||
char filename[64];
|
||||
snprintf(filename, 64, "%s%d-%04d-00-start",
|
||||
stage_abbrev, dispatch_width,
|
||||
shader_prog ? shader_prog->Name : 0);
|
||||
snprintf(filename, 64, "%s%d-%s-00-start",
|
||||
stage_abbrev, dispatch_width, nir->info.name);
|
||||
|
||||
backend_shader::dump_instructions(filename);
|
||||
}
|
||||
|
|
@ -5013,8 +4967,6 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
|
|||
{
|
||||
assert(stage == MESA_SHADER_VERTEX);
|
||||
|
||||
if (prog_data->map_entries == NULL)
|
||||
assign_common_binding_table_offsets(0);
|
||||
setup_vs_payload();
|
||||
|
||||
if (shader_time_index >= 0)
|
||||
|
|
@ -5053,11 +5005,6 @@ fs_visitor::run_fs(bool do_rep_send)
|
|||
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
sanity_param_count = prog->Parameters->NumParameters;
|
||||
|
||||
if (prog_data->map_entries == NULL)
|
||||
assign_fs_binding_table_offsets();
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
setup_payload_gen6();
|
||||
else
|
||||
|
|
@ -5073,7 +5020,7 @@ fs_visitor::run_fs(bool do_rep_send)
|
|||
emit_shader_time_begin();
|
||||
|
||||
calculate_urb_setup();
|
||||
if (prog->InputsRead > 0) {
|
||||
if (nir->info.inputs_read > 0) {
|
||||
if (devinfo->gen < 6)
|
||||
emit_interpolation_setup_gen4();
|
||||
else
|
||||
|
|
@ -5133,11 +5080,6 @@ bool
|
|||
fs_visitor::run_cs()
|
||||
{
|
||||
assert(stage == MESA_SHADER_COMPUTE);
|
||||
assert(shader);
|
||||
|
||||
sanity_param_count = prog->Parameters->NumParameters;
|
||||
|
||||
assign_cs_binding_table_offsets();
|
||||
|
||||
setup_cs_payload();
|
||||
|
||||
|
|
@ -5166,13 +5108,6 @@ fs_visitor::run_cs()
|
|||
if (failed)
|
||||
return false;
|
||||
|
||||
/* If any state parameters were appended, then ParameterValues could have
|
||||
* been realloced, in which case the driver uniform storage set up by
|
||||
* _mesa_associate_uniform_storage() would point to freed memory. Make
|
||||
* sure that didn't happen.
|
||||
*/
|
||||
assert(sanity_param_count == prog->Parameters->NumParameters);
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
|
|
@ -5200,9 +5135,8 @@ brw_wm_fs_emit(struct brw_context *brw,
|
|||
|
||||
/* Now the main event: Visit the shader IR and generate our FS IR for it.
|
||||
*/
|
||||
fs_visitor v(brw->intelScreen->compiler, brw,
|
||||
mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
|
||||
prog, &fp->Base, 8, st_index8);
|
||||
fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
|
||||
&prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8);
|
||||
if (!v.run_fs(false /* do_rep_send */)) {
|
||||
if (prog) {
|
||||
prog->LinkStatus = false;
|
||||
|
|
@ -5216,9 +5150,8 @@ brw_wm_fs_emit(struct brw_context *brw,
|
|||
}
|
||||
|
||||
cfg_t *simd16_cfg = NULL;
|
||||
fs_visitor v2(brw->intelScreen->compiler, brw,
|
||||
mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
|
||||
prog, &fp->Base, 16, st_index16);
|
||||
fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
|
||||
&prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16);
|
||||
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
|
||||
if (!v.simd16_unsupported) {
|
||||
/* Try a SIMD16 compile */
|
||||
|
|
@ -5332,9 +5265,8 @@ brw_cs_emit(struct brw_context *brw,
|
|||
|
||||
/* Now the main event: Visit the shader IR and generate our CS IR for it.
|
||||
*/
|
||||
fs_visitor v8(brw->intelScreen->compiler, brw,
|
||||
mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
|
||||
&cp->Base, 8, st_index);
|
||||
fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
|
||||
&prog_data->base, &cp->Base, cp->Base.nir, 8, st_index);
|
||||
if (!v8.run_cs()) {
|
||||
fail_msg = v8.fail_msg;
|
||||
} else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
|
||||
|
|
@ -5342,9 +5274,8 @@ brw_cs_emit(struct brw_context *brw,
|
|||
prog_data->simd_size = 8;
|
||||
}
|
||||
|
||||
fs_visitor v16(brw->intelScreen->compiler, brw,
|
||||
mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
|
||||
&cp->Base, 16, st_index);
|
||||
fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
|
||||
&prog_data->base, &cp->Base, cp->Base.nir, 16, st_index);
|
||||
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
|
||||
!fail_msg && !v8.simd16_unsupported &&
|
||||
local_workgroup_size <= 16 * brw->max_cs_threads) {
|
||||
|
|
|
|||
|
|
@ -93,11 +93,10 @@ class fs_visitor : public backend_shader
|
|||
public:
|
||||
fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
gl_shader_stage stage,
|
||||
const void *key,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
int shader_time_index);
|
||||
|
||||
|
|
@ -128,8 +127,6 @@ public:
|
|||
bool run_cs();
|
||||
void optimize();
|
||||
void allocate_registers();
|
||||
void assign_fs_binding_table_offsets();
|
||||
void assign_cs_binding_table_offsets();
|
||||
void setup_payload_gen4();
|
||||
void setup_payload_gen6();
|
||||
void setup_vs_payload();
|
||||
|
|
@ -207,7 +204,7 @@ public:
|
|||
void emit_interpolation_setup_gen6();
|
||||
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
|
||||
fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
|
||||
bool is_rect, uint32_t sampler, int texunit);
|
||||
bool is_rect, uint32_t sampler);
|
||||
void emit_texture(ir_texture_opcode op,
|
||||
const glsl_type *dest_type,
|
||||
fs_reg coordinate, int components,
|
||||
|
|
@ -220,8 +217,7 @@ public:
|
|||
bool is_cube_array,
|
||||
bool is_rect,
|
||||
uint32_t sampler,
|
||||
fs_reg sampler_reg,
|
||||
int texunit);
|
||||
fs_reg sampler_reg);
|
||||
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
||||
const fs_reg &sampler);
|
||||
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
|
|
@ -238,12 +234,10 @@ public:
|
|||
uint32_t spill_offset, int count);
|
||||
|
||||
void emit_nir_code();
|
||||
void nir_setup_inputs(nir_shader *shader);
|
||||
void nir_setup_outputs(nir_shader *shader);
|
||||
void nir_setup_uniforms(nir_shader *shader);
|
||||
void nir_setup_uniform(nir_variable *var);
|
||||
void nir_setup_builtin_uniform(nir_variable *var);
|
||||
void nir_emit_system_values(nir_shader *shader);
|
||||
void nir_setup_inputs();
|
||||
void nir_setup_outputs();
|
||||
void nir_setup_uniforms();
|
||||
void nir_emit_system_values();
|
||||
void nir_emit_impl(nir_function_impl *impl);
|
||||
void nir_emit_cf_list(exec_list *list);
|
||||
void nir_emit_if(nir_if *if_stmt);
|
||||
|
|
@ -294,10 +288,6 @@ public:
|
|||
|
||||
struct brw_reg interp_reg(int location, int channel);
|
||||
|
||||
virtual void setup_vec4_uniform_value(unsigned param_offset,
|
||||
const gl_constant_value *values,
|
||||
unsigned n);
|
||||
|
||||
int implied_mrf_writes(fs_inst *inst);
|
||||
|
||||
virtual void dump_instructions();
|
||||
|
|
@ -309,7 +299,7 @@ public:
|
|||
const struct brw_sampler_prog_key_data *key_tex;
|
||||
|
||||
struct brw_stage_prog_data *prog_data;
|
||||
unsigned int sanity_param_count;
|
||||
struct gl_program *prog;
|
||||
|
||||
int *param_size;
|
||||
|
||||
|
|
|
|||
|
|
@ -37,16 +37,13 @@ using namespace brw::surface_access;
|
|||
void
|
||||
fs_visitor::emit_nir_code()
|
||||
{
|
||||
nir_shader *nir = prog->nir;
|
||||
|
||||
/* emit the arrays used for inputs and outputs - load/store intrinsics will
|
||||
* be converted to reads/writes of these arrays
|
||||
*/
|
||||
nir_setup_inputs(nir);
|
||||
nir_setup_outputs(nir);
|
||||
uniforms = nir->num_uniforms;
|
||||
//nir_setup_uniforms(nir);
|
||||
nir_emit_system_values(nir);
|
||||
nir_setup_inputs();
|
||||
nir_setup_outputs();
|
||||
nir_setup_uniforms();
|
||||
nir_emit_system_values();
|
||||
|
||||
/* get the main function and emit it */
|
||||
nir_foreach_overload(nir, overload) {
|
||||
|
|
@ -57,11 +54,11 @@ fs_visitor::emit_nir_code()
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_inputs(nir_shader *shader)
|
||||
fs_visitor::nir_setup_inputs()
|
||||
{
|
||||
nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
|
||||
nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||
enum brw_reg_type type = brw_type_for_base_type(var->type);
|
||||
fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
|
||||
|
||||
|
|
@ -119,13 +116,13 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_outputs(nir_shader *shader)
|
||||
fs_visitor::nir_setup_outputs()
|
||||
{
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
|
||||
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs);
|
||||
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &shader->outputs) {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->outputs) {
|
||||
fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
|
||||
|
||||
int vector_elements =
|
||||
|
|
@ -176,108 +173,20 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_uniforms(nir_shader *shader)
|
||||
fs_visitor::nir_setup_uniforms()
|
||||
{
|
||||
if (dispatch_width != 8)
|
||||
return;
|
||||
|
||||
uniforms = shader->num_uniforms;
|
||||
uniforms = nir->num_uniforms;
|
||||
|
||||
if (shader_prog) {
|
||||
foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
|
||||
/* UBO's and atomics don't take up space in the uniform file */
|
||||
if (var->interface_type != NULL || var->type->contains_atomic())
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, "gl_", 3) == 0)
|
||||
nir_setup_builtin_uniform(var);
|
||||
else
|
||||
nir_setup_uniform(var);
|
||||
if(type_size_scalar(var->type) > 0)
|
||||
param_size[var->data.driver_location] = type_size_scalar(var->type);
|
||||
}
|
||||
} else {
|
||||
/* prog_to_nir only creates a single giant uniform variable so we can
|
||||
* just set param up directly. */
|
||||
for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) {
|
||||
for (unsigned int i = 0; i < 4; i++) {
|
||||
stage_prog_data->param[4 * p + i] =
|
||||
&prog->Parameters->ParameterValues[p][i];
|
||||
}
|
||||
}
|
||||
if(prog->Parameters->NumParameters > 0)
|
||||
param_size[0] = prog->Parameters->NumParameters * 4;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_uniform(nir_variable *var)
|
||||
{
|
||||
int namelen = strlen(var->name);
|
||||
|
||||
/* The data for our (non-builtin) uniforms is stored in a series of
|
||||
* gl_uniform_driver_storage structs for each subcomponent that
|
||||
* glGetUniformLocation() could name. We know it's been set up in the
|
||||
* same order we'd walk the type, so walk the list of storage and find
|
||||
* anything with our name, or the prefix of a component that starts with
|
||||
* our name.
|
||||
*/
|
||||
unsigned index = var->data.driver_location;
|
||||
for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
|
||||
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
|
||||
|
||||
if (storage->builtin)
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, storage->name, namelen) != 0 ||
|
||||
(storage->name[namelen] != 0 &&
|
||||
storage->name[namelen] != '.' &&
|
||||
storage->name[namelen] != '[')) {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
|
||||
/* UBO's and atomics don't take up space in the uniform file */
|
||||
if (var->interface_type != NULL || var->type->contains_atomic())
|
||||
continue;
|
||||
}
|
||||
|
||||
if (storage->type->is_image()) {
|
||||
setup_image_uniform_values(index, storage);
|
||||
} else {
|
||||
unsigned slots = storage->type->component_slots();
|
||||
if (storage->array_elements)
|
||||
slots *= storage->array_elements;
|
||||
|
||||
for (unsigned i = 0; i < slots; i++) {
|
||||
stage_prog_data->param[index++] = &storage->storage[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
|
||||
{
|
||||
const nir_state_slot *const slots = var->state_slots;
|
||||
assert(var->state_slots != NULL);
|
||||
|
||||
unsigned uniform_index = var->data.driver_location;
|
||||
for (unsigned int i = 0; i < var->num_state_slots; i++) {
|
||||
/* This state reference has already been setup by ir_to_mesa, but we'll
|
||||
* get the same index back here.
|
||||
*/
|
||||
int index = _mesa_add_state_reference(this->prog->Parameters,
|
||||
(gl_state_index *)slots[i].tokens);
|
||||
|
||||
/* Add each of the unique swizzles of the element as a parameter.
|
||||
* This'll end up matching the expected layout of the
|
||||
* array/matrix/structure we're trying to fill in.
|
||||
*/
|
||||
int last_swiz = -1;
|
||||
for (unsigned int j = 0; j < 4; j++) {
|
||||
int swiz = GET_SWZ(slots[i].swizzle, j);
|
||||
if (swiz == last_swiz)
|
||||
break;
|
||||
last_swiz = swiz;
|
||||
|
||||
stage_prog_data->param[uniform_index++] =
|
||||
&prog->Parameters->ParameterValues[index][swiz];
|
||||
}
|
||||
if (type_size_scalar(var->type) > 0)
|
||||
param_size[var->data.driver_location] = type_size_scalar(var->type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -363,10 +272,10 @@ emit_system_values_block(nir_block *block, void *void_visitor)
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_system_values(nir_shader *shader)
|
||||
fs_visitor::nir_emit_system_values()
|
||||
{
|
||||
nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
|
||||
nir_foreach_overload(shader, overload) {
|
||||
nir_foreach_overload(nir, overload) {
|
||||
assert(strcmp(overload->function->name, "main") == 0);
|
||||
assert(overload->impl);
|
||||
nir_foreach_block(overload->impl, emit_system_values_block, this);
|
||||
|
|
@ -1540,7 +1449,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
*/
|
||||
brw_mark_surface_used(prog_data,
|
||||
stage_prog_data->binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
if (has_indirect) {
|
||||
|
|
@ -1603,7 +1512,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
*/
|
||||
brw_mark_surface_used(prog_data,
|
||||
stage_prog_data->binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
/* Get the offset to read from */
|
||||
|
|
@ -1796,7 +1705,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
|
||||
brw_mark_surface_used(prog_data,
|
||||
stage_prog_data->binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
/* Offset */
|
||||
|
|
@ -1913,8 +1822,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
|
||||
int reg_width = dispatch_width / 8;
|
||||
|
||||
assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
|
||||
|
||||
/* Set LOD = 0 */
|
||||
fs_reg source = fs_reg(0);
|
||||
|
||||
|
|
@ -1990,7 +1897,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
|||
*/
|
||||
brw_mark_surface_used(prog_data,
|
||||
stage_prog_data->binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
fs_reg offset = get_nir_src(instr->src[1]);
|
||||
|
|
@ -2023,12 +1930,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
unsigned sampler = stage_prog_data->bind_map[set].index[binding];
|
||||
fs_reg sampler_reg(sampler);
|
||||
|
||||
/* FINISHME: We're failing to recompile our programs when the sampler is
|
||||
* updated. This only matters for the texture rectangle scale parameters
|
||||
* (pre-gen6, or gen6+ with GL_CLAMP).
|
||||
*/
|
||||
int texunit = prog->SamplerUnits[sampler];
|
||||
|
||||
int gather_component = instr->component;
|
||||
|
||||
bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
|
||||
|
|
@ -2169,7 +2070,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
emit_texture(op, dest_type, coordinate, instr->coord_components,
|
||||
shadow_comparitor, lod, lod2, lod_components, sample_index,
|
||||
tex_offset, mcs, gather_component,
|
||||
is_cube_array, is_rect, sampler, sampler_reg, texunit);
|
||||
is_cube_array, is_rect, sampler, sampler_reg);
|
||||
|
||||
fs_reg dest = get_nir_dest(instr->dest);
|
||||
dest.type = this->result.type;
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ fs_visitor::emit_vs_system_value(int location)
|
|||
|
||||
fs_reg
|
||||
fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
|
||||
bool is_rect, uint32_t sampler, int texunit)
|
||||
bool is_rect, uint32_t sampler)
|
||||
{
|
||||
bool needs_gl_clamp = true;
|
||||
fs_reg scale_x, scale_y;
|
||||
|
|
@ -93,10 +93,16 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
|
|||
(devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) ||
|
||||
key_tex->gl_clamp_mask[1] & (1 << sampler))))) {
|
||||
struct gl_program_parameter_list *params = prog->Parameters;
|
||||
|
||||
|
||||
/* FINISHME: We're failing to recompile our programs when the sampler is
|
||||
* updated. This only matters for the texture rectangle scale
|
||||
* parameters (pre-gen6, or gen6+ with GL_CLAMP).
|
||||
*/
|
||||
int tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL,
|
||||
STATE_TEXRECT_SCALE,
|
||||
texunit,
|
||||
prog->SamplerUnits[sampler],
|
||||
0,
|
||||
0
|
||||
};
|
||||
|
|
@ -221,7 +227,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
|
|||
bool is_cube_array,
|
||||
bool is_rect,
|
||||
uint32_t sampler,
|
||||
fs_reg sampler_reg, int texunit)
|
||||
fs_reg sampler_reg)
|
||||
{
|
||||
fs_inst *inst = NULL;
|
||||
|
||||
|
|
@ -256,7 +262,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
|
|||
* samplers. This should only be a problem with GL_CLAMP on Gen7.
|
||||
*/
|
||||
coordinate = rescale_texcoord(coordinate, coord_components, is_rect,
|
||||
sampler, texunit);
|
||||
sampler);
|
||||
}
|
||||
|
||||
/* Writemasking doesn't eliminate channels on SIMD8 texture
|
||||
|
|
@ -692,7 +698,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
|||
fs_reg src_depth;
|
||||
|
||||
if (source_depth_to_render_target) {
|
||||
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||
src_depth = frag_depth;
|
||||
else
|
||||
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
|
||||
|
|
@ -1060,16 +1066,14 @@ fs_visitor::emit_barrier()
|
|||
|
||||
fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
gl_shader_stage stage,
|
||||
const void *key,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
int shader_time_index)
|
||||
: backend_shader(compiler, log_data, mem_ctx,
|
||||
shader_prog, prog, prog_data, stage),
|
||||
key(key), prog_data(prog_data),
|
||||
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
|
||||
key(key), prog_data(prog_data), prog(prog),
|
||||
dispatch_width(dispatch_width),
|
||||
shader_time_index(shader_time_index),
|
||||
promoted_constants(0),
|
||||
|
|
|
|||
|
|
@ -32,6 +32,24 @@
|
|||
#include "brw_vec4_gs_visitor.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_ff_gs.h"
|
||||
#include "brw_nir.h"
|
||||
|
||||
static void
|
||||
assign_gs_binding_table_offsets(const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_gs_prog_data *prog_data)
|
||||
{
|
||||
/* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
|
||||
* feedback surfaces.
|
||||
*/
|
||||
uint32_t reserved = devinfo->gen == 6 ? BRW_MAX_SOL_BINDINGS : 0;
|
||||
|
||||
brw_assign_common_binding_table_offsets(MESA_SHADER_GEOMETRY, devinfo,
|
||||
shader_prog, prog,
|
||||
&prog_data->base.base,
|
||||
reserved);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_compile_gs_prog(struct brw_context *brw,
|
||||
|
|
@ -55,6 +73,9 @@ brw_compile_gs_prog(struct brw_context *brw,
|
|||
|
||||
c.prog_data.invocations = gp->program.Invocations;
|
||||
|
||||
assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
|
||||
&gp->program.Base, &c.prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
|
|
@ -64,9 +85,7 @@ brw_compile_gs_prog(struct brw_context *brw,
|
|||
* every uniform is a float which gets padded to the size of a vec4.
|
||||
*/
|
||||
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
int param_count = gs->num_uniform_components * 4;
|
||||
|
||||
param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
|
||||
int param_count = gp->program.Base.nir->num_uniforms * 4;
|
||||
|
||||
c.prog_data.base.base.param =
|
||||
rzalloc_array(NULL, const gl_constant_value *, param_count);
|
||||
|
|
@ -77,6 +96,9 @@ brw_compile_gs_prog(struct brw_context *brw,
|
|||
c.prog_data.base.base.nr_params = param_count;
|
||||
c.prog_data.base.base.nr_image_params = gs->NumImages;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
|
||||
&c.prog_data.base.base, false);
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
|
||||
nir_gs_count_vertices(gp->program.Base.nir);
|
||||
|
|
@ -418,24 +440,3 @@ brw_gs_precompile(struct gl_context *ctx,
|
|||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
brw_gs_prog_data_compare(const void *in_a, const void *in_b)
|
||||
{
|
||||
const struct brw_gs_prog_data *a = in_a;
|
||||
const struct brw_gs_prog_data *b = in_b;
|
||||
|
||||
/* Compare the base structure. */
|
||||
if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
|
||||
return false;
|
||||
|
||||
/* Compare the rest of the struct. */
|
||||
const unsigned offset = sizeof(struct brw_stage_prog_data);
|
||||
if (memcmp(((char *) a) + offset, ((char *) b) + offset,
|
||||
sizeof(struct brw_gs_prog_data) - offset)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,24 @@
|
|||
#include "glsl/nir/glsl_to_nir.h"
|
||||
#include "program/prog_to_nir.h"
|
||||
|
||||
static void
|
||||
brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
|
||||
{
|
||||
nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
|
||||
is_scalar ? type_size_scalar : type_size_vec4);
|
||||
}
|
||||
|
||||
static void
|
||||
brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
|
||||
{
|
||||
if (is_scalar) {
|
||||
nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
|
||||
} else {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->outputs)
|
||||
var->data.driver_location = var->data.location;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nir_optimize(nir_shader *nir, bool is_scalar)
|
||||
{
|
||||
|
|
@ -80,12 +98,11 @@ brw_create_nir(struct brw_context *brw,
|
|||
struct gl_context *ctx = &brw->ctx;
|
||||
const nir_shader_compiler_options *options =
|
||||
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
|
||||
struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
|
||||
nir_shader *nir;
|
||||
|
||||
/* First, lower the GLSL IR or Mesa IR to NIR */
|
||||
if (shader_prog) {
|
||||
nir = glsl_to_nir(shader, options);
|
||||
nir = glsl_to_nir(shader_prog, stage, options);
|
||||
} else {
|
||||
nir = prog_to_nir(prog, options);
|
||||
nir_convert_to_ssa(nir); /* turn registers into SSA */
|
||||
|
|
@ -142,26 +159,12 @@ brw_process_nir(nir_shader *nir,
|
|||
/* Get rid of split copies */
|
||||
nir_optimize(nir, is_scalar);
|
||||
|
||||
if (is_scalar) {
|
||||
nir_assign_var_locations(&nir->uniforms,
|
||||
&nir->num_uniforms,
|
||||
type_size_scalar);
|
||||
nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);
|
||||
nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
|
||||
nir_lower_io(nir, type_size_scalar);
|
||||
} else {
|
||||
nir_assign_var_locations(&nir->uniforms,
|
||||
&nir->num_uniforms,
|
||||
type_size_vec4);
|
||||
|
||||
nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_vec4);
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &nir->outputs)
|
||||
var->data.driver_location = var->data.location;
|
||||
|
||||
nir_lower_io(nir, type_size_vec4);
|
||||
}
|
||||
|
||||
brw_nir_lower_inputs(nir, is_scalar);
|
||||
brw_nir_lower_outputs(nir, is_scalar);
|
||||
nir_assign_var_locations(&nir->uniforms,
|
||||
&nir->num_uniforms,
|
||||
is_scalar ? type_size_scalar : type_size_vec4);
|
||||
nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4);
|
||||
nir_validate_shader(nir);
|
||||
|
||||
nir_remove_dead_variables(nir);
|
||||
|
|
|
|||
|
|
@ -91,6 +91,15 @@ brw_process_nir(nir_shader *nir,
|
|||
const struct gl_shader_program *shader_prog,
|
||||
gl_shader_stage stage, bool is_scalar);
|
||||
|
||||
void brw_nir_setup_glsl_uniforms(nir_shader *shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
bool is_scalar);
|
||||
|
||||
void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
182
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
Normal file
182
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_shader.h"
|
||||
#include "brw_nir.h"
|
||||
#include "glsl/ir.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
|
||||
static void
|
||||
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
unsigned comps_per_unit)
|
||||
{
|
||||
const nir_state_slot *const slots = var->state_slots;
|
||||
assert(var->state_slots != NULL);
|
||||
|
||||
unsigned uniform_index = var->data.driver_location * comps_per_unit;
|
||||
for (unsigned int i = 0; i < var->num_state_slots; i++) {
|
||||
/* This state reference has already been setup by ir_to_mesa, but we'll
|
||||
* get the same index back here.
|
||||
*/
|
||||
int index = _mesa_add_state_reference(prog->Parameters,
|
||||
(gl_state_index *)slots[i].tokens);
|
||||
|
||||
/* Add each of the unique swizzles of the element as a parameter.
|
||||
* This'll end up matching the expected layout of the
|
||||
* array/matrix/structure we're trying to fill in.
|
||||
*/
|
||||
int last_swiz = -1;
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
int swiz = GET_SWZ(slots[i].swizzle, j);
|
||||
|
||||
/* If we hit a pair of identical swizzles, this means we've hit the
|
||||
* end of the builtin variable. In scalar mode, we should just quit
|
||||
* and move on to the next one. In vec4, we need to continue and pad
|
||||
* it out to 4 components.
|
||||
*/
|
||||
if (swiz == last_swiz && comps_per_unit == 1)
|
||||
break;
|
||||
|
||||
last_swiz = swiz;
|
||||
|
||||
stage_prog_data->param[uniform_index++] =
|
||||
&prog->Parameters->ParameterValues[index][swiz];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
unsigned comps_per_unit)
|
||||
{
|
||||
int namelen = strlen(var->name);
|
||||
|
||||
/* The data for our (non-builtin) uniforms is stored in a series of
|
||||
* gl_uniform_driver_storage structs for each subcomponent that
|
||||
* glGetUniformLocation() could name. We know it's been set up in the same
|
||||
* order we'd walk the type, so walk the list of storage and find anything
|
||||
* with our name, or the prefix of a component that starts with our name.
|
||||
*/
|
||||
unsigned uniform_index = var->data.driver_location * comps_per_unit;
|
||||
for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
|
||||
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
|
||||
|
||||
if (storage->builtin)
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, storage->name, namelen) != 0 ||
|
||||
(storage->name[namelen] != 0 &&
|
||||
storage->name[namelen] != '.' &&
|
||||
storage->name[namelen] != '[')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (storage->type->is_image()) {
|
||||
brw_setup_image_uniform_values(stage, stage_prog_data,
|
||||
uniform_index, storage);
|
||||
} else {
|
||||
gl_constant_value *components = storage->storage;
|
||||
unsigned vector_count = (MAX2(storage->array_elements, 1) *
|
||||
storage->type->matrix_columns);
|
||||
unsigned vector_size = storage->type->vector_elements;
|
||||
|
||||
for (unsigned s = 0; s < vector_count; s++) {
|
||||
unsigned i;
|
||||
for (i = 0; i < vector_size; i++) {
|
||||
stage_prog_data->param[uniform_index++] = components++;
|
||||
}
|
||||
|
||||
/* Pad out with zeros if needed (only needed for vec4) */
|
||||
for (; i < comps_per_unit; i++) {
|
||||
static const gl_constant_value zero = { 0.0 };
|
||||
stage_prog_data->param[uniform_index++] = &zero;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_nir_setup_glsl_uniforms(nir_shader *shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
bool is_scalar)
|
||||
{
|
||||
unsigned comps_per_unit = is_scalar ? 1 : 4;
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
|
||||
/* UBO's, atomics and samplers don't take up space in the
|
||||
uniform file */
|
||||
if (var->interface_type != NULL || var->type->contains_atomic())
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, "gl_", 3) == 0) {
|
||||
brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
|
||||
comps_per_unit);
|
||||
} else {
|
||||
brw_nir_setup_glsl_uniform(shader->stage, var, shader_prog,
|
||||
stage_prog_data, comps_per_unit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data)
|
||||
{
|
||||
struct gl_program_parameter_list *plist = prog->Parameters;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!shader->uniforms.is_empty()) {
|
||||
/* For ARB programs, only a single "parameters" variable is generated to
|
||||
* support uniform data.
|
||||
*/
|
||||
assert(shader->uniforms.length() == 1);
|
||||
nir_variable *var = (nir_variable *) shader->uniforms.get_head();
|
||||
assert(strcmp(var->name, "parameters") == 0);
|
||||
assert(var->type->array_size() == (int)plist->NumParameters);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
||||
/* Parameters should be either vec4 uniforms or single component
|
||||
* constants; matrices and other larger types should have been broken
|
||||
* down earlier.
|
||||
*/
|
||||
assert(plist->Parameters[p].Size <= 4);
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < plist->Parameters[p].Size; i++) {
|
||||
stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i];
|
||||
}
|
||||
for (; i < 4; i++) {
|
||||
static const gl_constant_value zero = { 0.0 };
|
||||
stage_prog_data->param[4 * p + i] = &zero;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -142,9 +142,7 @@ brwProgramStringNotify(struct gl_context *ctx,
|
|||
|
||||
brw_add_texrect_params(prog);
|
||||
|
||||
if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
|
||||
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
|
||||
}
|
||||
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
|
||||
|
||||
brw_fs_precompile(ctx, NULL, prog);
|
||||
break;
|
||||
|
|
@ -168,10 +166,8 @@ brwProgramStringNotify(struct gl_context *ctx,
|
|||
|
||||
brw_add_texrect_params(prog);
|
||||
|
||||
if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
|
||||
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
|
||||
brw->intelScreen->compiler->scalar_vs);
|
||||
}
|
||||
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
|
||||
brw->intelScreen->compiler->scalar_vs);
|
||||
|
||||
brw_vs_precompile(ctx, NULL, prog);
|
||||
break;
|
||||
|
|
@ -544,23 +540,6 @@ brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
|
|||
MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
|
||||
const struct brw_stage_prog_data *b)
|
||||
{
|
||||
/* Compare all the struct up to the pointers. */
|
||||
if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
|
||||
return false;
|
||||
|
||||
if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
|
||||
return false;
|
||||
|
||||
if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
brw_stage_prog_data_free(const void *p)
|
||||
{
|
||||
|
|
@ -568,6 +547,7 @@ brw_stage_prog_data_free(const void *p)
|
|||
|
||||
ralloc_free(prog_data->param);
|
||||
ralloc_free(prog_data->pull_param);
|
||||
ralloc_free(prog_data->image_param);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -169,10 +169,6 @@ void
|
|||
brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
|
||||
unsigned surf_index);
|
||||
|
||||
bool
|
||||
brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
|
||||
const struct brw_stage_prog_data *b);
|
||||
|
||||
void
|
||||
brw_stage_prog_data_free(const void *prog_data);
|
||||
|
||||
|
|
|
|||
|
|
@ -72,6 +72,20 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
|
|||
va_end(args);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
case MESA_SHADER_COMPUTE:
|
||||
return true;
|
||||
case MESA_SHADER_VERTEX:
|
||||
return compiler->scalar_vs;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct brw_compiler *
|
||||
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
||||
{
|
||||
|
|
@ -120,19 +134,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|||
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
|
||||
compiler->glsl_compiler_options[i].LowerClipDistance = true;
|
||||
|
||||
bool is_scalar;
|
||||
switch (i) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
case MESA_SHADER_COMPUTE:
|
||||
is_scalar = true;
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
is_scalar = compiler->scalar_vs;
|
||||
break;
|
||||
default:
|
||||
is_scalar = false;
|
||||
break;
|
||||
}
|
||||
bool is_scalar = is_scalar_shader_stage(compiler, i);
|
||||
|
||||
compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
|
||||
compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
|
||||
|
|
@ -142,8 +144,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|||
if (devinfo->gen < 7)
|
||||
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
|
||||
|
||||
if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
|
||||
compiler->glsl_compiler_options[i].NirOptions = nir_options;
|
||||
compiler->glsl_compiler_options[i].NirOptions = nir_options;
|
||||
}
|
||||
|
||||
return compiler;
|
||||
|
|
@ -194,20 +195,6 @@ brw_shader_precompile(struct gl_context *ctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_scalar_shader_stage(struct brw_context *brw, int stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
case MESA_SHADER_COMPUTE:
|
||||
return true;
|
||||
case MESA_SHADER_VERTEX:
|
||||
return brw->intelScreen->compiler->scalar_vs;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
brw_lower_packing_builtins(struct brw_context *brw,
|
||||
gl_shader_stage shader_type,
|
||||
|
|
@ -218,7 +205,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
|
|||
| LOWER_PACK_UNORM_2x16
|
||||
| LOWER_UNPACK_UNORM_2x16;
|
||||
|
||||
if (is_scalar_shader_stage(brw, shader_type)) {
|
||||
if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
|
||||
ops |= LOWER_UNPACK_UNORM_4x8
|
||||
| LOWER_UNPACK_SNORM_4x8
|
||||
| LOWER_PACK_UNORM_4x8
|
||||
|
|
@ -231,7 +218,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
|
|||
* lowering is needed. For SOA code, the Half2x16 ops must be
|
||||
* scalarized.
|
||||
*/
|
||||
if (is_scalar_shader_stage(brw, shader_type)) {
|
||||
if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
|
||||
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
|
||||
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
|
||||
}
|
||||
|
|
@ -285,8 +272,6 @@ process_glsl_ir(gl_shader_stage stage,
|
|||
brw_lower_texture_gradients(brw, shader->ir);
|
||||
do_vec_index_to_cond_assign(shader->ir);
|
||||
lower_vector_insert(shader->ir, true);
|
||||
if (options->NirOptions == NULL)
|
||||
brw_do_cubemap_normalize(shader->ir);
|
||||
lower_offset_arrays(shader->ir);
|
||||
brw_do_lower_unnormalized_offset(shader->ir);
|
||||
lower_noise(shader->ir);
|
||||
|
|
@ -312,7 +297,7 @@ process_glsl_ir(gl_shader_stage stage,
|
|||
do {
|
||||
progress = false;
|
||||
|
||||
if (is_scalar_shader_stage(brw, shader->Stage)) {
|
||||
if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
|
||||
brw_do_channel_expressions(shader->ir);
|
||||
brw_do_vector_splitting(shader->ir);
|
||||
}
|
||||
|
|
@ -350,13 +335,11 @@ GLboolean
|
|||
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
const struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
unsigned int stage;
|
||||
|
||||
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
|
||||
struct gl_shader *shader = shProg->_LinkedShaders[stage];
|
||||
const struct gl_shader_compiler_options *options =
|
||||
&ctx->Const.ShaderCompilerOptions[stage];
|
||||
|
||||
if (!shader)
|
||||
continue;
|
||||
|
||||
|
|
@ -404,10 +387,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|
|||
|
||||
brw_add_texrect_params(prog);
|
||||
|
||||
if (options->NirOptions) {
|
||||
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
|
||||
is_scalar_shader_stage(brw, stage));
|
||||
}
|
||||
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
|
||||
is_scalar_shader_stage(compiler, stage));
|
||||
|
||||
_mesa_reference_program(ctx, &prog, NULL);
|
||||
}
|
||||
|
|
@ -917,21 +898,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
|
|||
backend_shader::backend_shader(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
void *mem_ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
gl_shader_stage stage)
|
||||
nir_shader *shader,
|
||||
struct brw_stage_prog_data *stage_prog_data)
|
||||
: compiler(compiler),
|
||||
log_data(log_data),
|
||||
devinfo(compiler->devinfo),
|
||||
shader(shader_prog ?
|
||||
(struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
|
||||
shader_prog(shader_prog),
|
||||
prog(prog),
|
||||
nir(shader),
|
||||
stage_prog_data(stage_prog_data),
|
||||
mem_ctx(mem_ctx),
|
||||
cfg(NULL),
|
||||
stage(stage)
|
||||
stage(shader->stage)
|
||||
{
|
||||
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
|
||||
stage_name = _mesa_shader_stage_to_string(stage);
|
||||
|
|
@ -1374,16 +1350,25 @@ backend_shader::invalidate_cfg()
|
|||
* trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
|
||||
*/
|
||||
void
|
||||
backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
|
||||
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
||||
const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
uint32_t next_binding_table_offset)
|
||||
{
|
||||
const struct gl_shader *shader = NULL;
|
||||
int num_textures = _mesa_fls(prog->SamplersUsed);
|
||||
|
||||
if (shader_prog)
|
||||
shader = shader_prog->_LinkedShaders[stage];
|
||||
|
||||
stage_prog_data->binding_table.texture_start = next_binding_table_offset;
|
||||
next_binding_table_offset += num_textures;
|
||||
|
||||
if (shader) {
|
||||
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
|
||||
next_binding_table_offset += shader->base.NumUniformBlocks;
|
||||
next_binding_table_offset += shader->NumUniformBlocks;
|
||||
} else {
|
||||
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
|
||||
}
|
||||
|
|
@ -1414,9 +1399,9 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_
|
|||
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
|
||||
}
|
||||
|
||||
if (shader && shader->base.NumImages) {
|
||||
if (shader && shader->NumImages) {
|
||||
stage_prog_data->binding_table.image_start = next_binding_table_offset;
|
||||
next_binding_table_offset += shader->base.NumImages;
|
||||
next_binding_table_offset += shader->NumImages;
|
||||
} else {
|
||||
stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
|
||||
}
|
||||
|
|
@ -1430,32 +1415,50 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_
|
|||
/* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
|
||||
}
|
||||
|
||||
void
|
||||
backend_shader::setup_image_uniform_values(unsigned param_offset,
|
||||
const gl_uniform_storage *storage)
|
||||
static void
|
||||
setup_vec4_uniform_value(const gl_constant_value **params,
|
||||
const gl_constant_value *values,
|
||||
unsigned n)
|
||||
{
|
||||
const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
|
||||
static const gl_constant_value zero = { 0 };
|
||||
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
params[i] = &values[i];
|
||||
|
||||
for (unsigned i = n; i < 4; ++i)
|
||||
params[i] = &zero;
|
||||
}
|
||||
|
||||
void
|
||||
brw_setup_image_uniform_values(gl_shader_stage stage,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
unsigned param_start_index,
|
||||
const gl_uniform_storage *storage)
|
||||
{
|
||||
const gl_constant_value **param =
|
||||
&stage_prog_data->param[param_start_index];
|
||||
|
||||
for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
|
||||
const unsigned image_idx = storage->image[stage].index + i;
|
||||
const brw_image_param *param = &stage_prog_data->image_param[image_idx];
|
||||
const brw_image_param *image_param =
|
||||
&stage_prog_data->image_param[image_idx];
|
||||
|
||||
/* Upload the brw_image_param structure. The order is expected to match
|
||||
* the BRW_IMAGE_PARAM_*_OFFSET defines.
|
||||
*/
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
|
||||
(const gl_constant_value *)¶m->surface_idx, 1);
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET,
|
||||
(const gl_constant_value *)param->offset, 2);
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET,
|
||||
(const gl_constant_value *)param->size, 3);
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET,
|
||||
(const gl_constant_value *)param->stride, 4);
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET,
|
||||
(const gl_constant_value *)param->tiling, 3);
|
||||
setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
|
||||
(const gl_constant_value *)param->swizzling, 2);
|
||||
param_offset += BRW_IMAGE_PARAM_SIZE;
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
|
||||
(const gl_constant_value *)&image_param->surface_idx, 1);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
|
||||
(const gl_constant_value *)image_param->offset, 2);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
|
||||
(const gl_constant_value *)image_param->size, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
|
||||
(const gl_constant_value *)image_param->stride, 4);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
|
||||
(const gl_constant_value *)image_param->tiling, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
|
||||
(const gl_constant_value *)image_param->swizzling, 2);
|
||||
param += BRW_IMAGE_PARAM_SIZE;
|
||||
|
||||
brw_mark_surface_used(
|
||||
stage_prog_data,
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
#include <stdint.h>
|
||||
#include "brw_reg.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_context.h"
|
||||
#include "main/compiler.h"
|
||||
#include "glsl/ir.h"
|
||||
#include "program/prog_parameter.h"
|
||||
|
|
@ -224,10 +225,8 @@ protected:
|
|||
backend_shader(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
void *mem_ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
gl_shader_stage stage);
|
||||
nir_shader *shader,
|
||||
struct brw_stage_prog_data *stage_prog_data);
|
||||
|
||||
public:
|
||||
|
||||
|
|
@ -235,9 +234,7 @@ public:
|
|||
void *log_data; /* Passed to compiler->*_log functions */
|
||||
|
||||
const struct brw_device_info * const devinfo;
|
||||
struct brw_shader * const shader;
|
||||
struct gl_shader_program * const shader_prog;
|
||||
struct gl_program * const prog;
|
||||
nir_shader *nir;
|
||||
struct brw_stage_prog_data * const stage_prog_data;
|
||||
|
||||
/** ralloc context for temporary data used during compile */
|
||||
|
|
@ -266,19 +263,16 @@ public:
|
|||
void calculate_cfg();
|
||||
void invalidate_cfg();
|
||||
|
||||
void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
|
||||
|
||||
virtual void invalidate_live_intervals() = 0;
|
||||
|
||||
virtual void setup_vec4_uniform_value(unsigned param_offset,
|
||||
const gl_constant_value *values,
|
||||
unsigned n) = 0;
|
||||
void setup_image_uniform_values(unsigned param_offset,
|
||||
const gl_uniform_storage *storage);
|
||||
};
|
||||
|
||||
uint32_t brw_texture_offset(int *offsets, unsigned num_components);
|
||||
|
||||
void brw_setup_image_uniform_values(gl_shader_stage stage,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
unsigned param_start_index,
|
||||
const gl_uniform_storage *storage);
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
|
||||
|
|
@ -296,6 +290,14 @@ extern "C" {
|
|||
struct brw_compiler *
|
||||
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
|
||||
|
||||
void
|
||||
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
||||
const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
uint32_t next_binding_table_offset);
|
||||
|
||||
bool brw_vs_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog);
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ extern const struct brw_tracked_state brw_vs_binding_table;
|
|||
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
|
||||
extern const struct brw_tracked_state brw_wm_abo_surfaces;
|
||||
extern const struct brw_tracked_state brw_wm_image_surfaces;
|
||||
extern const struct brw_tracked_state brw_cs_ubo_surfaces;
|
||||
extern const struct brw_tracked_state brw_cs_abo_surfaces;
|
||||
extern const struct brw_tracked_state brw_cs_image_surfaces;
|
||||
extern const struct brw_tracked_state brw_wm_unit;
|
||||
|
|
|
|||
|
|
@ -350,10 +350,6 @@ brw_init_caches(struct brw_context *brw)
|
|||
if (brw->has_llc)
|
||||
drm_intel_gem_bo_map_unsynchronized(cache->bo);
|
||||
|
||||
cache->aux_compare[BRW_CACHE_VS_PROG] = brw_vs_prog_data_compare;
|
||||
cache->aux_compare[BRW_CACHE_GS_PROG] = brw_gs_prog_data_compare;
|
||||
cache->aux_compare[BRW_CACHE_FS_PROG] = brw_wm_prog_data_compare;
|
||||
cache->aux_compare[BRW_CACHE_CS_PROG] = brw_cs_prog_data_compare;
|
||||
cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
|
||||
cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
|
||||
cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
|
||||
|
|
|
|||
|
|
@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
|
|||
&brw_state_base_address,
|
||||
&brw_cs_image_surfaces,
|
||||
&gen7_cs_push_constants,
|
||||
&brw_cs_ubo_surfaces,
|
||||
&brw_cs_abo_surfaces,
|
||||
&brw_texture_surfaces,
|
||||
&brw_cs_work_groups_surface,
|
||||
|
|
@ -352,6 +353,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
|
|||
&gen8_state_base_address,
|
||||
&brw_cs_image_surfaces,
|
||||
&gen7_cs_push_constants,
|
||||
&brw_cs_ubo_surfaces,
|
||||
&brw_cs_abo_surfaces,
|
||||
&brw_texture_surfaces,
|
||||
&brw_cs_work_groups_surface,
|
||||
|
|
|
|||
|
|
@ -282,7 +282,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
|
|||
/* When this layout is used the horizontal alignment is fixed at 64 and the
|
||||
* hardware ignores the value given in the surface state
|
||||
*/
|
||||
const unsigned int align_w = 64;
|
||||
const unsigned int halign = 64;
|
||||
|
||||
mt->total_height = mt->physical_height0;
|
||||
mt->total_width = 0;
|
||||
|
|
@ -292,7 +292,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
|
|||
|
||||
intel_miptree_set_level_info(mt, level, x, 0, depth);
|
||||
|
||||
img_width = ALIGN(width, align_w);
|
||||
img_width = ALIGN(width, halign);
|
||||
|
||||
mt->total_width = MAX2(mt->total_width, x + img_width);
|
||||
|
||||
|
|
@ -328,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
unsigned mip1_width;
|
||||
|
||||
if (mt->compressed) {
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
|
||||
ALIGN_NPOT(minify(mt->physical_width0, 2), bw);
|
||||
} else {
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
|
||||
minify(mt->physical_width0, 2);
|
||||
}
|
||||
|
||||
|
|
@ -348,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
|
||||
intel_miptree_set_level_info(mt, level, x, y, depth);
|
||||
|
||||
img_height = ALIGN_NPOT(height, mt->align_h);
|
||||
img_height = ALIGN_NPOT(height, mt->valign);
|
||||
if (mt->compressed)
|
||||
img_height /= bh;
|
||||
|
||||
|
|
@ -365,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
/* Layout_below: step right after second mipmap.
|
||||
*/
|
||||
if (level == mt->first_level + 1) {
|
||||
x += ALIGN_NPOT(width, mt->align_w) / bw;
|
||||
x += ALIGN_NPOT(width, mt->halign) / bw;
|
||||
} else {
|
||||
y += img_height;
|
||||
}
|
||||
|
|
@ -385,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
|
|||
{
|
||||
if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
|
||||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
|
||||
return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w);
|
||||
return ALIGN_NPOT(minify(mt->physical_width0, level), mt->halign);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -426,13 +426,13 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
|
|||
} else if (mt->target == GL_TEXTURE_3D ||
|
||||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
|
||||
mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
|
||||
return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h);
|
||||
return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);
|
||||
|
||||
} else {
|
||||
const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h);
|
||||
const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h);
|
||||
const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
|
||||
const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);
|
||||
|
||||
return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
|
||||
return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->valign;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -502,9 +502,9 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
|
|||
|
||||
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
|
||||
unsigned img_height;
|
||||
img_height = ALIGN_NPOT(height, mt->align_h);
|
||||
img_height = ALIGN_NPOT(height, mt->valign);
|
||||
if (mt->compressed)
|
||||
img_height /= mt->align_h;
|
||||
img_height /= mt->valign;
|
||||
|
||||
for (unsigned q = 0; q < mt->level[level].depth; q++) {
|
||||
if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
|
||||
|
|
@ -537,8 +537,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
|
|||
unsigned WL = MAX2(mt->physical_width0 >> level, 1);
|
||||
unsigned HL = MAX2(mt->physical_height0 >> level, 1);
|
||||
unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
|
||||
unsigned wL = ALIGN_NPOT(WL, mt->align_w);
|
||||
unsigned hL = ALIGN_NPOT(HL, mt->align_h);
|
||||
unsigned wL = ALIGN_NPOT(WL, mt->halign);
|
||||
unsigned hL = ALIGN_NPOT(HL, mt->valign);
|
||||
|
||||
if (mt->target == GL_TEXTURE_CUBE_MAP)
|
||||
DL = 6;
|
||||
|
|
@ -656,7 +656,7 @@ brw_miptree_choose_tiling(struct brw_context *brw,
|
|||
* to know that ahead of time. And besides, since we use a vertical
|
||||
* alignment of 4 as often as we can, this shouldn't happen very often.
|
||||
*/
|
||||
if (brw->gen == 7 && mt->align_h == 2 &&
|
||||
if (brw->gen == 7 && mt->valign == 2 &&
|
||||
brw->format_supported_as_render_target[mt->format]) {
|
||||
return I915_TILING_X;
|
||||
}
|
||||
|
|
@ -748,21 +748,21 @@ intel_miptree_set_alignment(struct brw_context *brw,
|
|||
/* Stencil uses W tiling, so we force W tiling alignment for the
|
||||
* ALL_SLICES_AT_EACH_LOD miptree layout.
|
||||
*/
|
||||
mt->align_w = 64;
|
||||
mt->align_h = 64;
|
||||
mt->halign = 64;
|
||||
mt->valign = 64;
|
||||
assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
|
||||
} else {
|
||||
/* Depth uses Y tiling, so we force need Y tiling alignment for the
|
||||
* ALL_SLICES_AT_EACH_LOD miptree layout.
|
||||
*/
|
||||
mt->align_w = 128 / mt->cpp;
|
||||
mt->align_h = 32;
|
||||
mt->halign = 128 / mt->cpp;
|
||||
mt->valign = 32;
|
||||
}
|
||||
} else if (mt->compressed) {
|
||||
/* The hardware alignment requirements for compressed textures
|
||||
* happen to match the block boundaries.
|
||||
*/
|
||||
_mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h);
|
||||
_mesa_get_format_block_size(mt->format, &mt->halign, &mt->valign);
|
||||
|
||||
/* On Gen9+ we can pick our own alignment for compressed textures but it
|
||||
* has to be a multiple of the block size. The minimum alignment we can
|
||||
|
|
@ -770,21 +770,21 @@ intel_miptree_set_alignment(struct brw_context *brw,
|
|||
* size
|
||||
*/
|
||||
if (brw->gen >= 9) {
|
||||
mt->align_w *= 4;
|
||||
mt->align_h *= 4;
|
||||
mt->halign *= 4;
|
||||
mt->valign *= 4;
|
||||
}
|
||||
} else if (mt->format == MESA_FORMAT_S_UINT8) {
|
||||
mt->align_w = 8;
|
||||
mt->align_h = brw->gen >= 7 ? 8 : 4;
|
||||
mt->halign = 8;
|
||||
mt->valign = brw->gen >= 7 ? 8 : 4;
|
||||
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
|
||||
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
|
||||
* vertical alignment < 64. */
|
||||
mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
|
||||
mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
|
||||
mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
|
||||
mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
|
||||
} else {
|
||||
mt->align_w =
|
||||
mt->halign =
|
||||
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
|
||||
mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
|
||||
mt->valign = intel_vertical_texture_alignment_unit(brw, mt);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -809,8 +809,8 @@ brw_miptree_layout(struct brw_context *brw,
|
|||
if (brw->gen >= 9) {
|
||||
unsigned int i, j;
|
||||
_mesa_get_format_block_size(mt->format, &i, &j);
|
||||
mt->align_w /= i;
|
||||
mt->align_h /= j;
|
||||
mt->halign /= i;
|
||||
mt->valign /= j;
|
||||
}
|
||||
|
||||
if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
|
||||
|
|
|
|||
|
|
@ -518,11 +518,11 @@ vec4_visitor::split_uniform_registers()
|
|||
void
|
||||
vec4_visitor::pack_uniform_registers()
|
||||
{
|
||||
bool uniform_used[this->uniforms];
|
||||
uint8_t chans_used[this->uniforms];
|
||||
int new_loc[this->uniforms];
|
||||
int new_chan[this->uniforms];
|
||||
|
||||
memset(uniform_used, 0, sizeof(uniform_used));
|
||||
memset(chans_used, 0, sizeof(chans_used));
|
||||
memset(new_loc, 0, sizeof(new_loc));
|
||||
memset(new_chan, 0, sizeof(new_chan));
|
||||
|
||||
|
|
@ -531,11 +531,36 @@ vec4_visitor::pack_uniform_registers()
|
|||
* to pull constants, and from some GLSL code generators like wine.
|
||||
*/
|
||||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
continue;
|
||||
unsigned readmask;
|
||||
switch (inst->opcode) {
|
||||
case VEC4_OPCODE_PACK_BYTES:
|
||||
case BRW_OPCODE_DP4:
|
||||
case BRW_OPCODE_DPH:
|
||||
readmask = 0xf;
|
||||
break;
|
||||
case BRW_OPCODE_DP3:
|
||||
readmask = 0x7;
|
||||
break;
|
||||
case BRW_OPCODE_DP2:
|
||||
readmask = 0x3;
|
||||
break;
|
||||
default:
|
||||
readmask = inst->dst.writemask;
|
||||
break;
|
||||
}
|
||||
|
||||
uniform_used[inst->src[i].reg] = true;
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
continue;
|
||||
|
||||
int reg = inst->src[i].reg;
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (!(readmask & (1 << c)))
|
||||
continue;
|
||||
|
||||
chans_used[reg] = MAX2(chans_used[reg],
|
||||
BRW_GET_SWZ(inst->src[i].swizzle, c) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -546,17 +571,15 @@ vec4_visitor::pack_uniform_registers()
|
|||
*/
|
||||
for (int src = 0; src < uniforms; src++) {
|
||||
assert(src < uniform_array_size);
|
||||
int size = this->uniform_vector_size[src];
|
||||
int size = chans_used[src];
|
||||
|
||||
if (!uniform_used[src]) {
|
||||
this->uniform_vector_size[src] = 0;
|
||||
continue;
|
||||
}
|
||||
if (size == 0)
|
||||
continue;
|
||||
|
||||
int dst;
|
||||
/* Find the lowest place we can slot this uniform in. */
|
||||
for (dst = 0; dst < src; dst++) {
|
||||
if (this->uniform_vector_size[dst] + size <= 4)
|
||||
if (chans_used[dst] + size <= 4)
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -565,7 +588,7 @@ vec4_visitor::pack_uniform_registers()
|
|||
new_chan[src] = 0;
|
||||
} else {
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = this->uniform_vector_size[dst];
|
||||
new_chan[src] = chans_used[dst];
|
||||
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
|
|
@ -573,8 +596,8 @@ vec4_visitor::pack_uniform_registers()
|
|||
stage_prog_data->param[src * 4 + j];
|
||||
}
|
||||
|
||||
this->uniform_vector_size[dst] += size;
|
||||
this->uniform_vector_size[src] = 0;
|
||||
chans_used[dst] += size;
|
||||
chans_used[src] = 0;
|
||||
}
|
||||
|
||||
new_uniform_count = MAX2(new_uniform_count, dst + 1);
|
||||
|
|
@ -1643,7 +1666,6 @@ vec4_visitor::setup_uniforms(int reg)
|
|||
*/
|
||||
if (devinfo->gen < 6 && this->uniforms == 0) {
|
||||
assert(this->uniforms < this->uniform_array_size);
|
||||
this->uniform_vector_size[this->uniforms] = 1;
|
||||
|
||||
stage_prog_data->param =
|
||||
reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
|
||||
|
|
@ -1685,12 +1707,6 @@ vec4_vs_visitor::setup_payload(void)
|
|||
this->first_non_payload_grf = reg;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::assign_binding_table_offsets()
|
||||
{
|
||||
assign_common_binding_table_offsets(0);
|
||||
}
|
||||
|
||||
src_reg
|
||||
vec4_visitor::get_timestamp()
|
||||
{
|
||||
|
|
@ -1786,31 +1802,14 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
|
|||
bool
|
||||
vec4_visitor::run()
|
||||
{
|
||||
bool use_vec4_nir =
|
||||
compiler->glsl_compiler_options[stage].NirOptions != NULL;
|
||||
|
||||
sanity_param_count = prog->Parameters->NumParameters;
|
||||
|
||||
if (shader_time_index >= 0)
|
||||
emit_shader_time_begin();
|
||||
|
||||
assign_binding_table_offsets();
|
||||
|
||||
emit_prolog();
|
||||
|
||||
if (use_vec4_nir) {
|
||||
assert(prog->nir != NULL);
|
||||
emit_nir_code();
|
||||
if (failed)
|
||||
return false;
|
||||
} else if (shader) {
|
||||
/* Generate VS IR for main(). (the visitor only descends into
|
||||
* functions called "main").
|
||||
*/
|
||||
visit_instructions(shader->base.ir);
|
||||
} else {
|
||||
emit_program_code();
|
||||
}
|
||||
emit_nir_code();
|
||||
if (failed)
|
||||
return false;
|
||||
base_ir = NULL;
|
||||
|
||||
emit_thread_end();
|
||||
|
|
@ -1823,18 +1822,9 @@ vec4_visitor::run()
|
|||
* that we have reladdr computations available for CSE, since we'll
|
||||
* often do repeated subexpressions for those.
|
||||
*/
|
||||
if (shader || use_vec4_nir) {
|
||||
move_grf_array_access_to_scratch();
|
||||
move_uniform_array_access_to_pull_constants();
|
||||
} else {
|
||||
/* The ARB_vertex_program frontend emits pull constant loads directly
|
||||
* rather than using reladdr, so we don't need to walk through all the
|
||||
* instructions looking for things to move. There isn't anything.
|
||||
*
|
||||
* We do still need to split things to vec4 size.
|
||||
*/
|
||||
split_uniform_registers();
|
||||
}
|
||||
move_grf_array_access_to_scratch();
|
||||
move_uniform_array_access_to_pull_constants();
|
||||
|
||||
pack_uniform_registers();
|
||||
move_push_constants_to_pull_constants();
|
||||
split_virtual_grfs();
|
||||
|
|
@ -1845,8 +1835,8 @@ vec4_visitor::run()
|
|||
\
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
|
||||
char filename[64]; \
|
||||
snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \
|
||||
stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
|
||||
snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass, \
|
||||
stage_abbrev, nir->info.name, iteration, pass_num); \
|
||||
\
|
||||
backend_shader::dump_instructions(filename); \
|
||||
} \
|
||||
|
|
@ -1858,8 +1848,8 @@ vec4_visitor::run()
|
|||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
|
||||
char filename[64];
|
||||
snprintf(filename, 64, "%s-%04d-00-start",
|
||||
stage_abbrev, shader_prog ? shader_prog->Name : 0);
|
||||
snprintf(filename, 64, "%s-%s-00-start",
|
||||
stage_abbrev, nir->info.name);
|
||||
|
||||
backend_shader::dump_instructions(filename);
|
||||
}
|
||||
|
|
@ -1933,13 +1923,6 @@ vec4_visitor::run()
|
|||
brw_get_scratch_size(last_scratch * REG_SIZE);
|
||||
}
|
||||
|
||||
/* If any state parameters were appended, then ParameterValues could have
|
||||
* been realloced, in which case the driver uniform storage set up by
|
||||
* _mesa_associate_uniform_storage() would point to freed memory. Make
|
||||
* sure that didn't happen.
|
||||
*/
|
||||
assert(sanity_param_count == prog->Parameters->NumParameters);
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
|
|
@ -1974,27 +1957,13 @@ brw_vs_emit(struct brw_context *brw,
|
|||
if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
|
||||
brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
|
||||
|
||||
if (!vp->Base.nir &&
|
||||
(brw->intelScreen->compiler->scalar_vs ||
|
||||
brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
|
||||
/* Normally we generate NIR in LinkShader() or
|
||||
* ProgramStringNotify(), but Mesa's fixed-function vertex program
|
||||
* handling doesn't notify the driver at all. Just do it here, at
|
||||
* the last minute, even though it's lame.
|
||||
*/
|
||||
assert(vp->Base.Id == 0 && prog == NULL);
|
||||
vp->Base.nir =
|
||||
brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
|
||||
brw->intelScreen->compiler->scalar_vs);
|
||||
}
|
||||
|
||||
if (brw->intelScreen->compiler->scalar_vs) {
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
|
||||
|
||||
fs_visitor v(brw->intelScreen->compiler, brw,
|
||||
mem_ctx, MESA_SHADER_VERTEX, key,
|
||||
&prog_data->base.base, prog, &vp->Base,
|
||||
8, st_index);
|
||||
mem_ctx, key, &prog_data->base.base,
|
||||
NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
|
||||
vp->Base.nir, 8, st_index);
|
||||
if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
|
||||
if (prog) {
|
||||
prog->LinkStatus = false;
|
||||
|
|
@ -2031,7 +2000,7 @@ brw_vs_emit(struct brw_context *brw,
|
|||
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
|
||||
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
|
||||
vp, prog, brw_select_clip_planes(&brw->ctx),
|
||||
vp->Base.nir, brw_select_clip_planes(&brw->ctx),
|
||||
mem_ctx, st_index,
|
||||
!_mesa_is_gles3(&brw->ctx));
|
||||
if (!v.run()) {
|
||||
|
|
|
|||
|
|
@ -65,16 +65,14 @@ class vec4_live_variables;
|
|||
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
|
||||
* fixed-function) into VS IR.
|
||||
*/
|
||||
class vec4_visitor : public backend_shader, public ir_visitor
|
||||
class vec4_visitor : public backend_shader
|
||||
{
|
||||
public:
|
||||
vec4_visitor(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
struct gl_program *prog,
|
||||
const struct brw_sampler_prog_key_data *key,
|
||||
struct brw_vue_prog_data *prog_data,
|
||||
struct gl_shader_program *shader_prog,
|
||||
gl_shader_stage stage,
|
||||
nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index);
|
||||
|
|
@ -97,8 +95,6 @@ public:
|
|||
|
||||
const struct brw_sampler_prog_key_data * const key_tex;
|
||||
struct brw_vue_prog_data * const prog_data;
|
||||
unsigned int sanity_param_count;
|
||||
|
||||
char *fail_msg;
|
||||
bool failed;
|
||||
|
||||
|
|
@ -116,66 +112,22 @@ public:
|
|||
brw::vec4_live_variables *live_intervals;
|
||||
dst_reg userplane[MAX_CLIP_PLANES];
|
||||
|
||||
dst_reg *variable_storage(ir_variable *var);
|
||||
|
||||
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
|
||||
|
||||
bool need_all_constants_in_pull_buffer;
|
||||
|
||||
/**
|
||||
* \name Visit methods
|
||||
*
|
||||
* As typical for the visitor pattern, there must be one \c visit method for
|
||||
* each concrete subclass of \c ir_instruction. Virtual base classes within
|
||||
* the hierarchy should not have \c visit methods.
|
||||
*/
|
||||
/*@{*/
|
||||
virtual void visit(ir_variable *);
|
||||
virtual void visit(ir_loop *);
|
||||
virtual void visit(ir_loop_jump *);
|
||||
virtual void visit(ir_function_signature *);
|
||||
virtual void visit(ir_function *);
|
||||
virtual void visit(ir_expression *);
|
||||
virtual void visit(ir_swizzle *);
|
||||
virtual void visit(ir_dereference_variable *);
|
||||
virtual void visit(ir_dereference_array *);
|
||||
virtual void visit(ir_dereference_record *);
|
||||
virtual void visit(ir_assignment *);
|
||||
virtual void visit(ir_constant *);
|
||||
virtual void visit(ir_call *);
|
||||
virtual void visit(ir_return *);
|
||||
virtual void visit(ir_discard *);
|
||||
virtual void visit(ir_texture *);
|
||||
virtual void visit(ir_if *);
|
||||
virtual void visit(ir_emit_vertex *);
|
||||
virtual void visit(ir_end_primitive *);
|
||||
virtual void visit(ir_barrier *);
|
||||
/*@}*/
|
||||
|
||||
src_reg result;
|
||||
|
||||
/* Regs for vertex results. Generated at ir_variable visiting time
|
||||
* for the ir->location's used.
|
||||
*/
|
||||
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
|
||||
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
|
||||
int *uniform_size;
|
||||
int *uniform_vector_size;
|
||||
int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
|
||||
int uniform_array_size; /*< Size of the uniform_size array */
|
||||
int uniforms;
|
||||
|
||||
src_reg shader_start_time;
|
||||
|
||||
struct hash_table *variable_ht;
|
||||
|
||||
bool run();
|
||||
void fail(const char *msg, ...);
|
||||
|
||||
virtual void setup_vec4_uniform_value(unsigned param_offset,
|
||||
const gl_constant_value *values,
|
||||
unsigned n);
|
||||
void setup_uniform_values(ir_variable *ir);
|
||||
void setup_builtin_uniform_values(ir_variable *ir);
|
||||
int setup_uniforms(int payload_reg);
|
||||
|
||||
bool reg_allocate_trivial();
|
||||
|
|
@ -271,21 +223,9 @@ public:
|
|||
|
||||
int implied_mrf_writes(vec4_instruction *inst);
|
||||
|
||||
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
|
||||
dst_reg dst,
|
||||
src_reg src,
|
||||
vec4_instruction *pre_rhs_inst,
|
||||
vec4_instruction *last_rhs_inst);
|
||||
|
||||
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
|
||||
void visit_instructions(const exec_list *list);
|
||||
|
||||
void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
|
||||
src_reg src0, src_reg src1, src_reg one);
|
||||
|
||||
void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
|
||||
void emit_if_gen6(ir_if *ir);
|
||||
|
||||
vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
|
||||
src_reg src0, src_reg src1);
|
||||
|
||||
|
|
@ -298,22 +238,11 @@ public:
|
|||
*/
|
||||
src_reg emit_uniformize(const src_reg &src);
|
||||
|
||||
void emit_block_move(dst_reg *dst, src_reg *src,
|
||||
const struct glsl_type *type, brw_predicate predicate);
|
||||
|
||||
void emit_constant_values(dst_reg *dst, ir_constant *value);
|
||||
|
||||
/**
|
||||
* Emit the correct dot-product instruction for the type of arguments
|
||||
*/
|
||||
void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
|
||||
|
||||
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
|
||||
dst_reg dst, src_reg src0);
|
||||
|
||||
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
|
||||
dst_reg dst, src_reg src0, src_reg src1);
|
||||
|
||||
src_reg fix_3src_operand(const src_reg &src);
|
||||
src_reg resolve_source_modifiers(const src_reg &src);
|
||||
|
||||
|
|
@ -389,29 +318,20 @@ public:
|
|||
src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
|
||||
vec4_instruction *inst, src_reg src);
|
||||
|
||||
bool try_emit_mad(ir_expression *ir);
|
||||
bool try_emit_b2f_of_compare(ir_expression *ir);
|
||||
void resolve_ud_negate(src_reg *reg);
|
||||
void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
|
||||
|
||||
src_reg get_timestamp();
|
||||
|
||||
bool process_move_condition(ir_rvalue *ir);
|
||||
|
||||
void dump_instruction(backend_instruction *inst);
|
||||
void dump_instruction(backend_instruction *inst, FILE *file);
|
||||
|
||||
void visit_atomic_counter_intrinsic(ir_call *ir);
|
||||
|
||||
bool is_high_sampler(src_reg sampler);
|
||||
|
||||
virtual void emit_nir_code();
|
||||
virtual void nir_setup_inputs(nir_shader *shader);
|
||||
virtual void nir_setup_uniforms(nir_shader *shader);
|
||||
virtual void nir_setup_uniform(nir_variable *var);
|
||||
virtual void nir_setup_builtin_uniform(nir_variable *var);
|
||||
virtual void nir_setup_inputs();
|
||||
virtual void nir_setup_uniforms();
|
||||
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
|
||||
virtual void nir_setup_system_values(nir_shader *shader);
|
||||
virtual void nir_setup_system_values();
|
||||
virtual void nir_emit_impl(nir_function_impl *impl);
|
||||
virtual void nir_emit_cf_list(exec_list *list);
|
||||
virtual void nir_emit_if(nir_if *if_stmt);
|
||||
|
|
@ -450,14 +370,11 @@ protected:
|
|||
bool interleaved);
|
||||
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
|
||||
int reg_node_count);
|
||||
virtual void assign_binding_table_offsets();
|
||||
virtual void setup_payload() = 0;
|
||||
virtual void emit_prolog() = 0;
|
||||
virtual void emit_program_code() = 0;
|
||||
virtual void emit_thread_end() = 0;
|
||||
virtual void emit_urb_write_header(int mrf) = 0;
|
||||
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
|
||||
virtual int compute_array_stride(ir_dereference_array *ir);
|
||||
virtual void gs_emit_vertex(int stream_id);
|
||||
virtual void gs_end_primitive();
|
||||
|
||||
|
|
|
|||
|
|
@ -27,11 +27,11 @@
|
|||
namespace brw {
|
||||
|
||||
void
|
||||
vec4_gs_visitor::nir_setup_inputs(nir_shader *shader)
|
||||
vec4_gs_visitor::nir_setup_inputs()
|
||||
{
|
||||
nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
|
||||
nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||
int offset = var->data.driver_location;
|
||||
if (var->type->base_type == GLSL_TYPE_ARRAY) {
|
||||
/* Geometry shader inputs are arrays, but they use an unusual array
|
||||
|
|
@ -72,6 +72,10 @@ vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
|
|||
dst_reg *reg;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
/* We'll just read g1 directly; don't create a temporary. */
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
|
||||
if (reg->file == BAD_FILE)
|
||||
|
|
@ -111,6 +115,12 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
assert(c->prog_data.include_primitive_id);
|
||||
dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
|
||||
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_invocation_id: {
|
||||
src_reg invocation_id =
|
||||
src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
|
||||
|
|
|
|||
|
|
@ -38,13 +38,14 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
|
|||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct gl_shader_program *prog,
|
||||
nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index)
|
||||
: vec4_visitor(compiler, log_data,
|
||||
&c->gp->program.Base, &c->key.tex,
|
||||
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
|
||||
: vec4_visitor(compiler, log_data, &c->key.tex,
|
||||
&c->prog_data.base, shader, mem_ctx,
|
||||
no_spills, shader_time_index),
|
||||
shader_prog(prog),
|
||||
c(c)
|
||||
{
|
||||
}
|
||||
|
|
@ -207,15 +208,6 @@ vec4_gs_visitor::emit_prolog()
|
|||
this->current_annotation = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
vec4_gs_visitor::emit_program_code()
|
||||
{
|
||||
/* We don't support NV_geometry_program4. */
|
||||
unreachable("Unreached");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
vec4_gs_visitor::emit_thread_end()
|
||||
{
|
||||
|
|
@ -310,24 +302,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
|
|||
}
|
||||
|
||||
|
||||
int
|
||||
vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
|
||||
{
|
||||
/* Geometry shader inputs are arrays, but they use an unusual array layout:
|
||||
* instead of all array elements for a given geometry shader input being
|
||||
* stored consecutively, all geometry shader inputs are interleaved into
|
||||
* one giant array. At this stage of compilation, we assume that the
|
||||
* stride of the array is BRW_VARYING_SLOT_COUNT. Later,
|
||||
* setup_attributes() will remap our accesses to the actual input array.
|
||||
*/
|
||||
ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
|
||||
if (deref_var && deref_var->var->data.mode == ir_var_shader_in)
|
||||
return BRW_VARYING_SLOT_COUNT;
|
||||
else
|
||||
return vec4_visitor::compute_array_stride(ir);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Write out a batch of 32 control data bits from the control_data_bits
|
||||
* register to the URB.
|
||||
|
|
@ -575,27 +549,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
this->current_annotation = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_gs_visitor::visit(ir_emit_vertex *ir)
|
||||
{
|
||||
/* To ensure that we don't output more vertices than the shader specified
|
||||
* using max_vertices, do the logic inside a conditional of the form "if
|
||||
* (vertex_count < MAX)"
|
||||
*/
|
||||
unsigned num_output_vertices = c->gp->program.VerticesOut;
|
||||
emit(CMP(dst_null_d(), this->vertex_count,
|
||||
src_reg(num_output_vertices), BRW_CONDITIONAL_L));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
|
||||
gs_emit_vertex(ir->stream_id());
|
||||
|
||||
this->current_annotation = "emit vertex: increment vertex count";
|
||||
emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
|
||||
src_reg(1u)));
|
||||
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_gs_visitor::gs_end_primitive()
|
||||
{
|
||||
|
|
@ -647,12 +600,6 @@ vec4_gs_visitor::gs_end_primitive()
|
|||
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
|
||||
}
|
||||
|
||||
void
|
||||
vec4_gs_visitor::visit(ir_end_primitive *)
|
||||
{
|
||||
gs_end_primitive();
|
||||
}
|
||||
|
||||
static const unsigned *
|
||||
generate_assembly(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
|
|
@ -675,12 +622,10 @@ brw_gs_emit(struct brw_context *brw,
|
|||
void *mem_ctx,
|
||||
unsigned *final_assembly_size)
|
||||
{
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
|
||||
struct brw_shader *shader =
|
||||
(brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
|
||||
brw_dump_ir("geometry", prog, &shader->base, NULL);
|
||||
}
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_GS))
|
||||
brw_dump_ir("geometry", prog, shader, NULL);
|
||||
|
||||
int st_index = -1;
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
|
|
@ -696,7 +641,8 @@ brw_gs_emit(struct brw_context *brw,
|
|||
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
|
||||
vec4_gs_visitor v(brw->intelScreen->compiler, brw,
|
||||
c, prog, mem_ctx, true /* no_spills */, st_index);
|
||||
c, prog, shader->Program->nir,
|
||||
mem_ctx, true /* no_spills */, st_index);
|
||||
if (v.run()) {
|
||||
return generate_assembly(brw, prog, &c->gp->program.Base,
|
||||
&c->prog_data.base, mem_ctx, v.cfg,
|
||||
|
|
@ -738,11 +684,13 @@ brw_gs_emit(struct brw_context *brw,
|
|||
|
||||
if (brw->gen >= 7)
|
||||
gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
|
||||
c, prog, mem_ctx, false /* no_spills */,
|
||||
c, prog, shader->Program->nir,
|
||||
mem_ctx, false /* no_spills */,
|
||||
st_index);
|
||||
else
|
||||
gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
|
||||
c, prog, mem_ctx, false /* no_spills */,
|
||||
c, prog, shader->Program->nir,
|
||||
mem_ctx, false /* no_spills */,
|
||||
st_index);
|
||||
|
||||
if (!gs->run()) {
|
||||
|
|
|
|||
|
|
@ -71,11 +71,12 @@ public:
|
|||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct gl_shader_program *prog,
|
||||
nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index);
|
||||
|
||||
virtual void nir_setup_inputs(nir_shader *shader);
|
||||
virtual void nir_setup_inputs();
|
||||
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
|
||||
|
||||
protected:
|
||||
|
|
@ -83,13 +84,9 @@ protected:
|
|||
const glsl_type *type);
|
||||
virtual void setup_payload();
|
||||
virtual void emit_prolog();
|
||||
virtual void emit_program_code();
|
||||
virtual void emit_thread_end();
|
||||
virtual void emit_urb_write_header(int mrf);
|
||||
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
|
||||
virtual int compute_array_stride(ir_dereference_array *ir);
|
||||
virtual void visit(ir_emit_vertex *);
|
||||
virtual void visit(ir_end_primitive *);
|
||||
virtual void gs_emit_vertex(int stream_id);
|
||||
virtual void gs_end_primitive();
|
||||
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
|
||||
|
|
@ -100,6 +97,8 @@ protected:
|
|||
void emit_control_data_bits();
|
||||
void set_stream_control_data_bits(unsigned stream_id);
|
||||
|
||||
struct gl_shader_program *shader_prog;
|
||||
|
||||
src_reg vertex_count;
|
||||
src_reg control_data_bits;
|
||||
const struct brw_gs_compile * const c;
|
||||
|
|
|
|||
|
|
@ -35,15 +35,13 @@ namespace brw {
|
|||
void
|
||||
vec4_visitor::emit_nir_code()
|
||||
{
|
||||
nir_shader *nir = prog->nir;
|
||||
|
||||
if (nir->num_inputs > 0)
|
||||
nir_setup_inputs(nir);
|
||||
nir_setup_inputs();
|
||||
|
||||
if (nir->num_uniforms > 0)
|
||||
nir_setup_uniforms(nir);
|
||||
nir_setup_uniforms();
|
||||
|
||||
nir_setup_system_values(nir);
|
||||
nir_setup_system_values();
|
||||
|
||||
/* get the main function and emit it */
|
||||
nir_foreach_overload(nir, overload) {
|
||||
|
|
@ -105,11 +103,11 @@ setup_system_values_block(nir_block *block, void *void_visitor)
|
|||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_setup_system_values(nir_shader *shader)
|
||||
vec4_visitor::nir_setup_system_values()
|
||||
{
|
||||
nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
|
||||
|
||||
nir_foreach_overload(shader, overload) {
|
||||
nir_foreach_overload(nir, overload) {
|
||||
assert(strcmp(overload->function->name, "main") == 0);
|
||||
assert(overload->impl);
|
||||
nir_foreach_block(overload->impl, setup_system_values_block, this);
|
||||
|
|
@ -117,11 +115,11 @@ vec4_visitor::nir_setup_system_values(nir_shader *shader)
|
|||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_setup_inputs(nir_shader *shader)
|
||||
vec4_visitor::nir_setup_inputs()
|
||||
{
|
||||
nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
|
||||
nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
|
||||
|
||||
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
|
||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||
int offset = var->data.driver_location;
|
||||
unsigned size = type_size_vec4(var->type);
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
|
|
@ -132,137 +130,17 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader)
|
|||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_setup_uniforms(nir_shader *shader)
|
||||
vec4_visitor::nir_setup_uniforms()
|
||||
{
|
||||
uniforms = 0;
|
||||
uniforms = nir->num_uniforms;
|
||||
|
||||
if (shader_prog) {
|
||||
foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
|
||||
/* UBO's, atomics and samplers don't take up space in the
|
||||
uniform file */
|
||||
if (var->interface_type != NULL || var->type->contains_atomic() ||
|
||||
type_size_vec4(var->type) == 0) {
|
||||
continue;
|
||||
}
|
||||
foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
|
||||
/* UBO's and atomics don't take up space in the uniform file */
|
||||
if (var->interface_type != NULL || var->type->contains_atomic())
|
||||
continue;
|
||||
|
||||
assert(uniforms < uniform_array_size);
|
||||
uniform_size[uniforms] = type_size_vec4(var->type);
|
||||
|
||||
if (strncmp(var->name, "gl_", 3) == 0)
|
||||
nir_setup_builtin_uniform(var);
|
||||
else
|
||||
nir_setup_uniform(var);
|
||||
}
|
||||
} else {
|
||||
/* For ARB_vertex_program, only a single "parameters" variable is
|
||||
* generated to support uniform data.
|
||||
*/
|
||||
nir_variable *var = (nir_variable *) shader->uniforms.get_head();
|
||||
assert(shader->uniforms.length() == 1 &&
|
||||
strcmp(var->name, "parameters") == 0);
|
||||
|
||||
assert(uniforms < uniform_array_size);
|
||||
uniform_size[uniforms] = type_size_vec4(var->type);
|
||||
|
||||
struct gl_program_parameter_list *plist = prog->Parameters;
|
||||
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
||||
uniform_vector_size[uniforms] = plist->Parameters[p].Size;
|
||||
|
||||
/* Parameters should be either vec4 uniforms or single component
|
||||
* constants; matrices and other larger types should have been broken
|
||||
* down earlier.
|
||||
*/
|
||||
assert(uniform_vector_size[uniforms] <= 4);
|
||||
|
||||
int i;
|
||||
for (i = 0; i < uniform_vector_size[uniforms]; i++) {
|
||||
stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i];
|
||||
}
|
||||
for (; i < 4; i++) {
|
||||
static const gl_constant_value zero = { 0.0 };
|
||||
stage_prog_data->param[uniforms * 4 + i] = &zero;
|
||||
}
|
||||
|
||||
uniforms++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_setup_uniform(nir_variable *var)
|
||||
{
|
||||
int namelen = strlen(var->name);
|
||||
|
||||
/* The data for our (non-builtin) uniforms is stored in a series of
|
||||
* gl_uniform_driver_storage structs for each subcomponent that
|
||||
* glGetUniformLocation() could name. We know it's been set up in the same
|
||||
* order we'd walk the type, so walk the list of storage and find anything
|
||||
* with our name, or the prefix of a component that starts with our name.
|
||||
*/
|
||||
for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
|
||||
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
|
||||
|
||||
if (storage->builtin)
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, storage->name, namelen) != 0 ||
|
||||
(storage->name[namelen] != 0 &&
|
||||
storage->name[namelen] != '.' &&
|
||||
storage->name[namelen] != '[')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
gl_constant_value *components = storage->storage;
|
||||
unsigned vector_count = (MAX2(storage->array_elements, 1) *
|
||||
storage->type->matrix_columns);
|
||||
|
||||
for (unsigned s = 0; s < vector_count; s++) {
|
||||
assert(uniforms < uniform_array_size);
|
||||
uniform_vector_size[uniforms] = storage->type->vector_elements;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < uniform_vector_size[uniforms]; i++) {
|
||||
stage_prog_data->param[uniforms * 4 + i] = components;
|
||||
components++;
|
||||
}
|
||||
for (; i < 4; i++) {
|
||||
static const gl_constant_value zero = { 0.0 };
|
||||
stage_prog_data->param[uniforms * 4 + i] = &zero;
|
||||
}
|
||||
|
||||
uniforms++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
|
||||
{
|
||||
const nir_state_slot *const slots = var->state_slots;
|
||||
assert(var->state_slots != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < var->num_state_slots; i++) {
|
||||
/* This state reference has already been setup by ir_to_mesa,
|
||||
* but we'll get the same index back here. We can reference
|
||||
* ParameterValues directly, since unlike brw_fs.cpp, we never
|
||||
* add new state references during compile.
|
||||
*/
|
||||
int index = _mesa_add_state_reference(prog->Parameters,
|
||||
(gl_state_index *)slots[i].tokens);
|
||||
gl_constant_value *values =
|
||||
&prog->Parameters->ParameterValues[index][0];
|
||||
|
||||
assert(uniforms < uniform_array_size);
|
||||
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
stage_prog_data->param[uniforms * 4 + j] =
|
||||
&values[GET_SWZ(slots[i].swizzle, j)];
|
||||
|
||||
uniform_vector_size[uniforms] =
|
||||
(var->type->is_scalar() || var->type->is_vector() ||
|
||||
var->type->is_matrix() ? var->type->vector_elements : 4);
|
||||
|
||||
uniforms++;
|
||||
if (type_size_vec4(var->type) > 0)
|
||||
uniform_size[var->data.driver_location] = type_size_vec4(var->type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -547,8 +425,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
|
||||
unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
|
||||
|
||||
assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
|
||||
|
||||
src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
|
||||
ubo_index);
|
||||
dst_reg result_dst = get_nir_dest(instr->dest);
|
||||
|
|
@ -592,7 +468,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
|
||||
brw_mark_surface_used(&prog_data->base,
|
||||
prog_data->base.binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
/* Offset */
|
||||
|
|
@ -739,7 +615,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
*/
|
||||
brw_mark_surface_used(&prog_data->base,
|
||||
prog_data->base.binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
src_reg offset_reg = src_reg(this, glsl_type::uint_type);
|
||||
|
|
@ -891,7 +767,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
*/
|
||||
brw_mark_surface_used(&prog_data->base,
|
||||
prog_data->base.binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
unsigned const_offset = instr->const_index[1];
|
||||
|
|
@ -923,6 +799,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_memory_barrier: {
|
||||
const vec4_builder bld =
|
||||
vec4_builder(this).at_end().annotate(current_annotation, base_ir);
|
||||
const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||
bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
|
||||
->regs_written = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unknown intrinsic");
|
||||
}
|
||||
|
|
@ -952,7 +837,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
|
|||
*/
|
||||
brw_mark_surface_used(&prog_data->base,
|
||||
prog_data->base.binding_table.ubo_start +
|
||||
shader_prog->NumBufferInterfaceBlocks - 1);
|
||||
nir->info.num_ssbos - 1);
|
||||
}
|
||||
|
||||
src_reg offset = get_nir_src(instr->src[1], 1);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,649 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file brw_vec4_vp.cpp
|
||||
*
|
||||
* A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
|
||||
* ARB_vertex_program and fixed-function vertex processing.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_vec4.h"
|
||||
#include "brw_vs.h"
|
||||
extern "C" {
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/prog_print.h"
|
||||
}
|
||||
using namespace brw;
|
||||
|
||||
void
|
||||
vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
|
||||
dst_reg dst, src_reg src0, src_reg src1,
|
||||
src_reg one)
|
||||
{
|
||||
vec4_instruction *inst;
|
||||
|
||||
inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
|
||||
|
||||
inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_vs_visitor::emit_program_code()
|
||||
{
|
||||
this->need_all_constants_in_pull_buffer = false;
|
||||
|
||||
setup_vp_regs();
|
||||
|
||||
/* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
|
||||
* be:
|
||||
*
|
||||
* sel.f0 dst 1.0 0.0
|
||||
*
|
||||
* instead of
|
||||
*
|
||||
* mov dst 0.0
|
||||
* mov.f0 dst 1.0
|
||||
*/
|
||||
src_reg one = src_reg(this, glsl_type::float_type);
|
||||
emit(MOV(dst_reg(one), src_reg(1.0f)));
|
||||
|
||||
for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
|
||||
const struct prog_instruction *vpi = &prog->Instructions[insn];
|
||||
base_ir = vpi;
|
||||
|
||||
dst_reg dst;
|
||||
src_reg src[3];
|
||||
|
||||
/* We always emit into a temporary destination register to avoid
|
||||
* aliasing issues.
|
||||
*/
|
||||
dst = dst_reg(this, glsl_type::vec4_type);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
src[i] = get_vp_src_reg(vpi->SrcReg[i]);
|
||||
|
||||
switch (vpi->Opcode) {
|
||||
case OPCODE_ABS:
|
||||
src[0].abs = true;
|
||||
src[0].negate = false;
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_ADD:
|
||||
emit(ADD(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_ARL:
|
||||
if (devinfo->gen >= 6) {
|
||||
dst.writemask = WRITEMASK_X;
|
||||
dst_reg dst_f = dst;
|
||||
dst_f.type = BRW_REGISTER_TYPE_F;
|
||||
|
||||
emit(RNDD(dst_f, src[0]));
|
||||
emit(MOV(dst, src_reg(dst_f)));
|
||||
} else {
|
||||
emit(RNDD(dst, src[0]));
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_DP3:
|
||||
emit(DP3(dst, src[0], src[1]));
|
||||
break;
|
||||
case OPCODE_DP4:
|
||||
emit(DP4(dst, src[0], src[1]));
|
||||
break;
|
||||
case OPCODE_DPH:
|
||||
emit(DPH(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_DST: {
|
||||
dst_reg t = dst;
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
|
||||
t.writemask = WRITEMASK_X;
|
||||
emit(MOV(t, src_reg(1.0f)));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
t.writemask = WRITEMASK_Y;
|
||||
emit(MUL(t, src[0], src[1]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
t.writemask = WRITEMASK_Z;
|
||||
emit(MOV(t, src[0]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
t.writemask = WRITEMASK_W;
|
||||
emit(MOV(t, src[1]));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_EXP: {
|
||||
dst_reg result = dst;
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
|
||||
/* tmp_d = floor(src[0].x) */
|
||||
src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
|
||||
assert(tmp_d.type == BRW_REGISTER_TYPE_D);
|
||||
emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
|
||||
|
||||
/* result[0] = 2.0 ^ tmp */
|
||||
/* Adjust exponent for floating point: exp += 127 */
|
||||
dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
|
||||
emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
|
||||
|
||||
/* Install exponent and sign. Excess drops off the edge: */
|
||||
dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
|
||||
emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
emit(FRC(result, src[0]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
result.writemask = WRITEMASK_Z;
|
||||
emit_math(SHADER_OPCODE_EXP2, result, src[0]);
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
result.writemask = WRITEMASK_W;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_EX2:
|
||||
emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_FLR:
|
||||
emit(RNDD(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_FRC:
|
||||
emit(FRC(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_LG2:
|
||||
emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_LIT: {
|
||||
dst_reg result = dst;
|
||||
/* From the ARB_vertex_program spec:
|
||||
*
|
||||
* tmp = VectorLoad(op0);
|
||||
* if (tmp.x < 0) tmp.x = 0;
|
||||
* if (tmp.y < 0) tmp.y = 0;
|
||||
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
|
||||
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
|
||||
* result.x = 1.0;
|
||||
* result.y = tmp.x;
|
||||
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
|
||||
* result.w = 1.0;
|
||||
*
|
||||
* Note that we don't do the clamping to +/- 128. We didn't in
|
||||
* brw_vs_emit.c either.
|
||||
*/
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
|
||||
result.writemask = WRITEMASK_XW;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
|
||||
result.writemask = WRITEMASK_YZ;
|
||||
emit(MOV(result, src_reg(0.0f)));
|
||||
|
||||
src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
|
||||
|
||||
emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
emit(MOV(result, tmp_x));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
/* if (tmp.y < 0) tmp.y = 0; */
|
||||
src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
|
||||
result.writemask = WRITEMASK_Z;
|
||||
emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
|
||||
|
||||
src_reg clamped_y(result);
|
||||
clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
|
||||
|
||||
src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
|
||||
|
||||
emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
|
||||
}
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_LOG: {
|
||||
dst_reg result = dst;
|
||||
result.type = BRW_REGISTER_TYPE_UD;
|
||||
src_reg result_src = src_reg(result);
|
||||
|
||||
src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
|
||||
arg0_ud.type = BRW_REGISTER_TYPE_UD;
|
||||
|
||||
/* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
|
||||
* according to spec:
|
||||
*
|
||||
* These almost look likey they could be joined up, but not really
|
||||
* practical:
|
||||
*
|
||||
* result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
|
||||
* result[1].i = (x.i & ((1<<23)-1) + (127<<23)
|
||||
*/
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
|
||||
result.writemask = WRITEMASK_X;
|
||||
emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
|
||||
emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
|
||||
src_reg result_d(result_src);
|
||||
result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
emit(ADD(result, result_d, src_reg(-127)));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
result.type = BRW_REGISTER_TYPE_UD;
|
||||
emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
|
||||
emit(OR(result, result_src, src_reg(127u << 23)));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
/* result[2] = result[0] + LOG2(result[1]); */
|
||||
|
||||
/* Why bother? The above is just a hint how to do this with a
|
||||
* taylor series. Maybe we *should* use a taylor series as by
|
||||
* the time all the above has been done it's almost certainly
|
||||
* quicker than calling the mathbox, even with low precision.
|
||||
*
|
||||
* Options are:
|
||||
* - result[0] + mathbox.LOG2(result[1])
|
||||
* - mathbox.LOG2(arg0.x)
|
||||
* - result[0] + inline_taylor_approx(result[1])
|
||||
*/
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.writemask = WRITEMASK_Z;
|
||||
src_reg result_x(result), result_y(result), result_z(result);
|
||||
result_x.swizzle = BRW_SWIZZLE_XXXX;
|
||||
result_y.swizzle = BRW_SWIZZLE_YYYY;
|
||||
result_z.swizzle = BRW_SWIZZLE_ZZZZ;
|
||||
emit_math(SHADER_OPCODE_LOG2, result, result_y);
|
||||
emit(ADD(result, result_z, result_x));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.writemask = WRITEMASK_W;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_MAD: {
|
||||
src_reg temp = src_reg(this, glsl_type::vec4_type);
|
||||
emit(MUL(dst_reg(temp), src[0], src[1]));
|
||||
emit(ADD(dst, temp, src[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_MAX:
|
||||
emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MIN:
|
||||
emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MOV:
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_MUL:
|
||||
emit(MUL(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_POW:
|
||||
emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_RCP:
|
||||
emit_math(SHADER_OPCODE_RCP, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_RSQ:
|
||||
emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_SGE:
|
||||
emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SLT:
|
||||
emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SUB: {
|
||||
src_reg neg_src1 = src[1];
|
||||
neg_src1.negate = !src[1].negate;
|
||||
emit(ADD(dst, src[0], neg_src1));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_SWZ:
|
||||
/* Note that SWZ's extended swizzles are handled in the general
|
||||
* get_src_reg() code.
|
||||
*/
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_XPD: {
|
||||
src_reg t1 = src_reg(this, glsl_type::vec4_type);
|
||||
src_reg t2 = src_reg(this, glsl_type::vec4_type);
|
||||
|
||||
emit(MUL(dst_reg(t1),
|
||||
swizzle(src[0], BRW_SWIZZLE_YZXW),
|
||||
swizzle(src[1], BRW_SWIZZLE_ZXYW)));
|
||||
emit(MUL(dst_reg(t2),
|
||||
swizzle(src[0], BRW_SWIZZLE_ZXYW),
|
||||
swizzle(src[1], BRW_SWIZZLE_YZXW)));
|
||||
t2.negate = true;
|
||||
emit(ADD(dst, t1, t2));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_END:
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"Unsupported opcode in vertex program");
|
||||
}
|
||||
|
||||
/* Copy the temporary back into the actual destination register. */
|
||||
if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
|
||||
emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
|
||||
}
|
||||
}
|
||||
|
||||
/* If we used relative addressing, we need to upload all constants as
|
||||
* pull constants. Do that now.
|
||||
*/
|
||||
if (this->need_all_constants_in_pull_buffer) {
|
||||
const struct gl_program_parameter_list *params = vp->Base.Parameters;
|
||||
unsigned i;
|
||||
for (i = 0; i < params->NumParameters * 4; i++) {
|
||||
stage_prog_data->pull_param[i] =
|
||||
¶ms->ParameterValues[i / 4][i % 4];
|
||||
}
|
||||
stage_prog_data->nr_pull_params = i;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_vs_visitor::setup_vp_regs()
|
||||
{
|
||||
/* PROGRAM_TEMPORARY */
|
||||
int num_temp = prog->NumTemporaries;
|
||||
vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
|
||||
for (int i = 0; i < num_temp; i++)
|
||||
vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
|
||||
|
||||
/* PROGRAM_STATE_VAR etc. */
|
||||
struct gl_program_parameter_list *plist = vp->Base.Parameters;
|
||||
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
||||
unsigned components = plist->Parameters[p].Size;
|
||||
|
||||
/* Parameters should be either vec4 uniforms or single component
|
||||
* constants; matrices and other larger types should have been broken
|
||||
* down earlier.
|
||||
*/
|
||||
assert(components <= 4);
|
||||
|
||||
this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
|
||||
this->uniform_vector_size[this->uniforms] = components;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
stage_prog_data->param[this->uniforms * 4 + i] = i >= components
|
||||
? 0 : &plist->ParameterValues[p][i];
|
||||
}
|
||||
this->uniforms++; /* counted in vec4 units */
|
||||
}
|
||||
|
||||
/* PROGRAM_OUTPUT */
|
||||
for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
|
||||
int varying = prog_data->vue_map.slot_to_varying[slot];
|
||||
if (varying == VARYING_SLOT_PSIZ)
|
||||
output_reg[varying] = dst_reg(this, glsl_type::float_type);
|
||||
else
|
||||
output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
|
||||
assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
|
||||
}
|
||||
|
||||
/* PROGRAM_ADDRESS */
|
||||
this->vp_addr_reg = src_reg(this, glsl_type::int_type);
|
||||
assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
|
||||
}
|
||||
|
||||
dst_reg
|
||||
vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
|
||||
{
|
||||
dst_reg result;
|
||||
|
||||
assert(!dst.RelAddr);
|
||||
|
||||
switch (dst.File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
result = dst_reg(vp_temp_regs[dst.Index]);
|
||||
break;
|
||||
|
||||
case PROGRAM_OUTPUT:
|
||||
result = output_reg[dst.Index];
|
||||
break;
|
||||
|
||||
case PROGRAM_ADDRESS: {
|
||||
assert(dst.Index == 0);
|
||||
result = dst_reg(this->vp_addr_reg);
|
||||
break;
|
||||
}
|
||||
|
||||
case PROGRAM_UNDEFINED:
|
||||
return dst_null_f();
|
||||
|
||||
default:
|
||||
unreachable("vec4_vp: bad destination register file");
|
||||
}
|
||||
|
||||
result.writemask = dst.WriteMask;
|
||||
return result;
|
||||
}
|
||||
|
||||
src_reg
|
||||
vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
|
||||
{
|
||||
struct gl_program_parameter_list *plist = vp->Base.Parameters;
|
||||
|
||||
src_reg result;
|
||||
|
||||
assert(!src.Abs);
|
||||
|
||||
switch (src.File) {
|
||||
case PROGRAM_UNDEFINED:
|
||||
return src_reg(brw_null_reg());
|
||||
|
||||
case PROGRAM_TEMPORARY:
|
||||
result = vp_temp_regs[src.Index];
|
||||
break;
|
||||
|
||||
case PROGRAM_INPUT:
|
||||
result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
break;
|
||||
|
||||
case PROGRAM_ADDRESS: {
|
||||
assert(src.Index == 0);
|
||||
result = this->vp_addr_reg;
|
||||
break;
|
||||
}
|
||||
|
||||
case PROGRAM_STATE_VAR:
|
||||
case PROGRAM_CONSTANT:
|
||||
/* From the ARB_vertex_program specification:
|
||||
* "Relative addressing can only be used for accessing program
|
||||
* parameter arrays."
|
||||
*/
|
||||
if (src.RelAddr) {
|
||||
/* Since we have no idea what the base of the array is, we need to
|
||||
* upload ALL constants as push constants.
|
||||
*/
|
||||
this->need_all_constants_in_pull_buffer = true;
|
||||
|
||||
/* Add the small constant index to the address register */
|
||||
src_reg reladdr = src_reg(this, glsl_type::int_type);
|
||||
|
||||
dst_reg dst_reladdr = dst_reg(reladdr);
|
||||
dst_reladdr.writemask = WRITEMASK_X;
|
||||
emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
|
||||
|
||||
if (devinfo->gen < 6)
|
||||
emit(MUL(dst_reladdr, reladdr, src_reg(16)));
|
||||
|
||||
#if 0
|
||||
assert(src.Index < this->uniforms);
|
||||
result = src_reg(dst_reg(UNIFORM, 0));
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.reladdr = new(mem_ctx) src_reg();
|
||||
memcpy(result.reladdr, &reladdr, sizeof(src_reg));
|
||||
#endif
|
||||
|
||||
result = src_reg(this, glsl_type::vec4_type);
|
||||
src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
|
||||
|
||||
emit_pull_constant_load_reg(dst_reg(result),
|
||||
surf_index,
|
||||
reladdr,
|
||||
NULL, NULL /* before_block/inst */);
|
||||
break;
|
||||
}
|
||||
|
||||
/* We actually want to look at the type in the Parameters list for this,
|
||||
* because this lets us upload constant builtin uniforms as actual
|
||||
* constants.
|
||||
*/
|
||||
switch (plist->Parameters[src.Index].Type) {
|
||||
case PROGRAM_CONSTANT:
|
||||
result = src_reg(this, glsl_type::vec4_type);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
dst_reg t = dst_reg(result);
|
||||
t.writemask = 1 << i;
|
||||
emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
|
||||
}
|
||||
break;
|
||||
|
||||
case PROGRAM_STATE_VAR:
|
||||
assert(src.Index < this->uniforms);
|
||||
result = src_reg(dst_reg(UNIFORM, src.Index));
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"Bad uniform in src register file");
|
||||
return src_reg(this, glsl_type::vec4_type);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"Bad src register file");
|
||||
return src_reg(this, glsl_type::vec4_type);
|
||||
}
|
||||
|
||||
if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
|
||||
unsigned short zeros_mask = 0;
|
||||
unsigned short ones_mask = 0;
|
||||
unsigned short src_mask = 0;
|
||||
unsigned short src_swiz[4];
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src_swiz[i] = 0; /* initialize for safety */
|
||||
|
||||
/* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
|
||||
* but it's simplest to handle it here.
|
||||
*/
|
||||
int s = GET_SWZ(src.Swizzle, i);
|
||||
switch (s) {
|
||||
case SWIZZLE_X:
|
||||
case SWIZZLE_Y:
|
||||
case SWIZZLE_Z:
|
||||
case SWIZZLE_W:
|
||||
src_mask |= 1 << i;
|
||||
src_swiz[i] = s;
|
||||
break;
|
||||
case SWIZZLE_ZERO:
|
||||
zeros_mask |= 1 << i;
|
||||
break;
|
||||
case SWIZZLE_ONE:
|
||||
ones_mask |= 1 << i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result.swizzle =
|
||||
BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
|
||||
|
||||
/* The hardware doesn't natively handle the SWZ instruction's zero/one
|
||||
* swizzles or per-component negation, so we need to use a temporary.
|
||||
*/
|
||||
if (zeros_mask || ones_mask || src.Negate) {
|
||||
src_reg temp_src(this, glsl_type::vec4_type);
|
||||
dst_reg temp(temp_src);
|
||||
|
||||
if (src_mask) {
|
||||
temp.writemask = src_mask;
|
||||
emit(MOV(temp, result));
|
||||
}
|
||||
|
||||
if (zeros_mask) {
|
||||
temp.writemask = zeros_mask;
|
||||
emit(MOV(temp, src_reg(0.0f)));
|
||||
}
|
||||
|
||||
if (ones_mask) {
|
||||
temp.writemask = ones_mask;
|
||||
emit(MOV(temp, src_reg(1.0f)));
|
||||
}
|
||||
|
||||
if (src.Negate) {
|
||||
temp.writemask = src.Negate;
|
||||
src_reg neg(temp_src);
|
||||
neg.negate = true;
|
||||
emit(MOV(temp, neg));
|
||||
}
|
||||
result = temp_src;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
@ -262,7 +262,6 @@ vec4_vs_visitor::setup_uniform_clipplane_values()
|
|||
{
|
||||
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
|
||||
assert(this->uniforms < uniform_array_size);
|
||||
this->uniform_vector_size[this->uniforms] = 4;
|
||||
this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
|
||||
this->userplane[i].type = BRW_REGISTER_TYPE_F;
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
|
|
@ -302,20 +301,15 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
|
|||
void *log_data,
|
||||
const struct brw_vs_prog_key *key,
|
||||
struct brw_vs_prog_data *vs_prog_data,
|
||||
struct gl_vertex_program *vp,
|
||||
struct gl_shader_program *prog,
|
||||
nir_shader *shader,
|
||||
gl_clip_plane *clip_planes,
|
||||
void *mem_ctx,
|
||||
int shader_time_index,
|
||||
bool use_legacy_snorm_formula)
|
||||
: vec4_visitor(compiler, log_data,
|
||||
&vp->Base, &key->tex, &vs_prog_data->base, prog,
|
||||
MESA_SHADER_VERTEX,
|
||||
mem_ctx, false /* no_spills */,
|
||||
shader_time_index),
|
||||
: vec4_visitor(compiler, log_data, &key->tex, &vs_prog_data->base, shader,
|
||||
mem_ctx, false /* no_spills */, shader_time_index),
|
||||
key(key),
|
||||
vs_prog_data(vs_prog_data),
|
||||
vp(vp),
|
||||
clip_planes(clip_planes),
|
||||
use_legacy_snorm_formula(use_legacy_snorm_formula)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
#include "brw_state.h"
|
||||
#include "program/prog_print.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "brw_nir.h"
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
|
|
@ -65,27 +66,6 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
brw_vs_prog_data_compare(const void *in_a, const void *in_b)
|
||||
{
|
||||
const struct brw_vs_prog_data *a = in_a;
|
||||
const struct brw_vs_prog_data *b = in_b;
|
||||
|
||||
/* Compare the base structure. */
|
||||
if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
|
||||
return false;
|
||||
|
||||
/* Compare the rest of the struct. */
|
||||
const unsigned offset = sizeof(struct brw_stage_prog_data);
|
||||
if (memcmp(((char *) a) + offset, ((char *) b) + offset,
|
||||
sizeof(struct brw_vs_prog_data) - offset)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_codegen_vs_prog(struct brw_context *brw,
|
||||
struct gl_shader_program *prog,
|
||||
|
|
@ -102,6 +82,18 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
bool start_busy = false;
|
||||
double start_time = 0;
|
||||
|
||||
if (!vp->program.Base.nir) {
|
||||
/* Normally we generate NIR in LinkShader() or
|
||||
* ProgramStringNotify(), but Mesa's fixed-function vertex program
|
||||
* handling doesn't notify the driver at all. Just do it here, at
|
||||
* the last minute, even though it's lame.
|
||||
*/
|
||||
assert(vp->program.Base.Id == 0 && prog == NULL);
|
||||
vp->program.Base.nir =
|
||||
brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
|
||||
brw->intelScreen->compiler->scalar_vs);
|
||||
}
|
||||
|
||||
if (prog)
|
||||
vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
|
||||
|
||||
|
|
@ -113,22 +105,22 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
|
||||
mem_ctx = ralloc_context(NULL);
|
||||
|
||||
brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
|
||||
brw->intelScreen->devinfo,
|
||||
prog, &vp->program.Base,
|
||||
&prog_data.base.base, 0);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count;
|
||||
if (vs) {
|
||||
/* We add padding around uniform values below vec4 size, with the worst
|
||||
* case being a float value that gets blown up to a vec4, so be
|
||||
* conservative here.
|
||||
*/
|
||||
param_count = vs->base.num_uniform_components * 4 +
|
||||
vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
|
||||
stage_prog_data->nr_image_params = vs->base.NumImages;
|
||||
} else {
|
||||
param_count = vp->program.Base.Parameters->NumParameters * 4;
|
||||
}
|
||||
int param_count = vp->program.Base.nir->num_uniforms;
|
||||
if (!brw->intelScreen->compiler->scalar_vs)
|
||||
param_count *= 4;
|
||||
|
||||
if (vs)
|
||||
prog_data.base.base.nr_image_params = vs->base.NumImages;
|
||||
|
||||
/* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
|
||||
* planes as uniforms.
|
||||
*/
|
||||
|
|
@ -143,6 +135,15 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
stage_prog_data->nr_image_params);
|
||||
stage_prog_data->nr_params = param_count;
|
||||
|
||||
if (prog) {
|
||||
brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
|
||||
&prog_data.base.base,
|
||||
brw->intelScreen->compiler->scalar_vs);
|
||||
} else {
|
||||
brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
|
||||
&prog_data.base.base);
|
||||
}
|
||||
|
||||
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
|
||||
prog_data.inputs_read = vp->program.Base.InputsRead;
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ const unsigned *brw_vs_emit(struct brw_context *brw,
|
|||
void brw_vs_debug_recompile(struct brw_context *brw,
|
||||
struct gl_shader_program *prog,
|
||||
const struct brw_vs_prog_key *key);
|
||||
bool brw_vs_prog_data_compare(const void *a, const void *b);
|
||||
|
||||
void
|
||||
brw_upload_vs_prog(struct brw_context *brw);
|
||||
|
|
@ -88,8 +87,7 @@ public:
|
|||
void *log_data,
|
||||
const struct brw_vs_prog_key *key,
|
||||
struct brw_vs_prog_data *vs_prog_data,
|
||||
struct gl_vertex_program *vp,
|
||||
struct gl_shader_program *prog,
|
||||
nir_shader *shader,
|
||||
gl_clip_plane *clip_planes,
|
||||
void *mem_ctx,
|
||||
int shader_time_index,
|
||||
|
|
@ -100,7 +98,6 @@ protected:
|
|||
const glsl_type *type);
|
||||
virtual void setup_payload();
|
||||
virtual void emit_prolog();
|
||||
virtual void emit_program_code();
|
||||
virtual void emit_thread_end();
|
||||
virtual void emit_urb_write_header(int mrf);
|
||||
virtual void emit_urb_slot(dst_reg reg, int varying);
|
||||
|
|
@ -116,7 +113,6 @@ private:
|
|||
|
||||
const struct brw_vs_prog_key *const key;
|
||||
struct brw_vs_prog_data * const vs_prog_data;
|
||||
struct gl_vertex_program *const vp;
|
||||
src_reg *vp_temp_regs;
|
||||
src_reg vp_addr_reg;
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@
|
|||
#include "program/prog_parameter.h"
|
||||
#include "program/program.h"
|
||||
#include "intel_mipmap_tree.h"
|
||||
#include "brw_nir.h"
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
|
|
@ -131,23 +132,24 @@ computed_depth_mode(struct gl_fragment_program *fp)
|
|||
return BRW_PSCDEPTH_OFF;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_wm_prog_data_compare(const void *in_a, const void *in_b)
|
||||
static void
|
||||
assign_fs_binding_table_offsets(const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
const struct gl_program *prog,
|
||||
const struct brw_wm_prog_key *key,
|
||||
struct brw_wm_prog_data *prog_data)
|
||||
{
|
||||
const struct brw_wm_prog_data *a = in_a;
|
||||
const struct brw_wm_prog_data *b = in_b;
|
||||
uint32_t next_binding_table_offset = 0;
|
||||
|
||||
/* Compare the base structure. */
|
||||
if (!brw_stage_prog_data_compare(&a->base, &b->base))
|
||||
return false;
|
||||
/* If there are no color regions, we still perform an FB write to a null
|
||||
* renderbuffer, which we place at surface index 0.
|
||||
*/
|
||||
prog_data->binding_table.render_target_start = next_binding_table_offset;
|
||||
next_binding_table_offset += MAX2(key->nr_color_regions, 1);
|
||||
|
||||
/* Compare the rest of the structure. */
|
||||
const unsigned offset = sizeof(struct brw_stage_prog_data);
|
||||
if (memcmp(((char *) a) + offset, ((char *) b) + offset,
|
||||
sizeof(struct brw_wm_prog_data) - offset))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo,
|
||||
shader_prog, prog, &prog_data->base,
|
||||
next_binding_table_offset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -188,18 +190,16 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||
if (!prog)
|
||||
prog_data.base.use_alt_mode = true;
|
||||
|
||||
assign_fs_binding_table_offsets(brw->intelScreen->devinfo, prog,
|
||||
&fp->program.Base, key, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count;
|
||||
if (fs) {
|
||||
param_count = fs->base.num_uniform_components +
|
||||
fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
|
||||
int param_count = fp->program.Base.nir->num_uniforms;
|
||||
if (fs)
|
||||
prog_data.base.nr_image_params = fs->base.NumImages;
|
||||
} else {
|
||||
param_count = fp->program.Base.Parameters->NumParameters * 4;
|
||||
}
|
||||
/* The backend also sometimes adds params for texture size. */
|
||||
param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
|
||||
prog_data.base.param =
|
||||
|
|
@ -211,6 +211,14 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||
prog_data.base.nr_image_params);
|
||||
prog_data.base.nr_params = param_count;
|
||||
|
||||
if (prog) {
|
||||
brw_nir_setup_glsl_uniforms(fp->program.Base.nir, prog, &fp->program.Base,
|
||||
&prog_data.base, true);
|
||||
} else {
|
||||
brw_nir_setup_arb_uniforms(fp->program.Base.nir, &fp->program.Base,
|
||||
&prog_data.base);
|
||||
}
|
||||
|
||||
prog_data.barycentric_interp_modes =
|
||||
brw_compute_barycentric_interp_modes(brw, key->flat_shade,
|
||||
key->persample_shading,
|
||||
|
|
|
|||
|
|
@ -85,7 +85,6 @@ bool brw_codegen_wm_prog(struct brw_context *brw,
|
|||
void brw_wm_debug_recompile(struct brw_context *brw,
|
||||
struct gl_shader_program *prog,
|
||||
const struct brw_wm_prog_key *key);
|
||||
bool brw_wm_prog_data_compare(const void *a, const void *b);
|
||||
|
||||
void
|
||||
brw_upload_wm_prog(struct brw_context *brw);
|
||||
|
|
|
|||
|
|
@ -124,12 +124,11 @@ void fs_visitor::setup_payload_gen4()
|
|||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
gl_fragment_program *fp = (gl_fragment_program*) prog;
|
||||
GLuint reg = 2;
|
||||
bool kill_stats_promoted_workaround = false;
|
||||
int lookup = key->iz_lookup;
|
||||
bool uses_depth =
|
||||
(fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
|
||||
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
||||
|
||||
assert(lookup < IZ_BIT_MAX);
|
||||
|
||||
|
|
|
|||
|
|
@ -380,7 +380,7 @@ brw_update_texture_surface(struct gl_context *ctx,
|
|||
surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
|
||||
SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
|
||||
|
||||
surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
|
||||
surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
|
||||
|
||||
/* Emit relocation to surface contents */
|
||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||
|
|
@ -718,7 +718,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
|
|||
assert(tile_y % 2 == 0);
|
||||
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
|
||||
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
|
||||
(mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
|
||||
(mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
|
||||
|
||||
if (brw->gen < 6) {
|
||||
/* _NEW_COLOR */
|
||||
|
|
@ -1001,6 +1001,32 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = {
|
|||
.emit = brw_upload_wm_ubo_surfaces,
|
||||
};
|
||||
|
||||
static void
|
||||
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* _NEW_PROGRAM */
|
||||
struct gl_shader_program *prog =
|
||||
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
|
||||
|
||||
if (!prog)
|
||||
return;
|
||||
|
||||
/* BRW_NEW_CS_PROG_DATA */
|
||||
brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
|
||||
&brw->cs.base, &brw->cs.prog_data->base, true);
|
||||
}
|
||||
|
||||
const struct brw_tracked_state brw_cs_ubo_surfaces = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_PROGRAM,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_CS_PROG_DATA |
|
||||
BRW_NEW_UNIFORM_BUFFER,
|
||||
},
|
||||
.emit = brw_upload_cs_ubo_surfaces,
|
||||
};
|
||||
|
||||
void
|
||||
brw_upload_abo_surfaces(struct brw_context *brw,
|
||||
struct gl_shader_program *prog,
|
||||
|
|
|
|||
|
|
@ -413,7 +413,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
|
|||
assert(tile_y % 2 == 0);
|
||||
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
|
||||
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
|
||||
(surface->mt->align_h == 4 ?
|
||||
(surface->mt->valign == 4 ?
|
||||
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
|
||||
|
||||
/* Emit relocation to surface contents */
|
||||
|
|
|
|||
|
|
@ -35,15 +35,6 @@ const unsigned MAX_GS_INPUT_VERTICES = 6;
|
|||
|
||||
namespace brw {
|
||||
|
||||
void
|
||||
gen6_gs_visitor::assign_binding_table_offsets()
|
||||
{
|
||||
/* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
|
||||
* feedback surfaces.
|
||||
*/
|
||||
assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS);
|
||||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::emit_prolog()
|
||||
{
|
||||
|
|
@ -146,27 +137,6 @@ gen6_gs_visitor::emit_prolog()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::visit(ir_emit_vertex *ir)
|
||||
{
|
||||
/* To ensure that we don't output more vertices than the shader specified
|
||||
* using max_vertices, do the logic inside a conditional of the form "if
|
||||
* (vertex_count < MAX)"
|
||||
*/
|
||||
unsigned num_output_vertices = c->gp->program.VerticesOut;
|
||||
emit(CMP(dst_null_d(), this->vertex_count,
|
||||
src_reg(num_output_vertices), BRW_CONDITIONAL_L));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
|
||||
gs_emit_vertex(ir->stream_id());
|
||||
|
||||
this->current_annotation = "emit vertex: increment vertex count";
|
||||
emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
|
||||
src_reg(1u)));
|
||||
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
||||
{
|
||||
|
|
@ -230,12 +200,6 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
this->vertex_output_offset, 1u));
|
||||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::visit(ir_end_primitive *)
|
||||
{
|
||||
gs_end_primitive();
|
||||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::gs_end_primitive()
|
||||
{
|
||||
|
|
@ -356,9 +320,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
if (c->gp->program.OutputType != GL_POINTS) {
|
||||
emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
{
|
||||
visit((ir_end_primitive *) NULL);
|
||||
}
|
||||
gs_end_primitive();
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,18 +39,16 @@ public:
|
|||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct gl_shader_program *prog,
|
||||
nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index) :
|
||||
vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills,
|
||||
vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills,
|
||||
shader_time_index) {}
|
||||
|
||||
protected:
|
||||
virtual void assign_binding_table_offsets();
|
||||
virtual void emit_prolog();
|
||||
virtual void emit_thread_end();
|
||||
virtual void visit(ir_emit_vertex *);
|
||||
virtual void visit(ir_end_primitive *);
|
||||
virtual void gs_emit_vertex(int stream_id);
|
||||
virtual void gs_end_primitive();
|
||||
virtual void emit_urb_write_header(int mrf);
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
|
|||
SET_FIELD(min_array_element, BRW_SURFACE_MIN_ARRAY_ELEMENT) |
|
||||
SET_FIELD(depth - 1, BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT);
|
||||
|
||||
surf[5] = (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
|
||||
surf[5] = (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
|
||||
|
||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||
offset + 4,
|
||||
|
|
|
|||
|
|
@ -158,9 +158,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
|
|||
surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
|
||||
gen7_surface_tiling_mode(tiling);
|
||||
|
||||
if (surface->mt->align_h == 4)
|
||||
if (surface->mt->valign == 4)
|
||||
surf[0] |= GEN7_SURFACE_VALIGN_4;
|
||||
if (surface->mt->align_w == 8)
|
||||
if (surface->mt->halign == 8)
|
||||
surf[0] |= GEN7_SURFACE_HALIGN_8;
|
||||
|
||||
if (surface->array_layout == ALL_SLICES_AT_EACH_LOD)
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ brw_upload_cs_state(struct brw_context *brw)
|
|||
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
|
||||
local_id_dwords =
|
||||
brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
|
||||
brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
|
||||
}
|
||||
|
||||
unsigned push_constant_data_size =
|
||||
|
|
@ -216,8 +216,7 @@ const struct brw_tracked_state brw_cs_state = {
|
|||
*
|
||||
*/
|
||||
unsigned
|
||||
brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
|
||||
unsigned dispatch_width)
|
||||
brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
|
||||
{
|
||||
return 3 * dispatch_width;
|
||||
}
|
||||
|
|
@ -272,7 +271,7 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
|||
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
|
||||
local_id_dwords =
|
||||
brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
|
||||
brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
|
||||
}
|
||||
|
||||
/* Updates the ParamaterValues[i] pointers for all parameters of the
|
||||
|
|
|
|||
|
|
@ -288,9 +288,9 @@ gen7_emit_texture_surface_state(struct brw_context *brw,
|
|||
if (target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY)
|
||||
surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
|
||||
|
||||
if (mt->align_h == 4)
|
||||
if (mt->valign == 4)
|
||||
surf[0] |= GEN7_SURFACE_VALIGN_4;
|
||||
if (mt->align_w == 8)
|
||||
if (mt->halign == 8)
|
||||
surf[0] |= GEN7_SURFACE_HALIGN_8;
|
||||
|
||||
if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
|
||||
|
|
@ -509,9 +509,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
|
|||
GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL) |
|
||||
gen7_surface_tiling_mode(mt->tiling);
|
||||
|
||||
if (irb->mt->align_h == 4)
|
||||
if (irb->mt->valign == 4)
|
||||
surf[0] |= GEN7_SURFACE_VALIGN_4;
|
||||
if (irb->mt->align_w == 8)
|
||||
if (irb->mt->halign == 8)
|
||||
surf[0] |= GEN7_SURFACE_HALIGN_8;
|
||||
|
||||
if (is_array) {
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ vertical_alignment(const struct brw_context *brw,
|
|||
surf_type == BRW_SURFACE_1D))
|
||||
return GEN8_SURFACE_VALIGN_4;
|
||||
|
||||
switch (mt->align_h) {
|
||||
switch (mt->valign) {
|
||||
case 4:
|
||||
return GEN8_SURFACE_VALIGN_4;
|
||||
case 8:
|
||||
|
|
@ -120,7 +120,7 @@ horizontal_alignment(const struct brw_context *brw,
|
|||
gen9_use_linear_1d_layout(brw, mt)))
|
||||
return GEN8_SURFACE_HALIGN_4;
|
||||
|
||||
switch (mt->align_w) {
|
||||
switch (mt->halign) {
|
||||
case 4:
|
||||
return GEN8_SURFACE_HALIGN_4;
|
||||
case 8:
|
||||
|
|
@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
|
|||
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
|
||||
* 16 must be used."
|
||||
*/
|
||||
assert(brw->gen < 9 || mt->align_w == 16);
|
||||
assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
|
||||
assert(brw->gen < 9 || mt->halign == 16);
|
||||
assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
|
||||
}
|
||||
|
||||
const uint32_t surf_type = translate_tex_target(target);
|
||||
|
|
@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
|||
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
|
||||
* 16 must be used."
|
||||
*/
|
||||
assert(brw->gen < 9 || mt->align_w == 16);
|
||||
assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
|
||||
assert(brw->gen < 9 || mt->halign == 16);
|
||||
assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
|
||||
}
|
||||
|
||||
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ intel_copy_image_sub_data(struct gl_context *ctx,
|
|||
} else {
|
||||
assert(dst_renderbuffer);
|
||||
dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
|
||||
src_image = src_renderbuffer->TexImage;
|
||||
dst_image = dst_renderbuffer->TexImage;
|
||||
}
|
||||
|
||||
if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
|
||||
|
|
|
|||
|
|
@ -694,7 +694,7 @@ intel_miptree_create(struct brw_context *brw,
|
|||
if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
|
||||
intel_miptree_is_fast_clear_capable(brw, mt)) {
|
||||
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
|
||||
assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1);
|
||||
assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
|
||||
}
|
||||
|
||||
return mt;
|
||||
|
|
@ -2168,16 +2168,18 @@ intel_miptree_map_blit(struct brw_context *brw,
|
|||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
{
|
||||
map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
|
||||
0, 0,
|
||||
map->w, map->h, 1,
|
||||
0, MIPTREE_LAYOUT_TILING_NONE);
|
||||
map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
|
||||
/* first_level */ 0,
|
||||
/* last_level */ 0,
|
||||
map->w, map->h, 1,
|
||||
/* samples */ 0,
|
||||
MIPTREE_LAYOUT_TILING_NONE);
|
||||
|
||||
if (!map->mt) {
|
||||
if (!map->linear_mt) {
|
||||
fprintf(stderr, "Failed to allocate blit temporary\n");
|
||||
goto fail;
|
||||
}
|
||||
map->stride = map->mt->pitch;
|
||||
map->stride = map->linear_mt->pitch;
|
||||
|
||||
/* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
|
||||
* INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
|
||||
|
|
@ -2188,7 +2190,7 @@ intel_miptree_map_blit(struct brw_context *brw,
|
|||
if (!intel_miptree_blit(brw,
|
||||
mt, level, slice,
|
||||
map->x, map->y, false,
|
||||
map->mt, 0, 0,
|
||||
map->linear_mt, 0, 0,
|
||||
0, 0, false,
|
||||
map->w, map->h, GL_COPY)) {
|
||||
fprintf(stderr, "Failed to blit\n");
|
||||
|
|
@ -2196,7 +2198,7 @@ intel_miptree_map_blit(struct brw_context *brw,
|
|||
}
|
||||
}
|
||||
|
||||
map->ptr = intel_miptree_map_raw(brw, map->mt);
|
||||
map->ptr = intel_miptree_map_raw(brw, map->linear_mt);
|
||||
|
||||
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
|
||||
map->x, map->y, map->w, map->h,
|
||||
|
|
@ -2206,7 +2208,7 @@ intel_miptree_map_blit(struct brw_context *brw,
|
|||
return;
|
||||
|
||||
fail:
|
||||
intel_miptree_release(&map->mt);
|
||||
intel_miptree_release(&map->linear_mt);
|
||||
map->ptr = NULL;
|
||||
map->stride = 0;
|
||||
}
|
||||
|
|
@ -2220,11 +2222,11 @@ intel_miptree_unmap_blit(struct brw_context *brw,
|
|||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
|
||||
intel_miptree_unmap_raw(map->mt);
|
||||
intel_miptree_unmap_raw(map->linear_mt);
|
||||
|
||||
if (map->mode & GL_MAP_WRITE_BIT) {
|
||||
bool ok = intel_miptree_blit(brw,
|
||||
map->mt, 0, 0,
|
||||
map->linear_mt, 0, 0,
|
||||
0, 0, false,
|
||||
mt, level, slice,
|
||||
map->x, map->y, false,
|
||||
|
|
@ -2232,7 +2234,7 @@ intel_miptree_unmap_blit(struct brw_context *brw,
|
|||
WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
|
||||
}
|
||||
|
||||
intel_miptree_release(&map->mt);
|
||||
intel_miptree_release(&map->linear_mt);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -2756,7 +2758,7 @@ intel_miptree_unmap(struct brw_context *brw,
|
|||
intel_miptree_unmap_etc(brw, mt, map, level, slice);
|
||||
} else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
|
||||
intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
|
||||
} else if (map->mt) {
|
||||
} else if (map->linear_mt) {
|
||||
intel_miptree_unmap_blit(brw, mt, map, level, slice);
|
||||
#if defined(USE_SSE41)
|
||||
} else if (map->buffer && cpu_has_sse4_1) {
|
||||
|
|
|
|||
|
|
@ -62,9 +62,11 @@ struct intel_resolve_map;
|
|||
struct intel_texture_image;
|
||||
|
||||
/**
|
||||
* This bit extends the set of GL_MAP_*_BIT enums.
|
||||
*
|
||||
* When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a
|
||||
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
|
||||
* tmeporary and recreate the kind of data requested by Mesa core, since we're
|
||||
* temporary and recreate the kind of data requested by Mesa core, since we're
|
||||
* satisfying some glGetTexImage() request or something.
|
||||
*
|
||||
* However, occasionally you want to actually map the miptree's current data
|
||||
|
|
@ -73,14 +75,14 @@ struct intel_texture_image;
|
|||
#define BRW_MAP_DIRECT_BIT 0x80000000
|
||||
|
||||
struct intel_miptree_map {
|
||||
/** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */
|
||||
/** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
|
||||
GLbitfield mode;
|
||||
/** Region of interest for the map. */
|
||||
int x, y, w, h;
|
||||
/** Possibly malloced temporary buffer for the mapping. */
|
||||
void *buffer;
|
||||
/** Possible pointer to a temporary linear miptree for the mapping. */
|
||||
struct intel_mipmap_tree *mt;
|
||||
struct intel_mipmap_tree *linear_mt;
|
||||
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
|
||||
void *ptr;
|
||||
/** Stride of the mapping. */
|
||||
|
|
@ -140,6 +142,9 @@ struct intel_mipmap_level
|
|||
* \code
|
||||
* x = mt->level[l].slice[s].x_offset
|
||||
* y = mt->level[l].slice[s].y_offset
|
||||
*
|
||||
* On some hardware generations, we program these offsets into
|
||||
* RENDER_SURFACE_STATE.XOffset and RENDER_SURFACE_STATE.YOffset.
|
||||
*/
|
||||
GLuint x_offset;
|
||||
GLuint y_offset;
|
||||
|
|
@ -168,12 +173,16 @@ enum intel_msaa_layout
|
|||
* accommodated by scaling up the width and the height of the surface so
|
||||
* that all the samples corresponding to a pixel are located at nearby
|
||||
* memory locations.
|
||||
*
|
||||
* @see PRM section "Interleaved Multisampled Surfaces"
|
||||
*/
|
||||
INTEL_MSAA_LAYOUT_IMS,
|
||||
|
||||
/**
|
||||
* Uncompressed Multisample Surface. The surface is stored as a 2D array,
|
||||
* with array slice n containing all pixel data for sample n.
|
||||
*
|
||||
* @see PRM section "Uncompressed Multisampled Surfaces"
|
||||
*/
|
||||
INTEL_MSAA_LAYOUT_UMS,
|
||||
|
||||
|
|
@ -185,6 +194,8 @@ enum intel_msaa_layout
|
|||
* the common case (where all samples constituting a pixel have the same
|
||||
* color value) to be stored efficiently by just using a single array
|
||||
* slice.
|
||||
*
|
||||
* @see PRM section "Compressed Multisampled Surfaces"
|
||||
*/
|
||||
INTEL_MSAA_LAYOUT_CMS,
|
||||
};
|
||||
|
|
@ -318,14 +329,34 @@ enum miptree_array_layout {
|
|||
*/
|
||||
struct intel_miptree_aux_buffer
|
||||
{
|
||||
/** Buffer object containing the pixel data. */
|
||||
/**
|
||||
* Buffer object containing the pixel data.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
|
||||
*/
|
||||
drm_intel_bo *bo;
|
||||
|
||||
uint32_t pitch; /**< pitch in bytes. */
|
||||
/**
|
||||
* Pitch in bytes.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
|
||||
*/
|
||||
uint32_t pitch;
|
||||
|
||||
uint32_t qpitch; /**< The distance in rows between array slices. */
|
||||
/**
|
||||
* The distance in rows between array slices.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceQPitch
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
|
||||
*/
|
||||
uint32_t qpitch;
|
||||
|
||||
struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */
|
||||
/**
|
||||
* Hiz miptree. Used only by Gen6.
|
||||
*/
|
||||
struct intel_mipmap_tree *mt;
|
||||
};
|
||||
|
||||
/* Tile resource modes */
|
||||
|
|
@ -337,15 +368,49 @@ enum intel_miptree_tr_mode {
|
|||
|
||||
struct intel_mipmap_tree
|
||||
{
|
||||
/** Buffer object containing the pixel data. */
|
||||
/**
|
||||
* Buffer object containing the surface.
|
||||
*
|
||||
* @see intel_mipmap_tree::offset
|
||||
* @see RENDER_SURFACE_STATE.SurfaceBaseAddress
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
|
||||
* @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
|
||||
*/
|
||||
drm_intel_bo *bo;
|
||||
|
||||
uint32_t pitch; /**< pitch in bytes. */
|
||||
/**
|
||||
* Pitch in bytes.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.SurfacePitch
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfacePitch
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
|
||||
* @see 3DSTATE_STENCIL_BUFFER.SurfacePitch
|
||||
*/
|
||||
uint32_t pitch;
|
||||
|
||||
uint32_t tiling; /**< One of the I915_TILING_* flags */
|
||||
/**
|
||||
* One of the I915_TILING_* flags.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.TileMode
|
||||
* @see 3DSTATE_DEPTH_BUFFER.TileMode
|
||||
*/
|
||||
uint32_t tiling;
|
||||
|
||||
/**
|
||||
* @see RENDER_SURFACE_STATE.TiledResourceMode
|
||||
* @see 3DSTATE_DEPTH_BUFFER.TiledResourceMode
|
||||
*/
|
||||
enum intel_miptree_tr_mode tr_mode;
|
||||
|
||||
/* Effectively the key:
|
||||
/**
|
||||
* @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.SurfaceType
|
||||
* @see RENDER_SURFACE_STATE.SurfaceArray
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfaceType
|
||||
*/
|
||||
GLenum target;
|
||||
|
||||
|
|
@ -362,18 +427,43 @@ struct intel_mipmap_tree
|
|||
*
|
||||
* For ETC1/ETC2 textures, this is one of the uncompressed mesa texture
|
||||
* formats if the hardware lacks support for ETC1/ETC2. See @ref etc_format.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.SurfaceFormat
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
|
||||
*/
|
||||
mesa_format format;
|
||||
|
||||
/** This variable stores the value of ETC compressed texture format */
|
||||
/**
|
||||
* This variable stores the value of ETC compressed texture format
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.SurfaceFormat
|
||||
*/
|
||||
mesa_format etc_format;
|
||||
|
||||
/**
|
||||
* The X offset of each image in the miptree must be aligned to this.
|
||||
* See the comments in brw_tex_layout.c.
|
||||
* @name Surface Alignment
|
||||
* @{
|
||||
*
|
||||
* This defines the alignment of the upperleft pixel of each "slice" in the
|
||||
* surface. The alignment is in pixel coordinates relative to the surface's
|
||||
* most upperleft pixel, which is the pixel at (x=0, y=0, layer=0,
|
||||
* level=0).
|
||||
*
|
||||
* The hardware docs do not use the term "slice". We use "slice" to mean
|
||||
* the pixels at a given miplevel and layer. For 2D surfaces, the layer is
|
||||
* the array slice; for 3D surfaces, the layer is the z offset.
|
||||
*
|
||||
* In the surface layout equations found in the hardware docs, the
|
||||
* horizontal and vertical surface alignments often appear as variables 'i'
|
||||
* and 'j'.
|
||||
*/
|
||||
unsigned int align_w;
|
||||
unsigned int align_h; /**< \see align_w */
|
||||
|
||||
/** @see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */
|
||||
uint32_t halign;
|
||||
|
||||
/** @see RENDER_SURFACE_STATE.SurfaceVerticalAlignment */
|
||||
uint32_t valign;
|
||||
/** @} */
|
||||
|
||||
GLuint first_level;
|
||||
GLuint last_level;
|
||||
|
|
@ -388,19 +478,47 @@ struct intel_mipmap_tree
|
|||
*/
|
||||
GLuint physical_width0, physical_height0, physical_depth0;
|
||||
|
||||
GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */
|
||||
/** Bytes per pixel (or bytes per block if compressed) */
|
||||
GLuint cpp;
|
||||
|
||||
/**
|
||||
* @see RENDER_SURFACE_STATE.NumberOfMultisamples
|
||||
* @see 3DSTATE_MULTISAMPLE.NumberOfMultisamples
|
||||
*/
|
||||
GLuint num_samples;
|
||||
|
||||
bool compressed;
|
||||
|
||||
/**
|
||||
* Level zero image dimensions. These dimensions correspond to the
|
||||
* @name Level zero image dimensions
|
||||
* @{
|
||||
*
|
||||
* These dimensions correspond to the
|
||||
* logical width, height, and depth of the texture as seen by client code.
|
||||
* Accordingly, they do not account for the extra width, height, and/or
|
||||
* depth that must be allocated in order to accommodate multisample
|
||||
* formats, nor do they account for the extra factor of 6 in depth that
|
||||
* must be allocated in order to accommodate cubemap textures.
|
||||
*/
|
||||
uint32_t logical_width0, logical_height0, logical_depth0;
|
||||
|
||||
/**
|
||||
* @see RENDER_SURFACE_STATE.Width
|
||||
* @see 3DSTATE_DEPTH_BUFFER.Width
|
||||
*/
|
||||
uint32_t logical_width0;
|
||||
|
||||
/**
|
||||
* @see RENDER_SURFACE_STATE.Height
|
||||
* @see 3DSTATE_DEPTH_BUFFER.Height
|
||||
*/
|
||||
uint32_t logical_height0;
|
||||
|
||||
/**
|
||||
* @see RENDER_SURFACE_STATE.Depth
|
||||
* @see 3DSTATE_DEPTH_BUFFER.Depth
|
||||
*/
|
||||
uint32_t logical_depth0;
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Indicates if we use the standard miptree layout (ALL_LOD_IN_EACH_SLICE),
|
||||
|
|
@ -417,11 +535,18 @@ struct intel_mipmap_tree
|
|||
* surfaces it is the number of blocks. For 1D array surfaces that have the
|
||||
* mipmap tree stored horizontally it is the number of pixels between each
|
||||
* slice.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.SurfaceQPitch
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfaceQPitch
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
|
||||
* @see 3DSTATE_STENCIL_BUFFER.SurfaceQPitch
|
||||
*/
|
||||
uint32_t qpitch;
|
||||
|
||||
/**
|
||||
* MSAA layout used by this buffer.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.MultisampledSurfaceStorageFormat
|
||||
*/
|
||||
enum intel_msaa_layout msaa_layout;
|
||||
|
||||
|
|
@ -430,24 +555,34 @@ struct intel_mipmap_tree
|
|||
GLuint total_width;
|
||||
GLuint total_height;
|
||||
|
||||
/* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to
|
||||
* this depth mipmap tree, if any.
|
||||
/**
|
||||
* The depth value used during the most recent fast depth clear performed
|
||||
* on the surface. This field is invalid only if surface has never
|
||||
* underwent a fast depth clear.
|
||||
*
|
||||
* @see 3DSTATE_CLEAR_PARAMS.DepthClearValue
|
||||
*/
|
||||
uint32_t depth_clear_value;
|
||||
|
||||
/* Includes image offset tables:
|
||||
*/
|
||||
/* Includes image offset tables: */
|
||||
struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
|
||||
|
||||
/* Offset into bo where miptree starts:
|
||||
/**
|
||||
* Offset into bo where the surface starts.
|
||||
*
|
||||
* @see intel_mipmap_tree::bo
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
|
||||
* @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
|
||||
* @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
|
||||
*/
|
||||
uint32_t offset;
|
||||
|
||||
/**
|
||||
* \brief HiZ aux buffer
|
||||
*
|
||||
* The hiz miptree contains the miptree's hiz buffer. To allocate the hiz
|
||||
* buffer, use intel_miptree_alloc_hiz().
|
||||
* To allocate the hiz buffer, use intel_miptree_alloc_hiz().
|
||||
*
|
||||
* To determine if hiz is enabled, do not check this pointer. Instead, use
|
||||
* intel_miptree_slice_has_hiz().
|
||||
|
|
@ -472,6 +607,7 @@ struct intel_mipmap_tree
|
|||
* require separate stencil. It always has the true copy of the stencil
|
||||
* bits, regardless of mt->format.
|
||||
*
|
||||
* \see 3DSTATE_STENCIL_BUFFER
|
||||
* \see intel_miptree_map_depthstencil()
|
||||
* \see intel_miptree_unmap_depthstencil()
|
||||
*/
|
||||
|
|
@ -499,6 +635,11 @@ struct intel_mipmap_tree
|
|||
*
|
||||
* This value will only ever contain ones in bits 28-31, so it is safe to
|
||||
* OR into dword 7 of SURFACE_STATE.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.RedClearColor
|
||||
* @see RENDER_SURFACE_STATE.GreenClearColor
|
||||
* @see RENDER_SURFACE_STATE.BlueClearColor
|
||||
* @see RENDER_SURFACE_STATE.AlphaClearColor
|
||||
*/
|
||||
uint32_t fast_clear_color_value;
|
||||
|
||||
|
|
|
|||
|
|
@ -46,10 +46,10 @@ class cmod_propagation_fs_visitor : public fs_visitor
|
|||
public:
|
||||
cmod_propagation_fs_visitor(struct brw_compiler *compiler,
|
||||
struct brw_wm_prog_data *prog_data,
|
||||
struct gl_shader_program *shader_prog)
|
||||
: fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
|
||||
&prog_data->base, shader_prog,
|
||||
(struct gl_program *) NULL, 8, -1) {}
|
||||
nir_shader *shader)
|
||||
: fs_visitor(compiler, NULL, NULL, NULL,
|
||||
&prog_data->base, (struct gl_program *) NULL,
|
||||
shader, 8, -1) {}
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -62,9 +62,9 @@ void cmod_propagation_test::SetUp()
|
|||
|
||||
fp = ralloc(NULL, struct brw_fragment_program);
|
||||
prog_data = ralloc(NULL, struct brw_wm_prog_data);
|
||||
shader_prog = ralloc(NULL, struct gl_shader_program);
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog);
|
||||
v = new cmod_propagation_fs_visitor(compiler, prog_data, shader);
|
||||
|
||||
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -46,10 +46,10 @@ class saturate_propagation_fs_visitor : public fs_visitor
|
|||
public:
|
||||
saturate_propagation_fs_visitor(struct brw_compiler *compiler,
|
||||
struct brw_wm_prog_data *prog_data,
|
||||
struct gl_shader_program *shader_prog)
|
||||
: fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
|
||||
&prog_data->base, shader_prog,
|
||||
(struct gl_program *) NULL, 8, -1) {}
|
||||
nir_shader *shader)
|
||||
: fs_visitor(compiler, NULL, NULL, NULL,
|
||||
&prog_data->base, (struct gl_program *) NULL,
|
||||
shader, 8, -1) {}
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -62,9 +62,9 @@ void saturate_propagation_test::SetUp()
|
|||
|
||||
fp = ralloc(NULL, struct brw_fragment_program);
|
||||
prog_data = ralloc(NULL, struct brw_wm_prog_data);
|
||||
shader_prog = ralloc(NULL, struct gl_shader_program);
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog);
|
||||
v = new saturate_propagation_fs_visitor(compiler, prog_data, shader);
|
||||
|
||||
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -45,9 +45,8 @@ class copy_propagation_vec4_visitor : public vec4_visitor
|
|||
{
|
||||
public:
|
||||
copy_propagation_vec4_visitor(struct brw_compiler *compiler,
|
||||
struct gl_shader_program *shader_prog)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
|
||||
MESA_SHADER_VERTEX, NULL,
|
||||
nir_shader *shader)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
|
||||
false /* no_spills */, -1)
|
||||
{
|
||||
}
|
||||
|
|
@ -69,11 +68,6 @@ protected:
|
|||
unreachable("Not reached");
|
||||
}
|
||||
|
||||
virtual void emit_program_code()
|
||||
{
|
||||
unreachable("Not reached");
|
||||
}
|
||||
|
||||
virtual void emit_thread_end()
|
||||
{
|
||||
unreachable("Not reached");
|
||||
|
|
@ -100,9 +94,9 @@ void copy_propagation_test::SetUp()
|
|||
|
||||
vp = ralloc(NULL, struct brw_vertex_program);
|
||||
|
||||
shader_prog = ralloc(NULL, struct gl_shader_program);
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
v = new copy_propagation_vec4_visitor(compiler, shader_prog);
|
||||
v = new copy_propagation_vec4_visitor(compiler, shader);
|
||||
|
||||
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -48,9 +48,8 @@ class register_coalesce_vec4_visitor : public vec4_visitor
|
|||
{
|
||||
public:
|
||||
register_coalesce_vec4_visitor(struct brw_compiler *compiler,
|
||||
struct gl_shader_program *shader_prog)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
|
||||
MESA_SHADER_VERTEX, NULL,
|
||||
nir_shader *shader)
|
||||
: vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
|
||||
false /* no_spills */, -1)
|
||||
{
|
||||
}
|
||||
|
|
@ -72,11 +71,6 @@ protected:
|
|||
unreachable("Not reached");
|
||||
}
|
||||
|
||||
virtual void emit_program_code()
|
||||
{
|
||||
unreachable("Not reached");
|
||||
}
|
||||
|
||||
virtual void emit_thread_end()
|
||||
{
|
||||
unreachable("Not reached");
|
||||
|
|
@ -103,9 +97,9 @@ void register_coalesce_test::SetUp()
|
|||
|
||||
vp = ralloc(NULL, struct brw_vertex_program);
|
||||
|
||||
shader_prog = ralloc(NULL, struct gl_shader_program);
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
v = new register_coalesce_vec4_visitor(compiler, shader_prog);
|
||||
v = new register_coalesce_vec4_visitor(compiler, shader);
|
||||
|
||||
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
|
|||
GET_CURRENT_CONTEXT(ctx);
|
||||
|
||||
if (MESA_VERBOSE & VERBOSE_API)
|
||||
_mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect);
|
||||
_mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
|
||||
|
||||
if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -27,29 +27,27 @@
|
|||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "glheader.h"
|
||||
#include "imports.h"
|
||||
#include "mtypes.h"
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "main/imports.h"
|
||||
#include "main/macros.h"
|
||||
#include "main/samplerobj.h"
|
||||
#include "main/texenvprogram.h"
|
||||
#include "main/texobj.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "glsl/ir_builder.h"
|
||||
#include "glsl/ir_optimization.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "glsl/glsl_symbol_table.h"
|
||||
#include "glsl/glsl_types.h"
|
||||
#include "program/ir_to_mesa.h"
|
||||
#include "program/program.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/programopt.h"
|
||||
#include "program/prog_cache.h"
|
||||
#include "program/prog_instruction.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/prog_print.h"
|
||||
#include "program/prog_statevars.h"
|
||||
#include "program/programopt.h"
|
||||
#include "../glsl/glsl_types.h"
|
||||
#include "../glsl/ir.h"
|
||||
#include "../glsl/ir_builder.h"
|
||||
#include "../glsl/glsl_symbol_table.h"
|
||||
#include "../glsl/glsl_parser_extras.h"
|
||||
#include "../glsl/ir_optimization.h"
|
||||
#include "../program/ir_to_mesa.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
|
|
|
|||
|
|
@ -293,9 +293,10 @@ struct ureg {
|
|||
GLuint file:4;
|
||||
GLint idx:9; /* relative addressing may be negative */
|
||||
/* sizeof(idx) should == sizeof(prog_src_reg::Index) */
|
||||
GLuint abs:1;
|
||||
GLuint negate:1;
|
||||
GLuint swz:12;
|
||||
GLuint pad:6;
|
||||
GLuint pad:5;
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -324,6 +325,7 @@ static const struct ureg undef = {
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0
|
||||
};
|
||||
|
||||
|
|
@ -342,6 +344,7 @@ static struct ureg make_ureg(GLuint file, GLint idx)
|
|||
struct ureg reg;
|
||||
reg.file = file;
|
||||
reg.idx = idx;
|
||||
reg.abs = 0;
|
||||
reg.negate = 0;
|
||||
reg.swz = SWIZZLE_NOOP;
|
||||
reg.pad = 0;
|
||||
|
|
@ -350,6 +353,14 @@ static struct ureg make_ureg(GLuint file, GLint idx)
|
|||
|
||||
|
||||
|
||||
static struct ureg absolute( struct ureg reg )
|
||||
{
|
||||
reg.abs = 1;
|
||||
reg.negate = 0;
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
static struct ureg negate( struct ureg reg )
|
||||
{
|
||||
reg.negate ^= 1;
|
||||
|
|
@ -526,8 +537,8 @@ static void emit_arg( struct prog_src_register *src,
|
|||
src->File = reg.file;
|
||||
src->Index = reg.idx;
|
||||
src->Swizzle = reg.swz;
|
||||
src->Abs = reg.abs;
|
||||
src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
|
||||
src->Abs = 0;
|
||||
src->RelAddr = 0;
|
||||
/* Check that bitfield sizes aren't exceeded */
|
||||
assert(src->Index == reg.idx);
|
||||
|
|
@ -953,7 +964,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
|
|||
|
||||
emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
|
||||
emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
|
||||
emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
|
||||
emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
|
||||
emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
|
||||
|
||||
release_temp(p, spot);
|
||||
|
|
|
|||
|
|
@ -423,6 +423,9 @@ _mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list arg);
|
|||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(strtok_r)
|
||||
#define strtok_r strtok_s
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
* GL_ARB_separate_shader_objects extension.
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "main/dispatch.h"
|
||||
|
|
@ -42,12 +43,11 @@
|
|||
#include "main/shaderobj.h"
|
||||
#include "main/transformfeedback.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
#include "program/program.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "util/ralloc.h"
|
||||
#include <stdbool.h>
|
||||
#include "../glsl/glsl_parser_extras.h"
|
||||
#include "../glsl/ir_uniform.h"
|
||||
|
||||
/**
|
||||
* Delete a pipeline object.
|
||||
|
|
|
|||
|
|
@ -611,11 +611,10 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
|
|||
return GL_INVALID_INDEX;
|
||||
|
||||
switch (res->Type) {
|
||||
case GL_UNIFORM_BLOCK:
|
||||
case GL_SHADER_STORAGE_BLOCK:
|
||||
return RESOURCE_UBO(res)- shProg->UniformBlocks;
|
||||
case GL_ATOMIC_COUNTER_BUFFER:
|
||||
return RESOURCE_ATC(res) - shProg->AtomicBuffers;
|
||||
case GL_UNIFORM_BLOCK:
|
||||
case GL_SHADER_STORAGE_BLOCK:
|
||||
case GL_TRANSFORM_FEEDBACK_VARYING:
|
||||
default:
|
||||
return calc_resource_index(shProg, res);
|
||||
|
|
|
|||
|
|
@ -1192,18 +1192,18 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
|
|||
struct gl_texture_object *
|
||||
_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
|
||||
{
|
||||
struct gl_texture_object *texObj = NULL;
|
||||
GLint targetIndex;
|
||||
|
||||
if (target == 0)
|
||||
return texObj;
|
||||
|
||||
texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
|
||||
targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
|
||||
assert(targetIndex < NUM_TEXTURE_TARGETS);
|
||||
texObj->TargetIndex = targetIndex;
|
||||
struct gl_texture_object *texObj = NULL;
|
||||
GLint targetIndex;
|
||||
|
||||
if (target == 0)
|
||||
return texObj;
|
||||
|
||||
texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
|
||||
targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
|
||||
assert(targetIndex < NUM_TEXTURE_TARGETS);
|
||||
texObj->TargetIndex = targetIndex;
|
||||
|
||||
return texObj;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1622,25 +1622,79 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target)
|
|||
|
||||
|
||||
/**
|
||||
* Bind a named texture to a texturing target.
|
||||
* Do actual texture binding. All error checking should have been done prior
|
||||
* to calling this function. Note that the texture target (1D, 2D, etc) is
|
||||
* always specified by the texObj->TargetIndex.
|
||||
*
|
||||
* \param unit index of texture unit to update
|
||||
* \param texObj the new texture object (cannot be NULL)
|
||||
*/
|
||||
static void
|
||||
bind_texture(struct gl_context *ctx,
|
||||
unsigned unit,
|
||||
struct gl_texture_object *texObj)
|
||||
{
|
||||
struct gl_texture_unit *texUnit;
|
||||
int targetIndex;
|
||||
|
||||
assert(unit < ARRAY_SIZE(ctx->Texture.Unit));
|
||||
texUnit = &ctx->Texture.Unit[unit];
|
||||
|
||||
assert(texObj);
|
||||
assert(valid_texture_object(texObj));
|
||||
|
||||
targetIndex = texObj->TargetIndex;
|
||||
assert(targetIndex >= 0);
|
||||
assert(targetIndex < NUM_TEXTURE_TARGETS);
|
||||
|
||||
/* Check if this texture is only used by this context and is already bound.
|
||||
* If so, just return.
|
||||
*/
|
||||
{
|
||||
bool early_out;
|
||||
mtx_lock(&ctx->Shared->Mutex);
|
||||
early_out = ((ctx->Shared->RefCount == 1)
|
||||
&& (texObj == texUnit->CurrentTex[targetIndex]));
|
||||
mtx_unlock(&ctx->Shared->Mutex);
|
||||
if (early_out) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* flush before changing binding */
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE);
|
||||
|
||||
/* If the refcount on the previously bound texture is decremented to
|
||||
* zero, it'll be deleted here.
|
||||
*/
|
||||
_mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], texObj);
|
||||
|
||||
ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
|
||||
unit + 1);
|
||||
|
||||
if (texObj->Name != 0)
|
||||
texUnit->_BoundTextures |= (1 << targetIndex);
|
||||
else
|
||||
texUnit->_BoundTextures &= ~(1 << targetIndex);
|
||||
|
||||
/* Pass BindTexture call to device driver */
|
||||
if (ctx->Driver.BindTexture) {
|
||||
ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implement glBindTexture(). Do error checking, look-up or create a new
|
||||
* texture object, then bind it in the current texture unit.
|
||||
*
|
||||
* \param target texture target.
|
||||
* \param texName texture name.
|
||||
*
|
||||
* \sa glBindTexture().
|
||||
*
|
||||
* Determines the old texture object bound and returns immediately if rebinding
|
||||
* the same texture. Get the current texture which is either a default texture
|
||||
* if name is null, a named texture from the hash, or a new texture if the
|
||||
* given texture name is new. Increments its reference count, binds it, and
|
||||
* calls dd_function_table::BindTexture. Decrements the old texture reference
|
||||
* count and deletes it if it reaches zero.
|
||||
*/
|
||||
void GLAPIENTRY
|
||||
_mesa_BindTexture( GLenum target, GLuint texName )
|
||||
{
|
||||
GET_CURRENT_CONTEXT(ctx);
|
||||
struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
|
||||
struct gl_texture_object *newTexObj = NULL;
|
||||
GLint targetIndex;
|
||||
|
||||
|
|
@ -1702,95 +1756,12 @@ _mesa_BindTexture( GLenum target, GLuint texName )
|
|||
newTexObj->TargetIndex = targetIndex;
|
||||
}
|
||||
|
||||
assert(valid_texture_object(newTexObj));
|
||||
|
||||
/* Check if this texture is only used by this context and is already bound.
|
||||
* If so, just return.
|
||||
*/
|
||||
{
|
||||
GLboolean early_out;
|
||||
mtx_lock(&ctx->Shared->Mutex);
|
||||
early_out = ((ctx->Shared->RefCount == 1)
|
||||
&& (newTexObj == texUnit->CurrentTex[targetIndex]));
|
||||
mtx_unlock(&ctx->Shared->Mutex);
|
||||
if (early_out) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* flush before changing binding */
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE);
|
||||
|
||||
/* Do the actual binding. The refcount on the previously bound
|
||||
* texture object will be decremented. It'll be deleted if the
|
||||
* count hits zero.
|
||||
*/
|
||||
_mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], newTexObj);
|
||||
ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
|
||||
ctx->Texture.CurrentUnit + 1);
|
||||
assert(texUnit->CurrentTex[targetIndex]);
|
||||
|
||||
if (texName != 0)
|
||||
texUnit->_BoundTextures |= (1 << targetIndex);
|
||||
else
|
||||
texUnit->_BoundTextures &= ~(1 << targetIndex);
|
||||
|
||||
/* Pass BindTexture call to device driver */
|
||||
if (ctx->Driver.BindTexture)
|
||||
ctx->Driver.BindTexture(ctx, ctx->Texture.CurrentUnit, target, newTexObj);
|
||||
bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the actual binding to a numbered texture unit.
|
||||
* The refcount on the previously bound
|
||||
* texture object will be decremented. It'll be deleted if the
|
||||
* count hits zero.
|
||||
*/
|
||||
static void
|
||||
bind_texture_unit(struct gl_context *ctx,
|
||||
GLuint unit,
|
||||
struct gl_texture_object *texObj)
|
||||
{
|
||||
struct gl_texture_unit *texUnit;
|
||||
|
||||
/* Get the texture unit (this is an array look-up) */
|
||||
texUnit = _mesa_get_tex_unit_err(ctx, unit, "glBindTextureUnit");
|
||||
if (!texUnit)
|
||||
return;
|
||||
|
||||
/* Check if this texture is only used by this context and is already bound.
|
||||
* If so, just return.
|
||||
*/
|
||||
{
|
||||
bool early_out;
|
||||
mtx_lock(&ctx->Shared->Mutex);
|
||||
early_out = ((ctx->Shared->RefCount == 1)
|
||||
&& (texObj == texUnit->CurrentTex[texObj->TargetIndex]));
|
||||
mtx_unlock(&ctx->Shared->Mutex);
|
||||
if (early_out) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* flush before changing binding */
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE);
|
||||
|
||||
_mesa_reference_texobj(&texUnit->CurrentTex[texObj->TargetIndex],
|
||||
texObj);
|
||||
assert(texUnit->CurrentTex[texObj->TargetIndex]);
|
||||
ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
|
||||
unit + 1);
|
||||
texUnit->_BoundTextures |= (1 << texObj->TargetIndex);
|
||||
|
||||
|
||||
/* Pass BindTexture call to device driver */
|
||||
if (ctx->Driver.BindTexture) {
|
||||
ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bind a named texture to the specified texture unit.
|
||||
* OpenGL 4.5 / GL_ARB_direct_state_access glBindTextureUnit().
|
||||
*
|
||||
* \param unit texture unit.
|
||||
* \param texture texture name.
|
||||
|
|
@ -1807,6 +1778,18 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
|
|||
{
|
||||
GET_CURRENT_CONTEXT(ctx);
|
||||
struct gl_texture_object *texObj;
|
||||
struct gl_texture_unit *texUnit;
|
||||
|
||||
if (unit >= _mesa_max_tex_unit(ctx)) {
|
||||
_mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit);
|
||||
return;
|
||||
}
|
||||
|
||||
texUnit = _mesa_get_tex_unit(ctx, unit);
|
||||
assert(texUnit);
|
||||
if (!texUnit) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
|
||||
_mesa_debug(ctx, "glBindTextureUnit %s %d\n",
|
||||
|
|
@ -1833,15 +1816,19 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
|
|||
return;
|
||||
}
|
||||
if (texObj->Target == 0) {
|
||||
_mesa_error(ctx, GL_INVALID_ENUM, "glBindTextureUnit(target)");
|
||||
/* Texture object was gen'd but never bound so the target is not set */
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION, "glBindTextureUnit(target)");
|
||||
return;
|
||||
}
|
||||
assert(valid_texture_object(texObj));
|
||||
|
||||
bind_texture_unit(ctx, unit, texObj);
|
||||
bind_texture(ctx, unit, texObj);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* OpenGL 4.4 / GL_ARB_multi_bind glBindTextures().
|
||||
*/
|
||||
void GLAPIENTRY
|
||||
_mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
|
||||
{
|
||||
|
|
@ -1862,12 +1849,6 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
|
|||
return;
|
||||
}
|
||||
|
||||
/* Flush before changing bindings */
|
||||
FLUSH_VERTICES(ctx, 0);
|
||||
|
||||
ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
|
||||
first + count);
|
||||
|
||||
if (textures) {
|
||||
/* Note that the error semantics for multi-bind commands differ from
|
||||
* those of other GL commands.
|
||||
|
|
@ -1902,24 +1883,7 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
|
|||
texObj = _mesa_lookup_texture_locked(ctx, textures[i]);
|
||||
|
||||
if (texObj && texObj->Target != 0) {
|
||||
const gl_texture_index targetIndex = texObj->TargetIndex;
|
||||
|
||||
if (texUnit->CurrentTex[targetIndex] != texObj) {
|
||||
/* Do the actual binding. The refcount on the previously
|
||||
* bound texture object will be decremented. It will be
|
||||
* deleted if the count hits zero.
|
||||
*/
|
||||
_mesa_reference_texobj(&texUnit->CurrentTex[targetIndex],
|
||||
texObj);
|
||||
|
||||
texUnit->_BoundTextures |= (1 << targetIndex);
|
||||
ctx->NewState |= _NEW_TEXTURE;
|
||||
|
||||
/* Pass the BindTexture call to the device driver */
|
||||
if (ctx->Driver.BindTexture)
|
||||
ctx->Driver.BindTexture(ctx, first + i,
|
||||
texObj->Target, texObj);
|
||||
}
|
||||
bind_texture(ctx, first + i, texObj);
|
||||
} else {
|
||||
/* The ARB_multi_bind spec says:
|
||||
*
|
||||
|
|
|
|||
|
|
@ -63,24 +63,6 @@ _mesa_max_tex_unit(struct gl_context *ctx)
|
|||
ctx->Const.MaxTextureCoordUnits);
|
||||
}
|
||||
|
||||
static inline struct gl_texture_unit *
|
||||
_mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func)
|
||||
{
|
||||
if (unit < _mesa_max_tex_unit(ctx))
|
||||
return _mesa_get_tex_unit(ctx, unit);
|
||||
|
||||
/* Note: This error is a precedent set by glBindTextures. From the GL 4.5
|
||||
* specification (30.10.2014) Section 8.1 ("Texture Objects"):
|
||||
*
|
||||
* "An INVALID_OPERATION error is generated if first + count is greater
|
||||
* than the number of texture image units supported by the
|
||||
* implementation."
|
||||
*/
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
|
||||
_mesa_enum_to_string(GL_TEXTURE0+unit));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
extern void
|
||||
_mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst );
|
||||
|
|
|
|||
|
|
@ -28,15 +28,14 @@
|
|||
|
||||
#include "main/core.h"
|
||||
#include "main/context.h"
|
||||
#include "ir.h"
|
||||
#include "ir_uniform.h"
|
||||
#include "program/hash_table.h"
|
||||
#include "../glsl/program.h"
|
||||
#include "../glsl/ir_uniform.h"
|
||||
#include "../glsl/glsl_parser_extras.h"
|
||||
#include "main/shaderapi.h"
|
||||
#include "main/shaderobj.h"
|
||||
#include "uniforms.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "glsl/ir.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "glsl/program.h"
|
||||
#include "program/hash_table.h"
|
||||
|
||||
|
||||
extern "C" void GLAPIENTRY
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@
|
|||
#ifndef UNIFORMS_H
|
||||
#define UNIFORMS_H
|
||||
|
||||
#include "glheader.h"
|
||||
#include "main/glheader.h"
|
||||
#include "glsl/glsl_types.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "../glsl/glsl_types.h"
|
||||
#include "../glsl/ir_uniform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
|||
|
|
@ -249,6 +249,7 @@ public:
|
|||
wrapper->closure = closure;
|
||||
|
||||
hash_table_call_foreach(this->ht, subtract_one_wrapper, wrapper);
|
||||
free(wrapper);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,22 +31,20 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include "main/compiler.h"
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "ir_expression_flattening.h"
|
||||
#include "ir_uniform.h"
|
||||
#include "glsl_types.h"
|
||||
#include "glsl_parser_extras.h"
|
||||
#include "../glsl/program.h"
|
||||
#include "ir_optimization.h"
|
||||
#include "ast.h"
|
||||
#include "linker.h"
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "main/shaderapi.h"
|
||||
#include "main/shaderobj.h"
|
||||
#include "main/uniforms.h"
|
||||
|
||||
#include "glsl/ast.h"
|
||||
#include "glsl/ir.h"
|
||||
#include "glsl/ir_expression_flattening.h"
|
||||
#include "glsl/ir_visitor.h"
|
||||
#include "glsl/ir_optimization.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "glsl/glsl_types.h"
|
||||
#include "glsl/linker.h"
|
||||
#include "glsl/program.h"
|
||||
#include "program/hash_table.h"
|
||||
#include "program/prog_instruction.h"
|
||||
#include "program/prog_optimize.h"
|
||||
|
|
|
|||
|
|
@ -1122,6 +1122,19 @@ prog_to_nir(const struct gl_program *prog,
|
|||
|
||||
ptn_add_output_stores(c);
|
||||
|
||||
s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
|
||||
s->info.num_textures = _mesa_fls(prog->SamplersUsed);
|
||||
s->info.num_ubos = 0;
|
||||
s->info.num_abos = 0;
|
||||
s->info.num_ssbos = 0;
|
||||
s->info.num_images = 0;
|
||||
s->info.inputs_read = prog->InputsRead;
|
||||
s->info.outputs_written = prog->OutputsWritten;
|
||||
s->info.system_values_read = prog->SystemValuesRead;
|
||||
s->info.uses_texture_gather = false;
|
||||
s->info.uses_clip_distance_out = false;
|
||||
s->info.separate_shader = false;
|
||||
|
||||
fail:
|
||||
if (c->error) {
|
||||
ralloc_free(s);
|
||||
|
|
|
|||
|
|
@ -23,13 +23,12 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "glsl_types.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "../glsl/program.h"
|
||||
#include "ir_uniform.h"
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "glsl/glsl_types.h"
|
||||
#include "glsl/ir.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
#include "glsl/ir_visitor.h"
|
||||
#include "glsl/program.h"
|
||||
#include "program/hash_table.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/program.h"
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@
|
|||
#include "main/macros.h"
|
||||
#include "main/varray.h"
|
||||
|
||||
#include "glsl/ir_uniform.h"
|
||||
|
||||
#include "vbo/vbo.h"
|
||||
|
||||
#include "st_context.h"
|
||||
|
|
@ -62,8 +64,6 @@
|
|||
#include "draw/draw_context.h"
|
||||
#include "cso_cache/cso_context.h"
|
||||
|
||||
#include "../glsl/ir_uniform.h"
|
||||
|
||||
|
||||
/**
|
||||
* This is very similar to vbo_all_varyings_in_vbos() but we are
|
||||
|
|
|
|||
|
|
@ -359,10 +359,7 @@ ralloc_strndup(const void *ctx, const char *str, size_t max)
|
|||
if (unlikely(str == NULL))
|
||||
return NULL;
|
||||
|
||||
n = strlen(str);
|
||||
if (n > max)
|
||||
n = max;
|
||||
|
||||
n = strnlen(str, max);
|
||||
ptr = ralloc_array(ctx, char, n + 1);
|
||||
memcpy(ptr, str, n);
|
||||
ptr[n] = '\0';
|
||||
|
|
|
|||
|
|
@ -35,10 +35,7 @@ strndup(const char *str, size_t max)
|
|||
if (!str)
|
||||
return NULL;
|
||||
|
||||
n = strlen(str);
|
||||
if (n > max)
|
||||
n = max;
|
||||
|
||||
n = strnlen(str, max);
|
||||
ptr = (char *) calloc(n + 1, sizeof(char));
|
||||
if (!ptr)
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -883,6 +883,9 @@ setup_nir_io(struct gl_shader *mesa_shader,
|
|||
prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
|
||||
}
|
||||
|
||||
shader->info.inputs_read = prog->InputsRead;
|
||||
shader->info.outputs_written = prog->OutputsWritten;
|
||||
|
||||
mesa_shader->num_uniform_components = shader->num_uniforms;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue