mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
While our LIFO scheduling mode attempts to optimize for register
pressure, it's often hard for a scheduling algorithm to do better than
the instruction order provided by the shader author. Shader authors
often do perfectly reasonable things like using texture results
immediately after fetching them or constructing texture coordinates
immediately before the texture op. When we throw all the instruction
ordering information away, we lose any help the author may have given
us. So we attempt NONE before we fall back to the worst-case LIFO mode.
And, yes, I tried this with NONE both before and after LIFO and doing
NONE before LIFO is substantially better, according to shader-db.
total instructions in shared programs: 19673152 -> 19665202 (-0.04%)
instructions in affected programs: 33669 -> 25719 (-23.61%)
helped: 20
HURT: 0
helped stats (abs) min: 15 max: 4609 x̄: 397.50 x̃: 107
helped stats (rel) min: 2.33% max: 67.50% x̄: 14.60% x̃: 9.12%
95% mean confidence interval for instructions value: -867.61 72.61
95% mean confidence interval for instructions %-change: -21.74% -7.46%
Inconclusive result (value mean confidence interval includes 0).
total cycles in shared programs: 935562500 -> 935020920 (-0.06%)
cycles in affected programs: 18620349 -> 18078769 (-2.91%)
helped: 104
HURT: 48
helped stats (abs) min: 88 max: 60986 x̄: 8031.48 x̃: 3680
helped stats (rel) min: 0.61% max: 51.44% x̄: 14.95% x̃: 8.87%
HURT stats (abs) min: 10 max: 54724 x̄: 6118.62 x̃: 1530
HURT stats (rel) min: 0.13% max: 46.45% x̄: 10.28% x̃: 6.46%
95% mean confidence interval for cycles value: -5724.34 -1401.71
95% mean confidence interval for cycles %-change: -9.86% -4.10%
Cycles are helped.
total spills in shared programs: 12158 -> 10327 (-15.06%)
spills in affected programs: 1831 -> 0
helped: 20
HURT: 0
total fills in shared programs: 14749 -> 12635 (-14.33%)
fills in affected programs: 2114 -> 0
helped: 20
HURT: 0
LOST: 8
GAINED: 649
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13734>
196 lines
5.9 KiB
C++
196 lines
5.9 KiB
C++
/*
|
|
* Copyright © 2010 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef BRW_SHADER_H
|
|
#define BRW_SHADER_H
|
|
|
|
#include <stdint.h>
|
|
#include "brw_cfg.h"
|
|
#include "brw_compiler.h"
|
|
#include "compiler/nir/nir.h"
|
|
|
|
#ifdef __cplusplus
|
|
#include "brw_ir_analysis.h"
|
|
#include "brw_ir_allocator.h"
|
|
|
|
/**
 * Modes that can be requested from the instruction scheduler.
 *
 * The enumerators rely on implicit numbering (SCHEDULE_PRE == 0 through
 * SCHEDULE_NONE == 4), so their order must not be changed.
 *
 * NOTE(review): the scheduler itself is not visible from this header; the
 * per-mode notes below are inferred from the enumerator names and should be
 * confirmed against the scheduler implementation.
 */
enum instruction_scheduler_mode {
   SCHEDULE_PRE,          /* presumably the default pre-RA heuristic */
   SCHEDULE_PRE_NON_LIFO, /* presumably pre-RA without the LIFO ordering */
   SCHEDULE_PRE_LIFO,     /* presumably pre-RA with LIFO ordering */
   SCHEDULE_POST,         /* presumably post-register-allocation */
   SCHEDULE_NONE,         /* presumably: keep the incoming order */
};
|
|
|
|
/* Evaluates to 65532, i.e. four below (1 << 16).
 * NOTE(review): the users of this constant are not visible in this header;
 * confirm its exact meaning at the use sites.
 */
#define UBO_START ((1 << 16) - 4)
|
|
|
|
struct backend_shader {
|
|
protected:
|
|
|
|
backend_shader(const struct brw_compiler *compiler,
|
|
void *log_data,
|
|
void *mem_ctx,
|
|
const nir_shader *shader,
|
|
struct brw_stage_prog_data *stage_prog_data,
|
|
bool debug_enabled);
|
|
|
|
public:
|
|
virtual ~backend_shader();
|
|
|
|
const struct brw_compiler *compiler;
|
|
void *log_data; /* Passed to compiler->*_log functions */
|
|
|
|
const struct intel_device_info * const devinfo;
|
|
const nir_shader *nir;
|
|
struct brw_stage_prog_data * const stage_prog_data;
|
|
|
|
/** ralloc context for temporary data used during compile */
|
|
void *mem_ctx;
|
|
|
|
/**
|
|
* List of either fs_inst or vec4_instruction (inheriting from
|
|
* backend_instruction)
|
|
*/
|
|
exec_list instructions;
|
|
|
|
cfg_t *cfg;
|
|
brw_analysis<brw::idom_tree, backend_shader> idom_analysis;
|
|
|
|
gl_shader_stage stage;
|
|
bool debug_enabled;
|
|
const char *stage_name;
|
|
const char *stage_abbrev;
|
|
|
|
brw::simple_allocator alloc;
|
|
|
|
virtual void dump_instruction(const backend_instruction *inst) const = 0;
|
|
virtual void dump_instruction(const backend_instruction *inst, FILE *file) const = 0;
|
|
virtual void dump_instructions() const;
|
|
virtual void dump_instructions(const char *name) const;
|
|
|
|
void calculate_cfg();
|
|
|
|
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
|
};
|
|
|
|
#else
|
|
struct backend_shader;
|
|
#endif /* __cplusplus */
|
|
|
|
/* Helper declarations; the definitions are not part of this header. */
enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
enum brw_conditional_mod brw_conditional_for_comparison(unsigned int op);
uint32_t brw_math_function(enum opcode op);
const char *brw_instruction_name(const struct intel_device_info *devinfo,
                                 enum opcode op);

/* NOTE(review): the three helpers below take the immediate through a
 * non-const pointer, so they presumably modify it in place; what the bool
 * result means is not visible here -- confirm against the definitions.
 */
bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg);
bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg);
bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg);

bool opt_predicated_break(struct backend_shader *s);
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* brw_fs_reg_allocate.cpp */
void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);

/* brw_vec4_reg_allocate.cpp */
void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);

/* brw_disasm.c: string tables, 16 entries each. */
extern const char *const conditional_modifier[16];
extern const char *const pred_ctrl_align16[16];
|
|
|
|
/**
 * Per-thread scratch space is a power-of-two multiple of 1KB.
 *
 * Returns util_next_power_of_two(size), except that any result below 1024
 * is replaced by 1024.
 */
static inline int
brw_get_scratch_size(int size)
{
   return MAX2(1024, util_next_power_of_two(size));
}
|
|
|
|
|
|
static inline nir_variable_mode
|
|
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
|
|
gl_shader_stage stage)
|
|
{
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
const bool is_scalar = compiler->scalar_stage[stage];
|
|
nir_variable_mode indirect_mask = (nir_variable_mode) 0;
|
|
|
|
switch (stage) {
|
|
case MESA_SHADER_VERTEX:
|
|
case MESA_SHADER_FRAGMENT:
|
|
indirect_mask |= nir_var_shader_in;
|
|
break;
|
|
|
|
case MESA_SHADER_GEOMETRY:
|
|
if (!is_scalar)
|
|
indirect_mask |= nir_var_shader_in;
|
|
break;
|
|
|
|
default:
|
|
/* Everything else can handle indirect inputs */
|
|
break;
|
|
}
|
|
|
|
if (is_scalar && stage != MESA_SHADER_TESS_CTRL &&
|
|
stage != MESA_SHADER_TASK &&
|
|
stage != MESA_SHADER_MESH)
|
|
indirect_mask |= nir_var_shader_out;
|
|
|
|
/* On HSW+, we allow indirects in scalar shaders. They get implemented
|
|
* using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
|
|
* brw_postprocess_nir.
|
|
*
|
|
* We haven't plumbed through the indirect scratch messages on gfx6 or
|
|
* earlier so doing indirects via scratch doesn't work there. On gfx7 and
|
|
* earlier the scratch space size is limited to 12kB. If we allowed
|
|
* indirects as scratch all the time, we may easily exceed this limit
|
|
* without having any fallback.
|
|
*/
|
|
if (is_scalar && devinfo->verx10 <= 70)
|
|
indirect_mask |= nir_var_function_temp;
|
|
|
|
return indirect_mask;
|
|
}
|
|
|
|
/* NOTE(review): offset_bits is an out-pointer; presumably it is only
 * meaningful when the function returns true -- confirm against the
 * definition, which is not part of this header.
 */
bool brw_texture_offset(const nir_tex_instr *tex, unsigned src,
                        uint32_t *offset_bits);
|
|
|
|
/**
|
|
* Scratch data used when compiling a GLSL geometry shader.
|
|
*/
|
|
struct brw_gs_compile
|
|
{
|
|
struct brw_gs_prog_key key;
|
|
struct brw_vue_map input_vue_map;
|
|
|
|
unsigned control_data_bits_per_vertex;
|
|
unsigned control_data_header_size_bits;
|
|
};
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* BRW_SHADER_H */
|