mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
i965: Add a brw_compiler structure and store the register sets in it
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
a85c4c9b3f
commit
ae3870df70
7 changed files with 120 additions and 97 deletions
|
|
@ -1613,10 +1613,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
|
|||
|
||||
/* brw_fs_reg_allocate.cpp
|
||||
*/
|
||||
void brw_fs_alloc_reg_sets(struct intel_screen *screen);
|
||||
void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
|
||||
|
||||
/* brw_vec4_reg_allocate.cpp */
|
||||
void brw_vec4_alloc_reg_set(struct intel_screen *screen);
|
||||
void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
|
||||
|
||||
/* brw_disasm.c */
|
||||
int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
|
||||
|
|
|
|||
|
|
@ -71,9 +71,9 @@ fs_visitor::assign_regs_trivial()
|
|||
}
|
||||
|
||||
static void
|
||||
brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
|
||||
brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
|
||||
{
|
||||
const struct brw_device_info *devinfo = screen->devinfo;
|
||||
const struct brw_device_info *devinfo = compiler->devinfo;
|
||||
int base_reg_count = BRW_MAX_GRF;
|
||||
int index = reg_width - 1;
|
||||
|
||||
|
|
@ -112,9 +112,9 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
|
|||
class_sizes[class_count++] = 8;
|
||||
}
|
||||
|
||||
memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0,
|
||||
sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range));
|
||||
int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range;
|
||||
memset(compiler->fs_reg_sets[index].class_to_ra_reg_range, 0,
|
||||
sizeof(compiler->fs_reg_sets[index].class_to_ra_reg_range));
|
||||
int *class_to_ra_reg_range = compiler->fs_reg_sets[index].class_to_ra_reg_range;
|
||||
|
||||
/* Compute the total number of registers across all classes. */
|
||||
int ra_reg_count = 0;
|
||||
|
|
@ -144,16 +144,16 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
|
|||
class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1];
|
||||
}
|
||||
|
||||
uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count);
|
||||
struct ra_regs *regs = ra_alloc_reg_set(screen, ra_reg_count);
|
||||
uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count);
|
||||
struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count);
|
||||
if (devinfo->gen >= 6)
|
||||
ra_set_allocate_round_robin(regs);
|
||||
int *classes = ralloc_array(screen, int, class_count);
|
||||
int *classes = ralloc_array(compiler, int, class_count);
|
||||
int aligned_pairs_class = -1;
|
||||
|
||||
/* Allocate space for q values. We allocate class_count + 1 because we
|
||||
* want to leave room for the aligned pairs class if we have it. */
|
||||
unsigned int **q_values = ralloc_array(screen, unsigned int *,
|
||||
unsigned int **q_values = ralloc_array(compiler, unsigned int *,
|
||||
class_count + 1);
|
||||
for (int i = 0; i < class_count + 1; ++i)
|
||||
q_values[i] = ralloc_array(q_values, unsigned int, class_count + 1);
|
||||
|
|
@ -273,20 +273,20 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
|
|||
|
||||
ralloc_free(q_values);
|
||||
|
||||
screen->wm_reg_sets[index].regs = regs;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(screen->wm_reg_sets[index].classes); i++)
|
||||
screen->wm_reg_sets[index].classes[i] = -1;
|
||||
compiler->fs_reg_sets[index].regs = regs;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(compiler->fs_reg_sets[index].classes); i++)
|
||||
compiler->fs_reg_sets[index].classes[i] = -1;
|
||||
for (int i = 0; i < class_count; i++)
|
||||
screen->wm_reg_sets[index].classes[class_sizes[i] - 1] = classes[i];
|
||||
screen->wm_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
|
||||
screen->wm_reg_sets[index].aligned_pairs_class = aligned_pairs_class;
|
||||
compiler->fs_reg_sets[index].classes[class_sizes[i] - 1] = classes[i];
|
||||
compiler->fs_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
|
||||
compiler->fs_reg_sets[index].aligned_pairs_class = aligned_pairs_class;
|
||||
}
|
||||
|
||||
/**
 * Build the fragment-shader register sets for both register widths.
 *
 * This span is a rendered diff: the lines naming "screen" are the removed
 * side and the lines naming "compiler" are the added side.
 *
 * brw_alloc_reg_set() stores its result at index (reg_width - 1), so the
 * width-1 call fills fs_reg_sets[0] and the width-2 call fills
 * fs_reg_sets[1].
 */
void
|
||||
brw_fs_alloc_reg_sets(struct intel_screen *screen)
|
||||
brw_fs_alloc_reg_sets(struct brw_compiler *compiler)
|
||||
{
|
||||
brw_alloc_reg_set(screen, 1);
|
||||
brw_alloc_reg_set(screen, 2);
|
||||
brw_alloc_reg_set(compiler, 1);
|
||||
brw_alloc_reg_set(compiler, 2);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -524,7 +524,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
|
|||
bool
|
||||
fs_visitor::assign_regs(bool allow_spilling)
|
||||
{
|
||||
struct intel_screen *screen = brw->intelScreen;
|
||||
struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
/* Most of this allocation was written for a reg_width of 1
|
||||
* (dispatch_width == 8). In extending to SIMD16, the code was
|
||||
* left in place and it was converted to have the hardware
|
||||
|
|
@ -534,7 +534,7 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
int reg_width = dispatch_width / 8;
|
||||
unsigned hw_reg_mapping[this->alloc.count];
|
||||
int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width);
|
||||
int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */
|
||||
int rsi = reg_width - 1; /* Which compiler->fs_reg_sets[] to use */
|
||||
calculate_live_intervals();
|
||||
|
||||
int node_count = this->alloc.count;
|
||||
|
|
@ -544,15 +544,15 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
if (devinfo->gen >= 7)
|
||||
node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START;
|
||||
struct ra_graph *g =
|
||||
ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count);
|
||||
ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count);
|
||||
|
||||
for (unsigned i = 0; i < this->alloc.count; i++) {
|
||||
unsigned size = this->alloc.sizes[i];
|
||||
int c;
|
||||
|
||||
assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) &&
|
||||
assert(size <= ARRAY_SIZE(compiler->fs_reg_sets[rsi].classes) &&
|
||||
"Register allocation relies on split_virtual_grfs()");
|
||||
c = screen->wm_reg_sets[rsi].classes[size - 1];
|
||||
c = compiler->fs_reg_sets[rsi].classes[size - 1];
|
||||
|
||||
/* Special case: on pre-GEN6 hardware that supports PLN, the
|
||||
* second operand of a PLN instruction needs to be an
|
||||
|
|
@ -563,10 +563,10 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
* any other interpolation modes). So all we need to do is find
|
||||
* that register and set it to the appropriate class.
|
||||
*/
|
||||
if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 &&
|
||||
if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 &&
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
|
||||
c = screen->wm_reg_sets[rsi].aligned_pairs_class;
|
||||
c = compiler->fs_reg_sets[rsi].aligned_pairs_class;
|
||||
}
|
||||
|
||||
ra_set_node_class(g, i, c);
|
||||
|
|
@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
*/
|
||||
if (inst->eot) {
|
||||
int size = alloc.sizes[inst->src[0].reg];
|
||||
int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
|
||||
int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
|
||||
ra_set_node_reg(g, inst->src[0].reg, reg);
|
||||
break;
|
||||
}
|
||||
|
|
@ -663,7 +663,7 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
for (unsigned i = 0; i < this->alloc.count; i++) {
|
||||
int reg = ra_get_node_reg(g, i);
|
||||
|
||||
hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg];
|
||||
hw_reg_mapping[i] = compiler->fs_reg_sets[rsi].ra_reg_to_grf[reg];
|
||||
this->grf_used = MAX2(this->grf_used,
|
||||
hw_reg_mapping[i] + this->alloc.sizes[i]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,19 @@
|
|||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "main/shaderapi.h"
|
||||
|
||||
/**
 * Create a brw_compiler and populate its register sets.
 *
 * \param mem_ctx  ralloc context the new brw_compiler is allocated out of.
 * \param devinfo  device description; the pointer is stored as-is, not
 *                 copied.
 * \return the newly allocated compiler.
 *
 * NOTE(review): the rzalloc() result is dereferenced without a NULL check.
 */
struct brw_compiler *
|
||||
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
||||
{
|
||||
/* NOTE(review): brw_vec4_alloc_reg_set() ralloc_free()s the current
 * vec4_reg_set pointers before replacing them, so it presumably relies on
 * this allocation starting out zeroed -- confirm against ralloc.h.
 */
struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
|
||||
|
||||
/* Must be set before the builders below run: both of them read
 * compiler->devinfo.
 */
compiler->devinfo = devinfo;
|
||||
|
||||
brw_fs_alloc_reg_sets(compiler);
|
||||
brw_vec4_alloc_reg_set(compiler);
|
||||
|
||||
return compiler;
|
||||
}
|
||||
|
||||
struct gl_shader *
|
||||
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -36,6 +36,58 @@
|
|||
#define MAX_SAMPLER_MESSAGE_SIZE 11
|
||||
#define MAX_VGRF_SIZE 16
|
||||
|
||||
/**
 * Compiler state built once by brw_compiler_create(): the device description
 * plus the register sets used by the vec4 and fragment-shader register
 * allocators.
 */
struct brw_compiler {
|
||||
/* Device description handed to brw_compiler_create(); stored by pointer. */
const struct brw_device_info *devinfo;
|
||||
|
||||
/* Register set used by vec4_visitor::reg_allocate(). */
struct {
|
||||
struct ra_regs *regs;
|
||||
|
||||
/**
|
||||
* Array of the ra classes for the unaligned contiguous register
|
||||
* block sizes used; reg_allocate() looks a class up as classes[size - 1].
|
||||
*/
|
||||
int *classes;
|
||||
|
||||
/**
|
||||
* Mapping for register-allocated objects in *regs to the first
|
||||
* GRF for that object.
|
||||
*/
|
||||
uint8_t *ra_reg_to_grf;
|
||||
} vec4_reg_set;
|
||||
|
||||
/* Register sets used by fs_visitor::assign_regs(), one per register width.
 * The array index is (reg_width - 1), where assign_regs() computes
 * reg_width as dispatch_width / 8.
 */
struct {
|
||||
struct ra_regs *regs;
|
||||
|
||||
/**
|
||||
* Array of the ra classes for the unaligned contiguous register
|
||||
* block sizes used, indexed by register size minus one (classes[size - 1]).
|
||||
*/
|
||||
int classes[16];
|
||||
|
||||
/**
|
||||
* Mapping from classes to ra_reg ranges. Each of the per-size
|
||||
* classes corresponds to a range of ra_reg nodes. This array stores
|
||||
* those ranges in the form of first ra_reg in each class and the
|
||||
* total number of ra_reg elements in the last array element. This
|
||||
* way the range of the i'th class is given by:
|
||||
* [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
|
||||
*/
|
||||
int class_to_ra_reg_range[17];
|
||||
|
||||
/**
|
||||
* Mapping for register-allocated objects in *regs to the first
|
||||
* GRF for that object.
|
||||
*/
|
||||
uint8_t *ra_reg_to_grf;
|
||||
|
||||
/**
|
||||
* ra class for the aligned pairs we use for PLN, which doesn't
|
||||
* appear in *classes. assign_regs() only uses it when it is >= 0.
|
||||
*/
|
||||
int aligned_pairs_class;
|
||||
} fs_reg_sets[2];
|
||||
};
|
||||
|
||||
enum PACKED register_file {
|
||||
BAD_FILE,
|
||||
GRF,
|
||||
|
|
@ -223,6 +275,9 @@ bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg);
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct brw_compiler *
|
||||
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
|
||||
|
||||
bool brw_vs_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog);
|
||||
|
|
|
|||
|
|
@ -91,10 +91,10 @@ vec4_visitor::reg_allocate_trivial()
|
|||
}
|
||||
|
||||
extern "C" void
|
||||
brw_vec4_alloc_reg_set(struct intel_screen *screen)
|
||||
brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
|
||||
{
|
||||
int base_reg_count =
|
||||
screen->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
|
||||
compiler->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
|
||||
|
||||
/* After running split_virtual_grfs(), almost all VGRFs will be of size 1.
|
||||
* SEND-from-GRF sources cannot be split, so we also need classes for each
|
||||
|
|
@ -112,14 +112,14 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen)
|
|||
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
|
||||
}
|
||||
|
||||
ralloc_free(screen->vec4_reg_set.ra_reg_to_grf);
|
||||
screen->vec4_reg_set.ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count);
|
||||
ralloc_free(screen->vec4_reg_set.regs);
|
||||
screen->vec4_reg_set.regs = ra_alloc_reg_set(screen, ra_reg_count);
|
||||
if (screen->devinfo->gen >= 6)
|
||||
ra_set_allocate_round_robin(screen->vec4_reg_set.regs);
|
||||
ralloc_free(screen->vec4_reg_set.classes);
|
||||
screen->vec4_reg_set.classes = ralloc_array(screen, int, class_count);
|
||||
ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf);
|
||||
compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count);
|
||||
ralloc_free(compiler->vec4_reg_set.regs);
|
||||
compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count);
|
||||
if (compiler->devinfo->gen >= 6)
|
||||
ra_set_allocate_round_robin(compiler->vec4_reg_set.regs);
|
||||
ralloc_free(compiler->vec4_reg_set.classes);
|
||||
compiler->vec4_reg_set.classes = ralloc_array(compiler, int, class_count);
|
||||
|
||||
/* Now, add the registers to their classes, and add the conflicts
|
||||
* between them and the base GRF registers (and also each other).
|
||||
|
|
@ -128,19 +128,19 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen)
|
|||
unsigned *q_values[MAX_VGRF_SIZE];
|
||||
for (int i = 0; i < class_count; i++) {
|
||||
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
|
||||
screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs);
|
||||
compiler->vec4_reg_set.classes[i] = ra_alloc_reg_class(compiler->vec4_reg_set.regs);
|
||||
|
||||
q_values[i] = new unsigned[MAX_VGRF_SIZE];
|
||||
|
||||
for (int j = 0; j < class_reg_count; j++) {
|
||||
ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg);
|
||||
ra_class_add_reg(compiler->vec4_reg_set.regs, compiler->vec4_reg_set.classes[i], reg);
|
||||
|
||||
screen->vec4_reg_set.ra_reg_to_grf[reg] = j;
|
||||
compiler->vec4_reg_set.ra_reg_to_grf[reg] = j;
|
||||
|
||||
for (int base_reg = j;
|
||||
base_reg < j + class_sizes[i];
|
||||
base_reg++) {
|
||||
ra_add_transitive_reg_conflict(screen->vec4_reg_set.regs, base_reg, reg);
|
||||
ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
|
||||
}
|
||||
|
||||
reg++;
|
||||
|
|
@ -158,7 +158,7 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen)
|
|||
}
|
||||
assert(reg == ra_reg_count);
|
||||
|
||||
ra_set_finalize(screen->vec4_reg_set.regs, q_values);
|
||||
ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
|
||||
|
||||
for (int i = 0; i < MAX_VGRF_SIZE; i++)
|
||||
delete[] q_values[i];
|
||||
|
|
@ -191,7 +191,7 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g,
|
|||
bool
|
||||
vec4_visitor::reg_allocate()
|
||||
{
|
||||
struct intel_screen *screen = brw->intelScreen;
|
||||
struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
unsigned int hw_reg_mapping[alloc.count];
|
||||
int payload_reg_count = this->first_non_payload_grf;
|
||||
|
||||
|
|
@ -207,12 +207,12 @@ vec4_visitor::reg_allocate()
|
|||
int first_payload_node = node_count;
|
||||
node_count += payload_reg_count;
|
||||
struct ra_graph *g =
|
||||
ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count);
|
||||
ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count);
|
||||
|
||||
for (unsigned i = 0; i < alloc.count; i++) {
|
||||
int size = this->alloc.sizes[i];
|
||||
assert(size >= 1 && size <= MAX_VGRF_SIZE);
|
||||
ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]);
|
||||
ra_set_node_class(g, i, compiler->vec4_reg_set.classes[size - 1]);
|
||||
|
||||
for (unsigned j = 0; j < i; j++) {
|
||||
if (virtual_grf_interferes(i, j)) {
|
||||
|
|
@ -248,7 +248,7 @@ vec4_visitor::reg_allocate()
|
|||
for (unsigned i = 0; i < alloc.count; i++) {
|
||||
int reg = ra_get_node_reg(g, i);
|
||||
|
||||
hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg];
|
||||
hw_reg_mapping[i] = compiler->vec4_reg_set.ra_reg_to_grf[reg];
|
||||
prog_data->total_grf = MAX2(prog_data->total_grf,
|
||||
hw_reg_mapping[i] + alloc.sizes[i]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "main/version.h"
|
||||
#include "swrast/s_renderbuffer.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "brw_shader.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "xmlpool.h"
|
||||
|
|
@ -1406,8 +1407,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
|
|||
psp->extensions = !intelScreen->has_context_reset_notification
|
||||
? intelScreenExtensions : intelRobustScreenExtensions;
|
||||
|
||||
brw_fs_alloc_reg_sets(intelScreen);
|
||||
brw_vec4_alloc_reg_set(intelScreen);
|
||||
intelScreen->compiler = brw_compiler_create(intelScreen,
|
||||
intelScreen->devinfo);
|
||||
|
||||
return (const __DRIconfig**) intel_screen_make_configs(psp);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,53 +63,7 @@ struct intel_screen
|
|||
|
||||
int winsys_msaa_samples_override;
|
||||
|
||||
struct {
|
||||
struct ra_regs *regs;
|
||||
|
||||
/**
|
||||
* Array of the ra classes for the unaligned contiguous register
|
||||
* block sizes used.
|
||||
*/
|
||||
int *classes;
|
||||
|
||||
/**
|
||||
* Mapping for register-allocated objects in *regs to the first
|
||||
* GRF for that object.
|
||||
*/
|
||||
uint8_t *ra_reg_to_grf;
|
||||
} vec4_reg_set;
|
||||
|
||||
struct {
|
||||
struct ra_regs *regs;
|
||||
|
||||
/**
|
||||
* Array of the ra classes for the unaligned contiguous register
|
||||
* block sizes used, indexed by register size.
|
||||
*/
|
||||
int classes[16];
|
||||
|
||||
/**
|
||||
* Mapping from classes to ra_reg ranges. Each of the per-size
|
||||
* classes corresponds to a range of ra_reg nodes. This array stores
|
||||
* those ranges in the form of first ra_reg in each class and the
|
||||
* total number of ra_reg elements in the last array element. This
|
||||
* way the range of the i'th class is given by:
|
||||
* [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
|
||||
*/
|
||||
int class_to_ra_reg_range[17];
|
||||
|
||||
/**
|
||||
* Mapping for register-allocated objects in *regs to the first
|
||||
* GRF for that object.
|
||||
*/
|
||||
uint8_t *ra_reg_to_grf;
|
||||
|
||||
/**
|
||||
* ra class for the aligned pairs we use for PLN, which doesn't
|
||||
* appear in *classes.
|
||||
*/
|
||||
int aligned_pairs_class;
|
||||
} wm_reg_sets[2];
|
||||
struct brw_compiler *compiler;
|
||||
|
||||
/**
|
||||
* Configuration cache with default values for all contexts
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue