From e70f5617f1125e1f39a75d7a8c92ddda86a8056d Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 23 Jun 2015 11:02:12 -0700
Subject: [PATCH 0001/1208] tgsi_to_nir: Fix translation of TXF on MSAA
targets.
Noticed while trying to add GL_ARB_texture_multisample support to vc4.
Reviewed-by: Ilia Mirkin
---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 061f39ac6f3..065bbf050c2 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1078,7 +1078,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
samp = 2;
break;
case TGSI_OPCODE_TXF:
- op = nir_texop_txf;
+ if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+ tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ op = nir_texop_txf_ms;
+ } else {
+ op = nir_texop_txf;
+ }
num_srcs = 2;
break;
case TGSI_OPCODE_TXD:
@@ -1178,7 +1183,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
- instr->src[src_number].src_type = nir_tex_src_lod;
+ if (op == nir_texop_txf_ms)
+ instr->src[src_number].src_type = nir_tex_src_ms_index;
+ else
+ instr->src[src_number].src_type = nir_tex_src_lod;
src_number++;
}
From fc0da629b502bb072b945932bae0477eb9b62bd5 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 20 Jun 2015 15:30:19 -0700
Subject: [PATCH 0002/1208] vc4: Fix printfs for blit fallbacks.
---
src/gallium/drivers/vc4/vc4_blit.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index d29e2c9c318..e52a1941730 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -94,7 +94,7 @@ vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
struct vc4_context *vc4 = vc4_context(ctx);
if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
- fprintf(stderr, "blit unsupported %s -> %s",
+ fprintf(stderr, "blit unsupported %s -> %s\n",
util_format_short_name(info->src.resource->format),
util_format_short_name(info->dst.resource->format));
return false;
@@ -135,7 +135,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
- fprintf(stderr, "color resolve unimplemented");
+ fprintf(stderr, "color resolve unimplemented\n");
return;
}
@@ -147,7 +147,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
}
if (info.mask & PIPE_MASK_S) {
- fprintf(stderr, "cannot blit stencil, skipping");
+ fprintf(stderr, "cannot blit stencil, skipping\n");
info.mask &= ~PIPE_MASK_S;
}
From 19056d04296444afefe71ad8094d327ed38967bf Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Jun 2015 17:31:24 -0700
Subject: [PATCH 0003/1208] vc4: Reuse (and extend) the packet.h sizes for
dumping.
---
src/gallium/drivers/vc4/kernel/vc4_packet.h | 7 ++
src/gallium/drivers/vc4/vc4_cl_dump.c | 82 ++++++++++-----------
2 files changed, 48 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 88cfc0fa9f0..8e6f2a1ac2c 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -88,16 +88,22 @@ enum vc4_packet {
#define VC4_PACKET_START_TILE_BINNING_SIZE 1
#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_SIZE 5
#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
#define VC4_PACKET_POINT_SIZE_SIZE 5
@@ -106,6 +112,7 @@ enum vc4_packet {
#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
#define VC4_PACKET_CLIP_WINDOW_SIZE 9
#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_Z_CLIPPING_SIZE 9
#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 69055081daa..4cc197acd77 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -291,63 +291,63 @@ dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset)
offset, hw_offset, handles[0], handles[1]);
}
-#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
-#define PACKET(name, size) [name] = { #name, size, NULL }
+#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
+#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }
static const struct packet_info {
const char *name;
uint8_t size;
void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
} packet_info[] = {
- PACKET(VC4_PACKET_HALT, 1),
- PACKET(VC4_PACKET_NOP, 1),
+ PACKET(VC4_PACKET_HALT),
+ PACKET(VC4_PACKET_NOP),
- PACKET(VC4_PACKET_FLUSH, 1),
- PACKET(VC4_PACKET_FLUSH_ALL, 1),
- PACKET(VC4_PACKET_START_TILE_BINNING, 1),
- PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1),
- PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
+ PACKET(VC4_PACKET_FLUSH),
+ PACKET(VC4_PACKET_FLUSH_ALL),
+ PACKET(VC4_PACKET_START_TILE_BINNING),
+ PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
+ PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),
- PACKET(VC4_PACKET_BRANCH, 5),
- PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
+ PACKET(VC4_PACKET_BRANCH),
+ PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
- PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER, 5),
- PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER, 5),
- PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 7),
- PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 7),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
+ PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+ PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
+ PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 14),
- PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 10),
+ PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
- PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE, 48),
- PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE, 49),
+ PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
+ PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
- PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 2),
+ PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT),
- PACKET(VC4_PACKET_GL_SHADER_STATE, 5),
- PACKET(VC4_PACKET_NV_SHADER_STATE, 5),
- PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
+ PACKET(VC4_PACKET_GL_SHADER_STATE),
+ PACKET(VC4_PACKET_NV_SHADER_STATE),
+ PACKET(VC4_PACKET_VG_SHADER_STATE),
- PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
- PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
- PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
- PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
- PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
- PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
- PACKET(VC4_PACKET_CLIP_WINDOW, 9),
- PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET, 5),
- PACKET(VC4_PACKET_Z_CLIPPING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
+ PACKET(VC4_PACKET_CONFIGURATION_BITS),
+ PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
+ PACKET_DUMP(VC4_PACKET_POINT_SIZE),
+ PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
+ PACKET(VC4_PACKET_RHT_X_BOUNDARY),
+ PACKET(VC4_PACKET_DEPTH_OFFSET),
+ PACKET(VC4_PACKET_CLIP_WINDOW),
+ PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
+ PACKET(VC4_PACKET_Z_CLIPPING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),
- PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
- PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
- PACKET(VC4_PACKET_CLEAR_COLORS, 14),
- PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
+ PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
+ PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
+ PACKET(VC4_PACKET_CLEAR_COLORS),
+ PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),
- PACKET_DUMP(VC4_PACKET_GEM_HANDLES, 9),
+ PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
};
void
From 8fbcabc41a4b2c7d7571585bde2e009e57982da4 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Jun 2015 13:14:57 -0700
Subject: [PATCH 0004/1208] vc4: Add an "args" temporary for RCL setup.
---
.../drivers/vc4/kernel/vc4_render_cl.c | 48 +++++++++----------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index e2d907ad91f..deb2ccfa0d4 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -100,7 +100,8 @@ static void emit_tile(struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup,
uint8_t x, uint8_t y, bool first, bool last)
{
- bool has_bin = exec->args->bin_cl_size != 0;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
/* Note that the load doesn't actually occur until the
* tile coords packet is processed, and only one load
@@ -108,10 +109,9 @@ static void emit_tile(struct vc4_exec_info *exec,
*/
if (setup->color_read) {
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->color_read.bits);
+ rcl_u16(setup, args->color_read.bits);
rcl_u32(setup,
- setup->color_read->paddr +
- exec->args->color_read.offset);
+ setup->color_read->paddr + args->color_read.offset);
}
if (setup->zs_read) {
@@ -122,9 +122,8 @@ static void emit_tile(struct vc4_exec_info *exec,
}
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_read.bits);
- rcl_u32(setup,
- setup->zs_read->paddr + exec->args->zs_read.offset);
+ rcl_u16(setup, args->zs_read.bits);
+ rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
}
/* Clipping depends on tile coordinates having been
@@ -147,11 +146,11 @@ static void emit_tile(struct vc4_exec_info *exec,
if (setup->zs_write) {
rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_write.bits |
+ rcl_u16(setup, args->zs_write.bits |
(setup->color_ms_write ?
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
rcl_u32(setup,
- (setup->zs_write->paddr + exec->args->zs_write.offset) |
+ (setup->zs_write->paddr + args->zs_write.offset) |
((last && !setup->color_ms_write) ?
VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
}
@@ -172,11 +171,12 @@ static void emit_tile(struct vc4_exec_info *exec,
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup)
{
- bool has_bin = exec->args->bin_cl_size != 0;
- uint8_t min_x_tile = exec->args->min_x_tile;
- uint8_t min_y_tile = exec->args->min_y_tile;
- uint8_t max_x_tile = exec->args->max_x_tile;
- uint8_t max_y_tile = exec->args->max_y_tile;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ uint8_t min_x_tile = args->min_x_tile;
+ uint8_t min_y_tile = args->min_y_tile;
+ uint8_t max_x_tile = args->max_x_tile;
+ uint8_t max_y_tile = args->max_y_tile;
uint8_t xtiles = max_x_tile - min_x_tile + 1;
uint8_t ytiles = max_y_tile - min_y_tile + 1;
uint8_t x, y;
@@ -185,7 +185,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
size += VC4_PACKET_CLEAR_COLORS_SIZE +
VC4_PACKET_TILE_COORDINATES_SIZE +
VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
@@ -226,23 +226,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
rcl_u32(setup,
(setup->color_ms_write ?
(setup->color_ms_write->paddr +
- exec->args->color_ms_write.offset) :
+ args->color_ms_write.offset) :
0));
- rcl_u16(setup, exec->args->width);
- rcl_u16(setup, exec->args->height);
- rcl_u16(setup, exec->args->color_ms_write.bits);
+ rcl_u16(setup, args->width);
+ rcl_u16(setup, args->height);
+ rcl_u16(setup, args->color_ms_write.bits);
/* The tile buffer gets cleared when the previous tile is stored. If
* the clear values changed between frames, then the tile buffer has
* stale clear values in it, so we have to do a store in None mode (no
* writes) so that we trigger the tile buffer clear.
*/
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
- rcl_u32(setup, exec->args->clear_color[0]);
- rcl_u32(setup, exec->args->clear_color[1]);
- rcl_u32(setup, exec->args->clear_z);
- rcl_u8(setup, exec->args->clear_s);
+ rcl_u32(setup, args->clear_color[0]);
+ rcl_u32(setup, args->clear_color[1]);
+ rcl_u32(setup, args->clear_z);
+ rcl_u8(setup, args->clear_s);
vc4_tile_coordinates(setup, 0, 0);
From 76851f49a5beac01b4eee7892ca95f44b5e18e29 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Jun 2015 11:45:27 -0700
Subject: [PATCH 0005/1208] vc4: Clarify size calculation for Z/S writes.
It's the same value for loads and stores, because they're basically the
same packet.
---
src/gallium/drivers/vc4/kernel/vc4_render_cl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index deb2ccfa0d4..f55ffe5a8db 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -208,7 +208,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
}
if (setup->zs_write)
- loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
if (setup->color_ms_write) {
if (setup->zs_write)
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
From af83eb25812fbda89de62b58f9e59a5408ad4654 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 23 Jun 2015 17:53:07 -0700
Subject: [PATCH 0006/1208] vc4: Pull the blending operation out to a separate
function.
It's fairly separate from the rest of the TLB operations at frag end time,
and we'll need to run it multiple times to support MSAA blending.
---
src/gallium/drivers/vc4/vc4_program.c | 88 +++++++++++++++------------
1 file changed, 50 insertions(+), 38 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ba47c51d9bd..c620a4a351f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1371,12 +1371,13 @@ vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
}
}
-static void
-emit_frag_end(struct vc4_compile *c)
+/**
+ * Applies the GL blending pipeline and returns the packed (8888) output
+ * color.
+ */
+static struct qreg
+blend_pipeline(struct vc4_compile *c)
{
- clip_distance_discard(c);
- alpha_test_discard(c);
-
enum pipe_format color_format = c->fs_key->color_format;
const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
@@ -1408,14 +1409,16 @@ emit_frag_end(struct vc4_compile *c)
packed_dst_color = qir_MOV(c, r4);
}
+ struct qreg undef_array[4] = { c->undef, c->undef, c->undef, c->undef };
+ const struct qreg *output_colors = (c->output_color_index != -1 ?
+ c->outputs + c->output_color_index :
+ undef_array);
+ struct qreg blend_src_color[4];
+ for (int i = 0; i < 4; i++)
+ blend_src_color[i] = output_colors[i];
+
struct qreg blend_color[4];
- struct qreg undef_array[4] = {
- c->undef, c->undef, c->undef, c->undef
- };
- vc4_blend(c, blend_color, linear_dst_color,
- (c->output_color_index != -1 ?
- c->outputs + c->output_color_index :
- undef_array));
+ vc4_blend(c, blend_color, linear_dst_color, blend_src_color);
if (util_format_is_srgb(color_format)) {
for (int i = 0; i < 3; i++)
@@ -1439,30 +1442,6 @@ emit_frag_end(struct vc4_compile *c)
format_swiz[i]);
}
- if (c->discard.file != QFILE_NULL)
- qir_TLB_DISCARD_SETUP(c, c->discard);
-
- if (c->fs_key->stencil_enabled) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
- if (c->fs_key->stencil_twoside) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
- }
- if (c->fs_key->stencil_full_writemasks) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
- }
- }
-
- if (c->fs_key->depth_enabled) {
- struct qreg z;
- if (c->output_position_index != -1) {
- z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
- qir_uniform_f(c, 0xffffff)));
- } else {
- z = qir_FRAG_Z(c);
- }
- qir_TLB_Z_WRITE(c, z);
- }
-
struct qreg packed_color = c->undef;
for (int i = 0; i < 4; i++) {
if (swizzled_outputs[i].file == QFILE_NULL)
@@ -1502,8 +1481,41 @@ emit_frag_end(struct vc4_compile *c)
qir_uniform_ui(c, ~colormask)));
}
- qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
- packed_color, c->undef));
+ return packed_color;
+}
+
+static void
+emit_frag_end(struct vc4_compile *c)
+{
+ clip_distance_discard(c);
+ alpha_test_discard(c);
+ struct qreg color = blend_pipeline(c);
+
+ if (c->discard.file != QFILE_NULL)
+ qir_TLB_DISCARD_SETUP(c, c->discard);
+
+ if (c->fs_key->stencil_enabled) {
+ qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
+ if (c->fs_key->stencil_twoside) {
+ qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
+ }
+ if (c->fs_key->stencil_full_writemasks) {
+ qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
+ }
+ }
+
+ if (c->fs_key->depth_enabled) {
+ struct qreg z;
+ if (c->output_position_index != -1) {
+ z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
+ qir_uniform_f(c, 0xffffff)));
+ } else {
+ z = qir_FRAG_Z(c);
+ }
+ qir_TLB_Z_WRITE(c, z);
+ }
+
+ qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef, color, c->undef));
}
static void
From 0f69d59b1c8f5314c1abe18659b96adcfc51a0e5 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 23 Jun 2015 18:04:00 -0700
Subject: [PATCH 0007/1208] vc4: Make a helper for TLB color writes, too.
We've done so for all the other QIR instruction generation in this file.
---
src/gallium/drivers/vc4/vc4_program.c | 2 +-
src/gallium/drivers/vc4/vc4_qir.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index c620a4a351f..2061631dc9e 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1515,7 +1515,7 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef, color, c->undef));
+ qir_TLB_COLOR_WRITE(c, color);
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 732cfd0b306..2f1e261f880 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -523,6 +523,7 @@ QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
+QIR_NODST_1(TLB_COLOR_WRITE)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
From 997f6778414a352457162b73ff5295e51e09ad63 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 23 Jun 2015 18:08:49 -0700
Subject: [PATCH 0008/1208] vc4: Don't try to CSE color reads.
A TLB color read returns a new value for each sample in the TLB. We've already avoided
trying to get the same index's color multiple times at the vc4_program.c
level, so we're not losing anything by doing this.
---
src/gallium/drivers/vc4/vc4_opt_cse.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 92c8260eb59..51a56504e5e 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -130,7 +130,8 @@ qir_opt_cse(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst)) {
+ qir_has_side_effect_reads(c, inst) ||
+ inst->op == QOP_TLB_COLOR_READ) {
continue;
}
From 5458ac01ae046010f3f7e4ddbf8ef18cca04d96c Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Jun 2015 17:34:24 -0700
Subject: [PATCH 0009/1208] vc4: Add dumping for
VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER.
---
src/gallium/drivers/vc4/kernel/vc4_packet.h | 10 +++++++
src/gallium/drivers/vc4/vc4_cl_dump.c | 30 +++++++++++++++++++--
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 8e6f2a1ac2c..771e2b78761 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -141,6 +141,16 @@ enum vc4_packet {
#define VC4_TILING_FORMAT_LT 2
/** @} */
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
+
/** @{
*
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 4cc197acd77..289d4d6c521 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -46,6 +46,32 @@ dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t hw_offset
offset, hw_offset, *addr);
}
+static void
+dump_loadstore_full(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint32_t bits = *(uint32_t *)(cl + offset);
+
+ fprintf(stderr, "0x%08x 0x%08x: addr 0x%08x%s%s%s%s\n",
+ offset, hw_offset,
+ bits & ~0xf,
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color",
+ (bits & VC4_LOADSTORE_FULL_RES_EOF) ? " eof" : "");
+}
+
+static void
+dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
static void
dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
{
@@ -313,8 +339,8 @@ static const struct packet_info {
PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
- PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
- PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
From 3fd4c80b32e3080d761e176d129a1e46c618584a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Jun 2015 17:38:14 -0700
Subject: [PATCH 0010/1208] vc4: Also dump VC4_PACKET_LOAD_TILE_BUFFER_GENERAL.
---
src/gallium/drivers/vc4/vc4_cl_dump.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 289d4d6c521..64de79cc830 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -73,7 +73,7 @@ dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t h
}
static void
-dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+dump_loadstore_general(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint8_t *bytes = cl + offset;
uint32_t *addr = cl + offset + 2;
@@ -150,6 +150,18 @@ dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw
(*addr & (1 << 3)) ? " EOF" : "");
}
+static void
+dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
@@ -342,7 +354,7 @@ static const struct packet_info {
PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
- PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
+ PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
From 7796e8889a9a2cc1b454dc32d8da3d756404339a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?=
Date: Thu, 21 May 2015 10:49:05 +0900
Subject: [PATCH 0011/1208] winsys/radeon: Unmap GPU VM address range when
destroying BO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
But only when doing so is safe according to the
RADEON_INFO_VA_UNMAP_WORKING kernel query.
This avoids kernel GPU VM address range conflicts when the BO has other
references than the GEM handle being closed, e.g. when the BO is shared.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90537
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90873
Cc: "10.5 10.6"
Reviewed-by: Christian König
---
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 28 ++++++++++++++++---
.../winsys/radeon/drm/radeon_drm_winsys.c | 4 +++
.../winsys/radeon/drm/radeon_drm_winsys.h | 1 +
3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index fe98870967a..78c95b15eb2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -305,14 +305,34 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
+ if (mgr->va) {
+ if (bo->rws->va_unmap_working) {
+ struct drm_radeon_gem_va va;
+
+ va.handle = bo->handle;
+ va.vm_id = 0;
+ va.operation = RADEON_VA_UNMAP;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+
+ if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_VA, &va,
+ sizeof(va)) != 0 &&
+ va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
+ fprintf(stderr, "radeon: size : %d bytes\n", bo->base.size);
+ fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+ }
+ }
+
+ radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
+ }
+
/* Close object. */
args.handle = bo->handle;
drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
- if (mgr->va) {
- radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
- }
-
pipe_mutex_destroy(bo->map_mutex);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index ba8d1437b6f..d457f8a5ad1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -57,6 +57,8 @@
#define RADEON_INFO_READ_REG 0x24
#endif
+#define RADEON_INFO_VA_UNMAP_WORKING 0x25
+
static struct util_hash_table *fd_tab = NULL;
pipe_static_mutex(fd_tab_mutex);
@@ -399,6 +401,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
&ib_vm_max_size))
ws->info.r600_virtual_address = FALSE;
+ radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
+ &ws->va_unmap_working);
}
if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
ws->info.r600_virtual_address = FALSE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 166b6b93d28..99c8b8a8a1d 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -74,6 +74,7 @@ struct radeon_drm_winsys {
enum radeon_generation gen;
struct radeon_info info;
uint32_t va_start;
+ uint32_t va_unmap_working;
uint32_t accel_working2;
struct pb_manager *kman;
From c8b8e8b29b755cd3d80fc5e470f441cb3716152a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 22 Jun 2015 14:20:20 -0700
Subject: [PATCH 0012/1208] i965: Don't count NIR instructions for shader-db.
Matt, Jason, and I haven't found this useful in a long time.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_nir.c | 31 -----------------------------
1 file changed, 31 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index c13708a2f8a..dffb8ab1ca7 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -57,28 +57,6 @@ nir_optimize(nir_shader *nir)
} while (progress);
}
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
- int *count = (int *) state;
- nir_foreach_instr(block, instr) {
- *count = *count + 1;
- }
- return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
- int count = 0;
- nir_foreach_overload(nir, overload) {
- if (!overload->impl)
- continue;
- nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
- }
- return count;
-}
-
nir_shader *
brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
@@ -178,15 +156,6 @@ brw_create_nir(struct brw_context *brw,
nir_print_shader(nir, stderr);
}
- static GLuint msg_id = 0;
- _mesa_gl_debug(&brw->ctx, &msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s NIR shader: %d inst\n",
- _mesa_shader_stage_to_abbrev(stage),
- count_nir_instrs(nir));
-
nir_convert_from_ssa(nir);
nir_validate_shader(nir);
From 23132cd13baa7b3e9688a118466261a282594b8e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Tue, 23 Jun 2015 23:15:22 -0700
Subject: [PATCH 0013/1208] i965: Fix whitespace error in gen8_depth_state.c
Trivial.
---
src/mesa/drivers/dri/i965/gen8_depth_state.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 12ac97a5d14..7c4ec06e84d 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -100,7 +100,7 @@ emit_depth_packets(struct brw_context *brw,
}
if (stencil_mt == NULL) {
- BEGIN_BATCH(5);
+ BEGIN_BATCH(5);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
From 32a220f1f60980de50ecefb3b9ab1f754ade8c83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 9 Jun 2015 11:06:56 +0300
Subject: [PATCH 0014/1208] glsl: remove cross validation of interpolation
qualifier with GLSL 4.40
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Tapani Pälli
Reviewed-by: Timothy Arceri
---
src/glsl/link_varyings.cpp | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 278a778797b..020842a54a3 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -128,7 +128,17 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
return;
}
- if (input->data.interpolation != output->data.interpolation) {
+ /* GLSL >= 4.40 removes the text requiring interpolation qualifiers
+ * to match across stages; they must only match within the same stage.
+ *
+ * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
+ *
+ * "It is a link-time error if, within the same stage, the interpolation
+ * qualifiers of variables of the same name do not match."
+ *
+ */
+ if (input->data.interpolation != output->data.interpolation &&
+ prog->Version < 440) {
linker_error(prog,
"%s shader output `%s' specifies %s "
"interpolation qualifier, "
From 29aaab2b5f55cc6d9a84f58ce2bb8607e76a9dde Mon Sep 17 00:00:00 2001
From: Grigori Goronzy
Date: Wed, 24 Jun 2015 03:38:02 +0200
Subject: [PATCH 0015/1208] winsys/radeon: align BO size to page size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is the basic granularity for BO allocations. The alignment also
helps with BO reuse by the cached bufmgr.
This results in a huge 45% speedup in Metro 2033 Redux on my test
system. The game relies on buffer orphaning with very small buffers
(hundreds of bytes in size) and that did not work efficiently
before. This change may also affect other applications and games.
Reviewed-by: Marek Olšák
---
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 78c95b15eb2..1f0caf60197 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -840,6 +840,12 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
memset(&desc, 0, sizeof(desc));
desc.base.alignment = alignment;
+ /* Align size to page size. This is the minimum alignment for normal
+ * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+ * like constant/uniform buffers, can benefit from better and more reuse.
+ */
+ size = align(size, 4096);
+
/* Only set one usage bit each for domains and flags, or the cache manager
* might consider different sets of domains / flags compatible
*/
From 390f94e3581384838595185a06d5943089d3f9ab Mon Sep 17 00:00:00 2001
From: Grigori Goronzy
Date: Wed, 24 Jun 2015 03:40:38 +0200
Subject: [PATCH 0016/1208] winsys/radeon: reduce BO cache timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
1000 ms is an extreme value for typical interactive loads. A large
cache has some disadvantages. Search for reusable BOs can take a long
time and memory might get exhausted.
Let's be rather conservative and use half of the old value,
500ms. This is beneficial to some loads on my test system and there
are no regressions.
Reviewed-by: Marek Olšák
---
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index d457f8a5ad1..d8bb353df9d 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -710,7 +710,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
if (!ws->kman)
goto fail;
- ws->cman = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0,
+ ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size));
if (!ws->cman)
goto fail;
From 30d67d38246410274713380664be87cd1df9486a Mon Sep 17 00:00:00 2001
From: Julien Isorce
Date: Tue, 23 Jun 2015 22:47:05 +0100
Subject: [PATCH 0017/1208] loader: move loader_open_device out of HAVE_LIBUDEV
block
Fixes the following build issue, when building without libudev.
CCLD libGL.la
./.libs/libglx.a(dri2_glx.o): In function `dri2CreateScreen':
src/glx/dri2_glx.c:1186: undefined reference to `loader_open_device'
collect2: ld returned 1 exit status
CCLD libEGL.la
Undefined symbols for architecture x86_64:
"_loader_open_device", referenced from:
_dri2_initialize_x11_dri2 in libegl_dri2.a(platform_x11.o)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91077
Signed-off-by: Julien Isorce
Reviewed-by: Emil Velikov
---
src/loader/loader.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/src/loader/loader.c b/src/loader/loader.c
index fc468153425..8452cd3560e 100644
--- a/src/loader/loader.c
+++ b/src/loader/loader.c
@@ -64,6 +64,8 @@
* Rob Clark
*/
+#include <errno.h>
+#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
@@ -71,10 +73,8 @@
#ifdef HAVE_LIBUDEV
#include <assert.h>
#include <dlfcn.h>
-#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
-#include <errno.h>
#ifdef USE_DRICONF
#include "xmlconfig.h"
#include "xmlpool.h"
@@ -104,6 +104,22 @@ static void default_logger(int level, const char *fmt, ...)
static void (*log_)(int level, const char *fmt, ...) = default_logger;
+int
+loader_open_device(const char *device_name)
+{
+ int fd;
+#ifdef O_CLOEXEC
+ fd = open(device_name, O_RDWR | O_CLOEXEC);
+ if (fd == -1 && errno == EINVAL)
+#endif
+ {
+ fd = open(device_name, O_RDWR);
+ if (fd != -1)
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+ }
+ return fd;
+}
+
#ifdef HAVE_LIBUDEV
#include <libudev.h>
@@ -314,22 +330,6 @@ get_id_path_tag_from_fd(struct udev *udev, int fd)
return id_path_tag;
}
-int
-loader_open_device(const char *device_name)
-{
- int fd;
-#ifdef O_CLOEXEC
- fd = open(device_name, O_RDWR | O_CLOEXEC);
- if (fd == -1 && errno == EINVAL)
-#endif
- {
- fd = open(device_name, O_RDWR);
- if (fd != -1)
- fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
- }
- return fd;
-}
-
#ifdef USE_DRICONF
const char __driConfigOptionsLoader[] =
DRI_CONF_BEGIN
From a552c897caea31bbff3f16d2af8f5028a58bd344 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 24 Jun 2015 12:59:55 +0100
Subject: [PATCH 0018/1208] st/wgl: add stw_nopfuncs.h to the sources lists
Signed-off-by: Emil Velikov
---
src/gallium/state_trackers/wgl/Makefile.sources | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/state_trackers/wgl/Makefile.sources b/src/gallium/state_trackers/wgl/Makefile.sources
index 8c463d5f18e..1e00caf97b7 100644
--- a/src/gallium/state_trackers/wgl/Makefile.sources
+++ b/src/gallium/state_trackers/wgl/Makefile.sources
@@ -9,6 +9,7 @@ C_SOURCES := \
stw_framebuffer.c \
stw_getprocaddress.c \
stw_nopfuncs.c \
+ stw_nopfuncs.h \
stw_pixelformat.c \
stw_st.c \
stw_tls.c \
From c1de7df6d4086070e63369ab0af3950f53a03592 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 22 Jun 2015 14:04:09 -0600
Subject: [PATCH 0019/1208] st/mesa: remove unneeded pipe_surface_release() in
st_render_texture()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This caused us to always free the pipe_surface for the renderbuffer.
The subsequent call to st_update_renderbuffer_surface() would typically
just recreate it. Remove the call to pipe_surface_release() and let
st_update_renderbuffer_surface() take care of freeing the old surface
if it needs to be replaced (because of change to mipmap level, etc).
This can save quite a few calls to pipe_context::create_surface() and
surface_destroy().
Reviewed-by: Marek Olšák
Reviewed-by: Jose Fonseca
---
src/mesa/state_tracker/st_cb_fbo.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 0399eef7204..57075904450 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -511,8 +511,6 @@ st_render_texture(struct gl_context *ctx,
strb->rtt_layered = att->Layered;
pipe_resource_reference(&strb->texture, pt);
- pipe_surface_release(pipe, &strb->surface);
-
st_update_renderbuffer_surface(st, strb);
strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
From e31bce4041122cd00712b60b4dc1eae6486f6579 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 24 Jun 2015 10:41:52 -0600
Subject: [PATCH 0020/1208] svga: silence warnings about unexpected shader type
Trivial.
---
src/gallium/drivers/svga/svga_screen.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 56e486786df..770f4933b4c 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -443,7 +443,9 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
return 0;
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_COMPUTE:
- /* no support for geometry or compute shaders at this time */
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ /* no support for geometry, tess or compute shaders at this time */
return 0;
default:
debug_printf("Unexpected shader type (%u) query\n", shader);
From 147cdb53ecd225ea21d8d552607d384217346ecb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Tue, 23 Jun 2015 23:17:53 -0700
Subject: [PATCH 0021/1208] nir: Use a switch statement for detecting move-like
operations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Suggested by Jason Ekstrand.
Signed-off-by: Kenneth Graunke
Reviewed-by: Samuel Iglesias Gonsálvez
---
src/glsl/nir/nir_opt_peephole_select.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
index ef7c9775aa3..6620e5dc81f 100644
--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ b/src/glsl/nir/nir_opt_peephole_select.c
@@ -82,14 +82,22 @@ block_check_for_allowed_instrs(nir_block *block)
break;
case nir_instr_type_alu: {
- /* It must be a move operation */
nir_alu_instr *mov = nir_instr_as_alu(instr);
- if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
- mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
- mov->op != nir_op_fabs && mov->op != nir_op_iabs &&
- mov->op != nir_op_vec2 && mov->op != nir_op_vec3 &&
- mov->op != nir_op_vec4)
+ switch (mov->op) {
+ case nir_op_fmov:
+ case nir_op_imov:
+ case nir_op_fneg:
+ case nir_op_ineg:
+ case nir_op_fabs:
+ case nir_op_iabs:
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ /* It must be a move-like operation. */
+ break;
+ default:
return false;
+ }
/* Can't handle saturate */
if (mov->dest.saturate)
From 9d4b9f1e0c661e5ed8ce2e71c76ce8cc1adf90dd Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 30 Apr 2015 16:53:12 +0100
Subject: [PATCH 0022/1208] i965: Transplant PIPE_CONTROL routines to
brw_pipe_control
Start trimming the fat from intel_batchbuffer.c, first by moving the set
of routines for emitting PIPE_CONTROLs (along with the lore concerning
hardware workarounds) to a separate file, brw_pipe_control.c.
Signed-off-by: Chris Wilson
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_context.h | 11 +
src/mesa/drivers/dri/i965/brw_pipe_control.c | 331 ++++++++++++++++++
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 304 ----------------
src/mesa/drivers/dri/i965/intel_batchbuffer.h | 10 -
5 files changed, 343 insertions(+), 314 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/brw_pipe_control.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 981fe79b132..5a33aacbc23 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -86,6 +86,7 @@ i965_FILES = \
brw_object_purgeable.c \
brw_packed_float.c \
brw_performance_monitor.c \
+ brw_pipe_control.c \
brw_primitive_restart.c \
brw_program.c \
brw_program.h \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a7d83f8d7b4..761110beef3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1998,6 +1998,17 @@ bool
gen9_use_linear_1d_layout(const struct brw_context *brw,
const struct intel_mipmap_tree *mt);
+/* brw_pipe_control.c */
+void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper);
+void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
+void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void intel_emit_depth_stall_flushes(struct brw_context *brw);
+void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
new file mode 100644
index 00000000000..bd45a114f2f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_reg.h"
+
+/**
+ * According to the latest documentation, any PIPE_CONTROL with the
+ * "Command Streamer Stall" bit set must also have another bit set,
+ * with five different options:
+ *
+ * - Render Target Cache Flush
+ * - Depth Cache Flush
+ * - Stall at Pixel Scoreboard
+ * - Post-Sync Operation
+ * - Depth Stall
+ *
+ * I chose "Stall at Pixel Scoreboard" since we've used it effectively
+ * in the past, but the choice is fairly arbitrary.
+ */
+static void
+gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+{
+ uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_DEPTH_STALL;
+
+ /* If we're doing a CS stall, and don't already have one of the
+ * workaround bits set, add "Stall at Pixel Scoreboard."
+ */
+ if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
+ *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+}
+
+/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * Note that the kernel does CS stalls between batches, so we only need
+ * to count them within a batch.
+ */
+static uint32_t
+gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+{
+ if (brw->gen == 7 && !brw->is_haswell) {
+ if (flags & PIPE_CONTROL_CS_STALL) {
+ /* If we're doing a CS stall, reset the counter and carry on. */
+ brw->batch.pipe_controls_since_last_cs_stall = 0;
+ return 0;
+ }
+
+ /* If this is the fourth pipe control without a CS stall, do one now. */
+ if (++brw->batch.pipe_controls_since_last_cs_stall == 4) {
+ brw->batch.pipe_controls_since_last_cs_stall = 0;
+ return PIPE_CONTROL_CS_STALL;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Emit a PIPE_CONTROL with various flushing flags.
+ *
+ * The caller is responsible for deciding what flags are appropriate for the
+ * given generation.
+ */
+void
+brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
+{
+ if (brw->gen >= 8) {
+ gen8_add_cs_stall_workaround_bits(&flags);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+ OUT_BATCH(flags);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else if (brw->gen >= 6) {
+ flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+ OUT_BATCH(flags);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+}
+
+/**
+ * Emit a PIPE_CONTROL that writes to a buffer object.
+ *
+ * \p flags should contain one of the following items:
+ * - PIPE_CONTROL_WRITE_IMMEDIATE
+ * - PIPE_CONTROL_WRITE_TIMESTAMP
+ * - PIPE_CONTROL_WRITE_DEPTH_COUNT
+ */
+void
+brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper)
+{
+ if (brw->gen >= 8) {
+ gen8_add_cs_stall_workaround_bits(&flags);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+ OUT_BATCH(flags);
+ OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ } else if (brw->gen >= 6) {
+ flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+ /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
+ * on later platforms. We always use PPGTT on Gen7+.
+ */
+ unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+ OUT_BATCH(flags);
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ gen6_gtt | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ }
+}
+
+/**
+ * Restriction [DevSNB, DevIVB]:
+ *
+ * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
+ * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
+ * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
+ * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
+ * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
+ * unless SW can otherwise guarantee that the pipeline from WM onwards is
+ * already flushed (e.g., via a preceding MI_FLUSH).
+ */
+void
+intel_emit_depth_stall_flushes(struct brw_context *brw)
+{
+ assert(brw->gen >= 6 && brw->gen <= 9);
+
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
+}
+
+/**
+ * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
+ * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
+ * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+ * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+ * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
+ * to be sent before any combination of VS associated 3DSTATE."
+ */
+void
+gen7_emit_vs_workaround_flush(struct brw_context *brw)
+{
+ assert(brw->gen == 7);
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_WRITE_IMMEDIATE
+ | PIPE_CONTROL_DEPTH_STALL,
+ brw->batch.workaround_bo, 0,
+ 0, 0);
+}
+
+
+/**
+ * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
+ */
+void
+gen7_emit_cs_stall_flush(struct brw_context *brw)
+{
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_CS_STALL
+ | PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->batch.workaround_bo, 0,
+ 0, 0);
+}
+
+
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6. From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
+ */
+void
+intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
+{
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+
+ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->batch.workaround_bo, 0, 0, 0);
+}
+
+/* Emit a pipelined flush to either flush render and texture cache for
+ * reading from a FBO-drawn texture, or flush so that frontbuffer
+ * render appears on the screen in DRI1.
+ *
+ * This is also used for the always_flush_cache driconf debug option.
+ */
+void
+intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
+{
+ if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
+ BEGIN_BATCH_BLT(4);
+ OUT_BATCH(MI_FLUSH_DW);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ if (brw->gen >= 6) {
+ if (brw->gen == 9) {
+ /* Hardware workaround: SKL
+ *
+ * Emit Pipe Control with all bits set to zero before emitting
+ * a Pipe Control with VF Cache Invalidate set.
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
+ flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CS_STALL;
+
+ if (brw->gen == 6) {
+ /* Hardware workaround: SNB B-Spec says:
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
+ * Flush Enable =1, a PIPE_CONTROL with any non-zero
+ * post-sync-op is required.
+ */
+ intel_emit_post_sync_nonzero_flush(brw);
+ }
+ }
+ brw_emit_pipe_control_flush(brw, flags);
+ }
+
+ brw_render_cache_set_clear(brw);
+}
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index ed659ed625e..54081a1412f 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -439,310 +439,6 @@ intel_batchbuffer_data(struct brw_context *brw,
brw->batch.used += bytes >> 2;
}
-/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- * - Render Target Cache Flush
- * - Depth Cache Flush
- * - Stall at Pixel Scoreboard
- * - Post-Sync Operation
- * - Depth Stall
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
- */
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
-{
- uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL;
-
- /* If we're doing a CS stall, and don't already have one of the
- * workaround bits set, add "Stall at Pixel Scoreboard."
- */
- if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
- *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-}
-
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
- */
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
-{
- if (brw->gen == 7 && !brw->is_haswell) {
- if (flags & PIPE_CONTROL_CS_STALL) {
- /* If we're doing a CS stall, reset the counter and carry on. */
- brw->batch.pipe_controls_since_last_cs_stall = 0;
- return 0;
- }
-
- /* If this is the fourth pipe control without a CS stall, do one now. */
- if (++brw->batch.pipe_controls_since_last_cs_stall == 4) {
- brw->batch.pipe_controls_since_last_cs_stall = 0;
- return PIPE_CONTROL_CS_STALL;
- }
- }
- return 0;
-}
-
-/**
- * Emit a PIPE_CONTROL with various flushing flags.
- *
- * The caller is responsible for deciding what flags are appropriate for the
- * given generation.
- */
-void
-brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
-{
- if (brw->gen >= 8) {
- gen8_add_cs_stall_workaround_bits(&flags);
-
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
- OUT_BATCH(flags);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else if (brw->gen >= 6) {
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(flags);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
-
-/**
- * Emit a PIPE_CONTROL that writes to a buffer object.
- *
- * \p flags should contain one of the following items:
- * - PIPE_CONTROL_WRITE_IMMEDIATE
- * - PIPE_CONTROL_WRITE_TIMESTAMP
- * - PIPE_CONTROL_WRITE_DEPTH_COUNT
- */
-void
-brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- drm_intel_bo *bo, uint32_t offset,
- uint32_t imm_lower, uint32_t imm_upper)
-{
- if (brw->gen >= 8) {
- gen8_add_cs_stall_workaround_bits(&flags);
-
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
- OUT_BATCH(flags);
- OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- } else if (brw->gen >= 6) {
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
- /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
- * on later platforms. We always use PPGTT on Gen7+.
- */
- unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(flags);
- OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- gen6_gtt | offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
- OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- }
-}
-
-/**
- * Restriction [DevSNB, DevIVB]:
- *
- * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
- * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
- * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
- * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
- * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
- * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
- * unless SW can otherwise guarantee that the pipeline from WM onwards is
- * already flushed (e.g., via a preceding MI_FLUSH).
- */
-void
-intel_emit_depth_stall_flushes(struct brw_context *brw)
-{
- assert(brw->gen >= 6 && brw->gen <= 9);
-
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-}
-
-/**
- * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
- * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
- * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
- * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
- * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
- * to be sent before any combination of VS associated 3DSTATE."
- */
-void
-gen7_emit_vs_workaround_flush(struct brw_context *brw)
-{
- assert(brw->gen == 7);
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_WRITE_IMMEDIATE
- | PIPE_CONTROL_DEPTH_STALL,
- brw->batch.workaround_bo, 0,
- 0, 0);
-}
-
-
-/**
- * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
- */
-void
-gen7_emit_cs_stall_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_CS_STALL
- | PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->batch.workaround_bo, 0,
- 0, 0);
-}
-
-
-/**
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6. From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- * "1 of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it. Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either. Notify enable is IRQs, which aren't
- * really our business. That leaves only stall at scoreboard.
- */
-void
-intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
-
- brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->batch.workaround_bo, 0, 0, 0);
-}
-
-/* Emit a pipelined flush to either flush render and texture cache for
- * reading from a FBO-drawn texture, or flush so that frontbuffer
- * render appears on the screen in DRI1.
- *
- * This is also used for the always_flush_cache driconf debug option.
- */
-void
-intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
-{
- if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
- BEGIN_BATCH_BLT(4);
- OUT_BATCH(MI_FLUSH_DW);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
- if (brw->gen >= 6) {
- if (brw->gen == 9) {
- /* Hardware workaround: SKL
- *
- * Emit Pipe Control with all bits set to zero before emitting
- * a Pipe Control with VF Cache Invalidate set.
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
- flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_VF_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CS_STALL;
-
- if (brw->gen == 6) {
- /* Hardware workaround: SNB B-Spec says:
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
- * Flush Enable =1, a PIPE_CONTROL with any non-zero
- * post-sync-op is required.
- */
- intel_emit_post_sync_nonzero_flush(brw);
- }
- }
- brw_emit_pipe_control_flush(brw, flags);
- }
-
- brw_render_cache_set_clear(brw);
-}
-
static void
load_sized_register_mem(struct brw_context *brw,
uint32_t reg,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 7bdd8364346..ef8a6ffcca8 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -63,16 +63,6 @@ bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
-void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
-void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- drm_intel_bo *bo, uint32_t offset,
- uint32_t imm_lower, uint32_t imm_upper);
-void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
-void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void intel_emit_depth_stall_flushes(struct brw_context *brw);
-void gen7_emit_vs_workaround_flush(struct brw_context *brw);
-void gen7_emit_cs_stall_flush(struct brw_context *brw);
-
static inline uint32_t float_as_int(float f)
{
union {
From 4b35ab9bdb4e663f41ff5c9ae5bbcc650b6093f9 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 30 Apr 2015 17:04:51 +0100
Subject: [PATCH 0023/1208] i965: Rename intel_emit* to reflect their new
location in brw_pipe_control
Signed-off-by: Chris Wilson
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_blorp.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_clear.c | 4 ++--
src/mesa/drivers/dri/i965/brw_context.h | 6 +++---
src/mesa/drivers/dri/i965/brw_draw.c | 4 ++--
src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 4 ++--
src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 8 ++++----
src/mesa/drivers/dri/i965/brw_meta_updownsample.c | 4 ++--
src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +-
src/mesa/drivers/dri/i965/brw_performance_monitor.c | 8 ++++----
src/mesa/drivers/dri/i965/brw_pipe_control.c | 8 ++++----
src/mesa/drivers/dri/i965/brw_state_upload.c | 4 ++--
src/mesa/drivers/dri/i965/gen6_blorp.cpp | 6 +++---
src/mesa/drivers/dri/i965/gen6_depth_state.c | 2 +-
src/mesa/drivers/dri/i965/gen6_queryobj.c | 6 +++---
src/mesa/drivers/dri/i965/gen6_sol.c | 2 +-
src/mesa/drivers/dri/i965/gen6_urb.c | 2 +-
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 4 ++--
src/mesa/drivers/dri/i965/gen7_misc_state.c | 2 +-
src/mesa/drivers/dri/i965/gen7_sol_state.c | 4 ++--
src/mesa/drivers/dri/i965/gen8_depth_state.c | 2 +-
src/mesa/drivers/dri/i965/intel_blit.c | 6 +++---
src/mesa/drivers/dri/i965/intel_buffer_objects.c | 4 ++--
src/mesa/drivers/dri/i965/intel_extensions.c | 6 +++---
src/mesa/drivers/dri/i965/intel_fbo.c | 2 +-
src/mesa/drivers/dri/i965/intel_pixel_read.c | 2 +-
src/mesa/drivers/dri/i965/intel_syncobj.c | 2 +-
src/mesa/drivers/dri/i965/intel_tex_image.c | 2 +-
27 files changed, 55 insertions(+), 55 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index b404869f0c7..2ccfae1d77f 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -220,7 +220,7 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
* data with different formats, which blorp does for stencil and depth
* data.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
retry:
intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
@@ -283,7 +283,7 @@ retry:
/* Flush the sampler cache so any texturing from the destination is
* coherent.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 1d4ba3cac7e..f981388ef1a 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -184,7 +184,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (fb->MaxNumLayers > 0) {
for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
@@ -204,7 +204,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
* followed by Depth FLUSH'
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 761110beef3..85d8f14a006 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -2003,9 +2003,9 @@ void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
drm_intel_bo *bo, uint32_t offset,
uint32_t imm_lower, uint32_t imm_upper);
-void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
-void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void intel_emit_depth_stall_flushes(struct brw_context *brw);
+void brw_emit_mi_flush(struct brw_context *brw);
+void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void brw_emit_depth_stall_flushes(struct brw_context *brw);
void gen7_emit_vs_workaround_flush(struct brw_context *brw);
void gen7_emit_cs_stall_flush(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b91597a9f5d..69ad4d444da 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -217,7 +217,7 @@ static void brw_emit_prim(struct brw_context *brw,
* the besides the draw code.
*/
if (brw->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
/* If indirect, emit a bunch of loads from the indirect BO. */
@@ -284,7 +284,7 @@ static void brw_emit_prim(struct brw_context *brw,
ADVANCE_BATCH();
if (brw->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 49f2e3e498c..5b8191c093b 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -623,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
* write-flush must be issued before sending any DRAW commands on that
* render target.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* If we had to fall back to plain clear for any buffers, clear those now
* by calling into meta.
@@ -677,7 +677,7 @@ brw_meta_resolve_color(struct brw_context *brw,
GLuint fbo, rbo;
struct rect rect;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index d079197a2a9..d4abfe63de7 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -500,11 +500,11 @@ brw_meta_fbo_stencil_blit(struct brw_context *brw,
.mirror_x = mirror_x, .mirror_y = mirror_y };
adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
brw_meta_stencil_blit(brw,
dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
void
@@ -524,7 +524,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw,
if (dst->stencil_mt)
dst = dst->stencil_mt;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
_mesa_GenFramebuffers(1, &fbo);
@@ -535,7 +535,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw,
GL_RENDERBUFFER, rbo);
brw_meta_stencil_blit(brw, dst, 0, 0, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_DeleteRenderbuffers(1, &rbo);
_mesa_DeleteFramebuffers(1, &fbo);
diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
index 21507b1ad2a..f39d50a69e6 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
@@ -116,7 +116,7 @@ brw_meta_updownsample(struct brw_context *brw,
blit_bit = GL_COLOR_BUFFER_BIT;
}
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
_mesa_GenFramebuffers(2, fbos);
@@ -147,5 +147,5 @@ brw_meta_updownsample(struct brw_context *brw,
_mesa_meta_end(ctx);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5a4515b582d..1bbb16cf697 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -580,7 +580,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
* non-pipelined state that will need the PIPE_CONTROL workaround.
*/
if (brw->gen == 6) {
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
}
unsigned int len;
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 2c8cd491a8e..0a123754257 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -581,7 +581,7 @@ snapshot_statistics_registers(struct brw_context *brw,
const int group = PIPELINE_STATS_COUNTERS;
const int num_counters = ctx->PerfMonitor.Groups[group].NumCounters;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
for (int i = 0; i < num_counters; i++) {
if (BITSET_TEST(monitor->base.ActiveCounters[group], i)) {
@@ -687,7 +687,7 @@ stop_oa_counters(struct brw_context *brw)
* The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
* including the required PIPE_CONTROL flushes.
*
- * Sandybridge is the worst case scenario: intel_batchbuffer_emit_mi_flush
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush
* expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush
* before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add
* the 3 DWords for MI_REPORT_PERF_COUNT itself.
@@ -713,7 +713,7 @@ emit_mi_report_perf_count(struct brw_context *brw,
int batch_used = brw->batch.used;
/* Reports apparently don't always get written unless we flush first. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen == 5) {
/* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all
@@ -751,7 +751,7 @@ emit_mi_report_perf_count(struct brw_context *brw,
}
/* Reports apparently don't always get written unless we flush after. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
(void) batch_used;
assert(brw->batch.used - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index bd45a114f2f..b4c86b9dff9 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -189,7 +189,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
* already flushed (e.g., via a preceding MI_FLUSH).
*/
void
-intel_emit_depth_stall_flushes(struct brw_context *brw)
+brw_emit_depth_stall_flushes(struct brw_context *brw)
{
assert(brw->gen >= 6 && brw->gen <= 9);
@@ -270,7 +270,7 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
* really our business. That leaves only stall at scoreboard.
*/
void
-intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
+brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_CS_STALL |
@@ -287,7 +287,7 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
* This is also used for the always_flush_cache driconf debug option.
*/
void
-intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
+brw_emit_mi_flush(struct brw_context *brw)
{
if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
BEGIN_BATCH_BLT(4);
@@ -321,7 +321,7 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
* Flush Enable =1, a PIPE_CONTROL with any non-zero
* post-sync-op is required.
*/
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
}
}
brw_emit_pipe_control_flush(brw, flags);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 08d1ac28885..7662c3b580c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -349,7 +349,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
return;
if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
brw_upload_invariant_state(brw);
@@ -710,7 +710,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
brw_upload_programs(brw, pipeline);
merge_ctx_state(brw, &state);
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index b6a3d78d849..54c4a6dfdd8 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -821,7 +821,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
/* 3DSTATE_DEPTH_BUFFER */
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
/* 3DSTATE_DEPTH_BUFFER dw0 */
@@ -896,7 +896,7 @@ static void
gen6_blorp_emit_depth_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -1021,7 +1021,7 @@ gen6_blorp_exec(struct brw_context *brw,
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
/* Emit workaround flushes when we switch from drawing to blorping. */
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
gen6_emit_3dstate_sample_mask(brw,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 1df0bd47571..8f0d7dc5431 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -65,7 +65,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
*/
bool enable_hiz_ss = hiz || separate_stencil;
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
if (!irb)
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index ba5c944fb3d..9f4a5db3592 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -86,7 +86,7 @@ static void
write_primitives_generated(struct brw_context *brw,
drm_intel_bo *query_bo, int stream, int idx)
{
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen >= 7 && stream > 0) {
brw_store_register_mem64(brw, query_bo,
@@ -100,7 +100,7 @@ static void
write_xfb_primitives_written(struct brw_context *brw,
drm_intel_bo *bo, int stream, int idx)
{
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen >= 7) {
brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(stream), idx);
@@ -157,7 +157,7 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo,
/* Emit a flush to make sure various parts of the pipeline are complete and
* we get an accurate value
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
brw_store_register_mem64(brw, bo, reg, idx);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index be80d7bdfc5..3899ce9451f 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -292,5 +292,5 @@ brw_end_transform_feedback(struct gl_context *ctx,
* simplicity, just do a full flush.
*/
struct brw_context *brw = brw_context(ctx);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index 107a4f24fa6..c7311fd0b03 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -120,7 +120,7 @@ gen6_upload_urb( struct brw_context *brw )
* a workaround.
*/
if (brw->urb.gs_present && !gs_present)
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
brw->urb.gs_present = gs_present;
}
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 2bdc82bc895..abace6df37e 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -645,7 +645,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
/* 3DSTATE_DEPTH_BUFFER */
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -696,7 +696,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
static void
gen7_blorp_emit_depth_disable(struct brw_context *brw)
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index f4f665219d6..a14d4a0c50d 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -57,7 +57,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
return;
}
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
if (!irb)
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index aec4f44bb73..41573a80a52 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -365,7 +365,7 @@ gen7_save_primitives_written_counters(struct brw_context *brw,
}
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Emit MI_STORE_REGISTER_MEM commands to write the values. */
for (int i = 0; i < streams; i++) {
@@ -502,7 +502,7 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
(struct brw_transform_feedback_object *) obj;
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the SOL buffer offset register values. */
if (brw->gen < 8) {
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 7c4ec06e84d..81447f8d0b5 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -57,7 +57,7 @@ emit_depth_packets(struct brw_context *brw,
return;
}
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
/* _NEW_BUFFERS, _NEW_DEPTH, _NEW_STENCIL */
BEGIN_BATCH(8);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index d3ab769356c..9fac63d56a1 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -460,7 +460,7 @@ intelEmitCopyBlit(struct brw_context *brw,
ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return true;
}
@@ -544,7 +544,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return true;
}
@@ -667,5 +667,5 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
ADVANCE_BATCH_TILED(dst_y_tiled, false);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 627c487f0e7..ff05b5cd0e7 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -560,7 +560,7 @@ brw_unmap_buffer(struct gl_context *ctx,
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
drm_intel_bo_unreference(intel_obj->range_map_bo[index]);
intel_obj->range_map_bo[index] = NULL;
@@ -632,7 +632,7 @@ brw_copy_buffer_subdata(struct gl_context *ctx,
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
void
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 365b4b8f718..3423190c485 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -76,7 +76,7 @@ can_do_pipelined_register_writes(struct brw_context *brw)
OUT_BATCH(expected_value);
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the register's value back to the buffer. */
BEGIN_BATCH(3);
@@ -132,7 +132,7 @@ can_write_oacontrol(struct brw_context *brw)
OUT_BATCH(expected_value);
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the register's value back to the buffer. */
BEGIN_BATCH(3);
@@ -143,7 +143,7 @@ can_write_oacontrol(struct brw_context *brw)
offset * sizeof(uint32_t));
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Set OACONTROL back to zero (everything off). */
BEGIN_BATCH(3);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 1b3a72f3ec2..9e6a7116630 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -1076,7 +1076,7 @@ brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
if (!_mesa_set_search(brw->render_cache, bo))
return;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
/**
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 30380570d62..3fe506e3cf1 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -247,7 +247,7 @@ intelReadPixels(struct gl_context * ctx,
* rendered to via a PBO at any point, so it seems better to just
* flush here unconditionally.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return;
}
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
index 3cfa7e593ab..c44c4beceef 100644
--- a/src/mesa/drivers/dri/i965/intel_syncobj.c
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -69,7 +69,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
assert(!fence->batch_bo);
assert(!fence->signalled);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
fence->batch_bo = brw->batch.bo;
drm_intel_bo_reference(fence->batch_bo);
intel_batchbuffer_flush(brw);
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index ebe84b664d4..e077d5e4743 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -490,7 +490,7 @@ intel_get_tex_image(struct gl_context *ctx,
* See the related comment in intelReadPixels() for a more detailed
* explanation.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return;
}
From c2ff3485b3d48749ea9dcad07bc1a691627dc3e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Thu, 11 Jun 2015 10:41:52 +0300
Subject: [PATCH 0024/1208] glsl: clone inputs and outputs during linking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This increases memory pressure during linking but makes it easier
for the backend to free the IR once it is no longer needed.
v2: use resource list as ralloc context in case of relink (Kenneth)
Signed-off-by: Tapani Pälli
Reviewed-by: Kenneth Graunke
Cc: mesa-stable@lists.freedesktop.org
---
src/glsl/linker.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 4a726d4e2e7..5da9cadcb08 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2637,7 +2637,9 @@ add_interface_variables(struct gl_shader_program *shProg,
continue;
};
- if (!add_program_resource(shProg, programInterface, var,
+ /* Clone ir_variable data so that backend is able to free memory. */
+ if (!add_program_resource(shProg, programInterface,
+ var->clone(shProg->ProgramResourceList, NULL),
build_stageref(shProg, var->name) | mask))
return false;
}
From 104c8fc2c2aa5621261f80aa6b4f76c3163078f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Thu, 11 Jun 2015 10:41:53 +0300
Subject: [PATCH 0025/1208] i965: Delete linked GLSL IR when using NIR.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is based on Kenneth's patch to delete 'most of the IR'. Due to
linker changes to clone variables, we can now free all of the IR.
Saves 58MB of memory when replaying a Dota 2 trace on Broadwell.
Signed-off-by: Tapani Pälli
Reviewed-by: Kenneth Graunke
Cc: mesa-stable@lists.freedesktop.org
---
src/mesa/drivers/dri/i965/brw_shader.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 32c40131434..5653d6ba1e4 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -387,8 +387,11 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
brw_add_texrect_params(prog);
- if (options->NirOptions)
+ if (options->NirOptions) {
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+ ralloc_free(shader->ir);
+ shader->ir = NULL;
+ }
_mesa_reference_program(ctx, &prog, NULL);
}
From b2c6ba0c4b21391dc35018e1c8c4f7f7d8952bea Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Mon, 8 Jun 2015 16:03:19 -0700
Subject: [PATCH 0026/1208] i965/fs_live_variables: Do liveness analysis
bottom-to-top
From Muchnick's Advanced Compiler Design and Implementation:
"To determine which variables are live at each point in a flowgraph, we
perform a backward data-flow analysis"
Previously, we were walking the blocks forwards and updating the livein and
then the liveout. However, the livein calculation depends on the liveout
and the liveout depends on the successor blocks. The net result is that it
takes one full iteration to go from liveout to livein and then another
full iteration to propagate to the predecessors. This works out to an
O(n^2) computation where n is the number of blocks. If we run things in
the other order, it's O(nl) where l is the maximum loop depth which is
practically bounded by 3.
On my HSW desktop, one particular shadertoy test gets a 20% improvement in
compile times:
N Min Max Median Avg Stddev
x 10 15.965 16.884 16.026 16.1822 0.34736846
+ 10 12.813 13.052 12.876 12.8891 0.06913666
Difference at 95.0% confidence
-3.2931 +/- 0.235316
-20.3501% +/- 1.45417%
(Student's t, pooled s = 0.250444)
Reviewed-by: Matt Turner
---
.../dri/i965/brw_fs_live_variables.cpp | 38 +++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 502161d5128..19aec92fad1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -204,27 +204,9 @@ fs_live_variables::compute_live_variables()
while (cont) {
cont = false;
- foreach_block (block, cfg) {
+ foreach_block_reverse (block, cfg) {
struct block_data *bd = &block_data[block->num];
- /* Update livein */
- for (int i = 0; i < bitset_words; i++) {
- BITSET_WORD new_livein = (bd->use[i] |
- (bd->liveout[i] &
- ~bd->def[i]));
- if (new_livein & ~bd->livein[i]) {
- bd->livein[i] |= new_livein;
- cont = true;
- }
- }
- BITSET_WORD new_livein = (bd->flag_use[0] |
- (bd->flag_liveout[0] &
- ~bd->flag_def[0]));
- if (new_livein & ~bd->flag_livein[0]) {
- bd->flag_livein[0] |= new_livein;
- cont = true;
- }
-
/* Update liveout */
foreach_list_typed(bblock_link, child_link, link, &block->children) {
struct block_data *child_bd = &block_data[child_link->block->num];
@@ -244,6 +226,24 @@ fs_live_variables::compute_live_variables()
cont = true;
}
}
+
+ /* Update livein */
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein = (bd->use[i] |
+ (bd->liveout[i] &
+ ~bd->def[i]));
+ if (new_livein & ~bd->livein[i]) {
+ bd->livein[i] |= new_livein;
+ cont = true;
+ }
+ }
+ BITSET_WORD new_livein = (bd->flag_use[0] |
+ (bd->flag_liveout[0] &
+ ~bd->flag_def[0]));
+ if (new_livein & ~bd->flag_livein[0]) {
+ bd->flag_livein[0] |= new_livein;
+ cont = true;
+ }
}
}
}
From 9f261dc18dba0aa4dc43fc560d343ba9ffd486e9 Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Mon, 22 Jun 2015 11:09:27 -0700
Subject: [PATCH 0027/1208] radeon: Advertise correct
GL_QUERY_COUNTER_BITS/GL_SAMPLES_PASSED value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Commit b765119c changed the default value of all the counter bits to
64. However, older hardware only has 32 counter bits.
This has only been build-tested. We don't have any tests that verify
the advertised value against implementation behavior, so I don't know
what additional testing could be done.
NOTE: It appears that many Gallium drivers (at least r300 and i915g)
have the same problem, but I don't see a way for the state-tracker to
determine the counter size. Marek says, "For Gallium, a new PIPE_CAP or
new get_xxx_param function will be needed."
Signed-off-by: Ian Romanick
Reviewed-by: Marek Olšák
Cc: Alex Deucher
---
.../dri/radeon/radeon_common_context.c | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 9699dcbfcdc..3d0cedaf33a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -194,6 +194,29 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon_init_dma(radeon);
+ /* _mesa_initialize_context calls _mesa_init_queryobj which
+ * initializes all of the counter sizes to 64. The counters on r100
+ * and r200 are only 32-bits for occlusion queries. Those are the
+ * only counters, so set the other sizes to zero.
+ */
+ radeon->glCtx.Const.QueryCounterBits.SamplesPassed = 32;
+
+ radeon->glCtx.Const.QueryCounterBits.TimeElapsed = 0;
+ radeon->glCtx.Const.QueryCounterBits.Timestamp = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesGenerated = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesWritten = 0;
+ radeon->glCtx.Const.QueryCounterBits.VerticesSubmitted = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesSubmitted = 0;
+ radeon->glCtx.Const.QueryCounterBits.VsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.TessPatches = 0;
+ radeon->glCtx.Const.QueryCounterBits.TessInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.GsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.GsPrimitives = 0;
+ radeon->glCtx.Const.QueryCounterBits.FsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.ComputeInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.ClInPrimitives = 0;
+ radeon->glCtx.Const.QueryCounterBits.ClOutPrimitives = 0;
+
return GL_TRUE;
}
From d1663ccb4c664b0f544ed5d6f0761f3ae2435199 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Wed, 17 Jun 2015 15:50:11 -0700
Subject: [PATCH 0028/1208] i965/bxt: Add basic Broxton infrastructure
The thread counts and URB information are all speculative numbers that were
based on some CHV numbers at the time.
v2:
Originally this patch had PCI IDs. I've moved that to a new patch at the end of
the series.
Remove is_cherryview hack.
Add PCI ids. These match the ones defined in the kernel. The only one tested by
us is 0x0a84.
Capitalize the hex string (Mark)
Signed-off-by: Ben Widawsky
Tested-by: "Lecluse, Philippe"
Reviewed-by: Mark Janes
---
include/pci_ids/i965_pci_ids.h | 3 +++
src/mesa/drivers/dri/i965/brw_context.c | 1 +
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_device_info.c | 16 ++++++++++++++++
src/mesa/drivers/dri/i965/brw_device_info.h | 1 +
5 files changed, 22 insertions(+)
diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 8d757aaa767..8a425999429 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -128,3 +128,6 @@ CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics (Broxton)")
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cf408830620..4b51fe5da56 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -715,6 +715,7 @@ brwCreateContext(gl_api api,
brw->is_baytrail = devinfo->is_baytrail;
brw->is_haswell = devinfo->is_haswell;
brw->is_cherryview = devinfo->is_cherryview;
+ brw->is_broxton = devinfo->is_broxton;
brw->has_llc = devinfo->has_llc;
brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 85d8f14a006..3553f6ec48c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1117,6 +1117,7 @@ struct brw_context
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+ bool is_broxton;
bool has_hiz;
bool has_separate_stencil;
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 97243a47293..342e56622b7 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -334,6 +334,22 @@ static const struct brw_device_info brw_device_info_skl_gt3 = {
.supports_simd16_3src = true,
};
+static const struct brw_device_info brw_device_info_bxt = {
+ GEN9_FEATURES,
+ .is_broxton = 1,
+ .gt = 1,
+ .has_llc = false,
+ .max_vs_threads = 112,
+ .max_gs_threads = 112,
+ .max_wm_threads = 32,
+ .urb = {
+ .size = 64,
+ .min_vs_entries = 34,
+ .max_vs_entries = 640,
+ .max_gs_entries = 256,
+ }
+};
+
const struct brw_device_info *
brw_get_device_info(int devid, int revision)
{
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 65c024ceeed..7b7a1fc046a 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -35,6 +35,7 @@ struct brw_device_info
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+ bool is_broxton;
bool has_hiz_and_separate_stencil;
bool must_use_separate_stencil;
From 77a78c65f80323059d892c501ca551ccf324b17d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 25 Jun 2015 00:56:32 +0200
Subject: [PATCH 0029/1208] softpipe,llvmpipe: fix PIPE_SHADER_CAP_MAX_INPUTS
value
PIPE_MAX_SHADER_INPUTS was recently bumped to 80 because of tessellation.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91099
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91101
Reviewed-by: Brian Paul
Reviewed-by: Roland Scheidegger
---
src/gallium/auxiliary/gallivm/lp_bld_limits.h | 2 +-
src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index db503514881..2851fd10b04 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -100,7 +100,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return LP_MAX_TGSI_NESTING;
case PIPE_SHADER_CAP_MAX_INPUTS:
- return PIPE_MAX_SHADER_INPUTS;
+ return 32;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return 32;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 208640cfd46..e8ee2565831 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -213,7 +213,7 @@ struct tgsi_sampler
* input register files, this is the stride between two 1D
* arrays.
*/
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS PIPE_MAX_SHADER_INPUTS
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32
/* The maximum number of bytes per constant buffer.
*/
From 6026f7e8fb993a34f3e2ad1638d7a842a5cefd80 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 11 Jun 2015 01:59:44 -0700
Subject: [PATCH 0030/1208] nir: Recognize max(min(a, 1.0), 0.0) as fsat(a).
We already recognize min(max(a, 0.0), 1.0) as a saturate, but neglected
this variant (which is also handled by the GLSL IR pass).
shader-db results on Broadwell:
total instructions in shared programs: 7363046 -> 7362788 (-0.00%)
instructions in affected programs: 11928 -> 11670 (-2.16%)
helped: 64
HURT: 0
Signed-off-by: Kenneth Graunke
Reviewed-by: Iago Toral Quiroga
---
src/glsl/nir/nir_opt_algebraic.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index eace791f5b0..3068445cbfe 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -101,6 +101,7 @@ optimizations = [
(('umin', a, a), a),
(('umax', a, a), a),
(('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
From c97105ee12e54ab893351ebbda8c2348c899adde Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 24 Jun 2015 00:04:11 -0700
Subject: [PATCH 0031/1208] i965: Drop brw->depthstencil.stencil_offset from
gen8_depth_state.c.
This is always 0 - only brw_workaround_depthstencil_alignment ever sets
it, and that doesn't run on Gen6+. My initial Broadwell depth state
commit had this mistake.
Signed-off-by: Kenneth Graunke
Reviewed-by: Anuj Phogat
---
src/mesa/drivers/dri/i965/gen8_depth_state.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 81447f8d0b5..bc05a310544 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -41,7 +41,6 @@ emit_depth_packets(struct brw_context *brw,
bool depth_writable,
struct intel_mipmap_tree *stencil_mt,
bool stencil_writable,
- uint32_t stencil_offset,
bool hiz,
uint32_t width,
uint32_t height,
@@ -127,8 +126,7 @@ emit_depth_packets(struct brw_context *brw,
OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 |
(2 * stencil_mt->pitch - 1));
OUT_RELOC64(stencil_mt->bo,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- stencil_offset);
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(stencil_mt ? stencil_mt->qpitch >> 2 : 0);
ADVANCE_BATCH();
}
@@ -220,7 +218,6 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype,
ctx->Depth.Mask != 0,
stencil_mt, ctx->Stencil._WriteEnabled,
- brw->depthstencil.stencil_offset,
hiz, width, height, depth, lod, min_array_element);
}
@@ -439,7 +436,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
brw_depth_format(brw, mt->format),
BRW_SURFACE_2D,
true, /* depth writes */
- NULL, false, 0, /* no stencil for now */
+ NULL, false, /* no stencil for now */
true, /* hiz */
surface_width,
surface_height,
From 101a73846b48ebac8e2386a25b24659f013c66a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 25 Jun 2015 14:58:37 +0200
Subject: [PATCH 0032/1208] radeonsi: don't fail in
si_shader_io_get_unique_index
Trivial. Picked from my tessellation branch.
---
src/gallium/drivers/radeonsi/si_shader.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 47e5f96cbed..a293ef36fbb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -133,8 +133,12 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
return 4 + index;
default:
- assert(0);
- return 63;
+ /* Don't fail here. The result of this function is only used
+ * for LS, TCS, TES, and GS, where legacy GL semantics can't
+ * occur, but this function is called for all vertex shaders
+ * before it's known whether LS will be compiled or not.
+ */
+ return 0;
}
}
From c1151b18f2dce7c6f238f057e9c4fa8d912ce6b5 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Wed, 24 Jun 2015 20:07:54 -0700
Subject: [PATCH 0033/1208] i965/skl: Use more compact hiz dimensions
gen8 had some special restrictions which don't seem to carry over to gen9.
Quoting the spec for SKL:
"The Z_Height and Z_Width values must equal those present in
3DSTATE_DEPTH_BUFFER incremented by one."
This fixes nothing in piglit (and regresses nothing).
Signed-off-by: Ben Widawsky
Reviewed-by: Anuj Phogat
Reviewed-by: Jordan Justen
---
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 32 ++++++++++---------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 6aa969a4930..31386b99656 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1456,21 +1456,23 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
/* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
* adjustments required for Z_Height and Z_Width based on multisampling.
*/
- switch (mt->num_samples) {
- case 0:
- case 1:
- break;
- case 2:
- case 4:
- z_width *= 2;
- z_height *= 2;
- break;
- case 8:
- z_width *= 4;
- z_height *= 2;
- break;
- default:
- unreachable("unsupported sample count");
+ if (brw->gen < 9) {
+ switch (mt->num_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 4:
+ z_width *= 2;
+ z_height *= 2;
+ break;
+ case 8:
+ z_width *= 4;
+ z_height *= 2;
+ break;
+ default:
+ unreachable("unsupported sample count");
+ }
}
const unsigned vertical_align = 8; /* 'j' in the docs */
From 316206ee9ea06419c9a2ea6fe48d66a0b805319d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 25 Jun 2015 08:08:27 -0700
Subject: [PATCH 0034/1208] i965/vec4_live_variables: Do liveness analysis
bottom-to-top
From Muchnick's Advanced Compiler Design and Implementation:
"To determine which variables are live at each point in a flowgraph, we
perform a backward data-flow analysis"
Previously, we were walking the blocks forwards and updating the livein and
then the liveout. However, the livein calculation depends on the liveout
and the liveout depends on the successor blocks. The net result is that it
takes one full iteration to go from liveout to livein and then another
full iteration to propagate to the predecessors. This works out to an
O(n^2) computation where n is the number of blocks. If we run things in
the other order, it's O(nl) where l is the maximum loop depth which is
practically bounded by 3.
In b2c6ba0c4b21391dc35018e1c8c4f7f7d8952bea, we made this same change in
the FS backend to great effect. Might as well keep it consistent and make
the same change for vec4. Also, this reduced the time to run the test:
ES31-CTS.arrays_of_arrays.InteractionFunctionCalls1
from 6:49.62 to 3:31.40 on Timothy Arceri's machine.
Reviewed-by: Matt Turner
---
.../dri/i965/brw_vec4_live_variables.cpp | 38 +++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 95b9d9017e2..29b4a53418a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -133,27 +133,9 @@ vec4_live_variables::compute_live_variables()
while (cont) {
cont = false;
- foreach_block (block, cfg) {
+ foreach_block_reverse (block, cfg) {
struct block_data *bd = &block_data[block->num];
- /* Update livein */
- for (int i = 0; i < bitset_words; i++) {
- BITSET_WORD new_livein = (bd->use[i] |
- (bd->liveout[i] &
- ~bd->def[i]));
- if (new_livein & ~bd->livein[i]) {
- bd->livein[i] |= new_livein;
- cont = true;
- }
- }
- BITSET_WORD new_livein = (bd->flag_use[0] |
- (bd->flag_liveout[0] &
- ~bd->flag_def[0]));
- if (new_livein & ~bd->flag_livein[0]) {
- bd->flag_livein[0] |= new_livein;
- cont = true;
- }
-
/* Update liveout */
foreach_list_typed(bblock_link, child_link, link, &block->children) {
struct block_data *child_bd = &block_data[child_link->block->num];
@@ -173,6 +155,24 @@ vec4_live_variables::compute_live_variables()
cont = true;
}
}
+
+ /* Update livein */
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein = (bd->use[i] |
+ (bd->liveout[i] &
+ ~bd->def[i]));
+ if (new_livein & ~bd->livein[i]) {
+ bd->livein[i] |= new_livein;
+ cont = true;
+ }
+ }
+ BITSET_WORD new_livein = (bd->flag_use[0] |
+ (bd->flag_liveout[0] &
+ ~bd->flag_def[0]));
+ if (new_livein & ~bd->flag_livein[0]) {
+ bd->flag_livein[0] |= new_livein;
+ cont = true;
+ }
}
}
}
From fbba25bba017b3dde5f6613698004b0086bdea00 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Tue, 23 Jun 2015 08:42:14 +0200
Subject: [PATCH 0035/1208] mesa: remove unnecessary checks in
_mesa_readpixels_needs_slow_path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
readpixels_can_use_memcpy will later call _mesa_format_matches_format_and_type
which does much tighter checks than these to decide if we can use
memcpy for readpixels.
Also, the checks do not seem to be extensive enough anyway, since we are
checking for signed/unsigned conversion only when the framebuffer has integers,
but the same checks could be done for other types anyway, since as long as
there is a signed/unsigned conversion we can't memcpy.
No regressions observed on i965/llvmpipe.
Reviewed-by: Anuj Phogat
Reviewed-by: Marek Olšák
---
src/mesa/main/readpix.c | 16 ----------------
1 file changed, 16 deletions(-)
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index a3357cd6419..e256695480a 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -128,7 +128,6 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
{
struct gl_renderbuffer *rb =
_mesa_get_read_renderbuffer_for_format(ctx, format);
- GLenum srcType;
assert(rb);
@@ -153,21 +152,6 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
return GL_TRUE;
}
- /* Conversion between signed and unsigned integers needs masking
- * (it isn't just memcpy). */
- srcType = _mesa_get_format_datatype(rb->Format);
-
- if ((srcType == GL_INT &&
- (type == GL_UNSIGNED_INT ||
- type == GL_UNSIGNED_SHORT ||
- type == GL_UNSIGNED_BYTE)) ||
- (srcType == GL_UNSIGNED_INT &&
- (type == GL_INT ||
- type == GL_SHORT ||
- type == GL_BYTE))) {
- return GL_TRUE;
- }
-
/* And finally, see if there are any transfer ops. */
return get_readpixels_transfer_ops(ctx, rb->Format, format, type,
uses_blit) != 0;
From 36d107e92cc4c1d2b60e0017dbe998af3a2e8b75 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Tue, 23 Jun 2015 23:59:31 -0600
Subject: [PATCH 0036/1208] ilo: introduce ilo_vma
This cleans up the code a bit and makes ilo_state_vector_resource_renamed()
simpler and more robust. It also allows a single bo to back multiple VMAs.
---
src/gallium/drivers/ilo/Makefile.sources | 5 +-
src/gallium/drivers/ilo/core/ilo_buffer.h | 5 -
.../drivers/ilo/core/ilo_builder_3d_bottom.h | 43 ++++---
.../drivers/ilo/core/ilo_builder_3d_top.h | 65 ++++++-----
src/gallium/drivers/ilo/core/ilo_image.h | 7 --
src/gallium/drivers/ilo/core/ilo_state_sol.c | 36 +++---
src/gallium/drivers/ilo/core/ilo_state_sol.h | 22 ++--
.../drivers/ilo/core/ilo_state_surface.c | 25 +++-
.../drivers/ilo/core/ilo_state_surface.h | 32 +++---
src/gallium/drivers/ilo/core/ilo_state_vf.c | 34 +++---
src/gallium/drivers/ilo/core/ilo_state_vf.h | 16 +--
src/gallium/drivers/ilo/core/ilo_state_zs.c | 52 ++++++---
src/gallium/drivers/ilo/core/ilo_state_zs.h | 25 ++--
src/gallium/drivers/ilo/core/ilo_vma.h | 73 ++++++++++++
src/gallium/drivers/ilo/ilo_blitter_blt.c | 41 +++----
src/gallium/drivers/ilo/ilo_draw.c | 11 +-
src/gallium/drivers/ilo/ilo_render_surface.c | 27 ++---
src/gallium/drivers/ilo/ilo_resource.c | 37 +++---
src/gallium/drivers/ilo/ilo_resource.h | 21 ++--
src/gallium/drivers/ilo/ilo_state.c | 108 +++++++-----------
src/gallium/drivers/ilo/ilo_transfer.c | 51 ++++++---
21 files changed, 420 insertions(+), 316 deletions(-)
create mode 100644 src/gallium/drivers/ilo/core/ilo_vma.h
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index e1bbb9a0781..35d76bd4948 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -43,6 +43,7 @@ C_SOURCES := \
core/ilo_state_viewport.h \
core/ilo_state_zs.c \
core/ilo_state_zs.h \
+ core/ilo_vma.h \
core/intel_winsys.h \
ilo_blit.c \
ilo_blit.h \
@@ -65,8 +66,6 @@ C_SOURCES := \
ilo_public.h \
ilo_query.c \
ilo_query.h \
- ilo_resource.c \
- ilo_resource.h \
ilo_render.c \
ilo_render.h \
ilo_render_gen.h \
@@ -76,6 +75,8 @@ C_SOURCES := \
ilo_render_gen8.c \
ilo_render_media.c \
ilo_render_surface.c \
+ ilo_resource.c \
+ ilo_resource.h \
ilo_screen.c \
ilo_screen.h \
ilo_shader.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h
index ca3c61ff890..f2fb63064c0 100644
--- a/src/gallium/drivers/ilo/core/ilo_buffer.h
+++ b/src/gallium/drivers/ilo/core/ilo_buffer.h
@@ -28,17 +28,12 @@
#ifndef ILO_BUFFER_H
#define ILO_BUFFER_H
-#include "intel_winsys.h"
-
#include "ilo_core.h"
#include "ilo_debug.h"
#include "ilo_dev.h"
struct ilo_buffer {
unsigned bo_size;
-
- /* managed by users */
- struct intel_bo *bo;
};
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 6d9e3699125..5efe9da2d22 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -39,6 +39,7 @@
#include "ilo_state_shader.h"
#include "ilo_state_viewport.h"
#include "ilo_state_zs.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
#include "ilo_builder_3d_top.h"
@@ -674,9 +675,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->depth[0];
@@ -691,9 +693,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
else
dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -724,9 +727,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->stencil[0];
@@ -734,9 +738,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -767,9 +772,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->hiz[0];
@@ -777,9 +783,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 8d30095e6f6..6e94fb25f1f 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -39,6 +39,7 @@
#include "ilo_state_surface.h"
#include "ilo_state_urb.h"
#include "ilo_state_vf.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
static inline void
@@ -318,8 +319,10 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] = 0;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- if (b->need_bo)
- ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ }
dw[3] |= b->vb[2];
} else {
@@ -331,9 +334,11 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] |= vf->user_instancing[elem][1];
}
- if (b->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, b->vma->bo,
+ b->vma->bo_offset + b->vb[2], 0);
}
}
@@ -429,9 +434,11 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- if (ib->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[2], 0);
} else {
dw[1] = 0;
dw[2] = 0;
@@ -456,8 +463,9 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
dw[1] = ib->ib[0] |
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
- if (ib->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -801,11 +809,11 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT |
sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, sb->bo,
- sb->so_buf[0], INTEL_RELOC_WRITE);
- ilo_builder_batch_reloc(builder, pos + 3, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[0], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 3, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -832,9 +840,9 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -842,9 +850,10 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
dw[4] = sb->so_buf[2];
- if (sb->need_write_offset_bo) {
- ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo,
- sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE);
+ if (sb->write_offset_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_vma->bo,
+ sb->write_offset_vma->bo_offset + sizeof(uint32_t) * buffer,
+ INTEL_RELOC_WRITE);
} else {
dw[5] = 0;
dw[6] = 0;
@@ -1254,14 +1263,15 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
const uint32_t mocs = (surf->scanout) ?
(GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;
dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
- ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
- surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc64(builder, state_offset, 8, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[8],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
state_align = 32;
@@ -1271,15 +1281,16 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
/*
* For scanouts, we should not enable caching in LLC. Since we only
* enable that on Gen8+, we are fine here.
*/
dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
- ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
- surf->surface[1], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc(builder, state_offset, 1, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[1],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index af15e856028..77747ed7492 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -29,7 +29,6 @@
#define ILO_IMAGE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
@@ -141,13 +140,7 @@ struct ilo_image {
unsigned walk_layer_height;
unsigned bo_stride;
unsigned bo_height;
-
- /* managed by users */
- struct intel_bo *bo;
} aux;
-
- /* managed by users */
- struct intel_bo *bo;
};
struct pipe_resource;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c
index 38c0b719ab3..dd1ef5e7887 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_sol.h"
static bool
@@ -270,9 +270,6 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 7, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
@@ -281,9 +278,17 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
*/
assert(info->offset % 4 == 0);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % 4 == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
+
/* Gen8+ only */
- if (info->write_offset_load || info->write_offset_save)
- assert(ilo_dev_gen(dev) >= ILO_GEN(8));
+ if (info->write_offset_load || info->write_offset_save) {
+ assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
+ assert(info->write_offset_offset + sizeof(uint32_t) <=
+ info->write_offset_vma->vm_size);
+ }
/*
* From the Broadwell PRM, volume 2b, page 206:
@@ -304,25 +309,15 @@ static uint32_t
sol_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_sol_buffer_info *info)
{
- uint32_t size;
-
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
- return 0;
-
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
* "(Surface End Address) This field specifies the ending DWord
* address..."
*/
- size &= ~3;
-
- return size;
+ return (info->vma) ? info->size & ~3 : 0;
}
static bool
@@ -359,7 +354,7 @@ sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
dw1 = 0;
- if (info->buf)
+ if (info->vma)
dw1 |= GEN8_SO_BUF_DW1_ENABLE;
if (info->write_offset_load)
dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
@@ -443,9 +438,8 @@ ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
else
ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
- sb->need_bo = (info->size > 0);
- sb->need_write_offset_bo = (info->write_offset_save ||
- (info->write_offset_load && !info->write_offset_imm_enable));
+ sb->vma = info->vma;
+ sb->write_offset_vma = info->write_offset_vma;
assert(ret);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h
index 2513fcb4979..f0968b39e27 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h
@@ -107,17 +107,17 @@ struct ilo_state_sol {
uint8_t decl_count;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_sol_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
- /*
- * Gen8+ only. When enabled, require a write offset bo of at least
- * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes
- */
+ /* Gen8+ only; at least sizeof(uint32_t) bytes */
+ const struct ilo_vma *write_offset_vma;
+ uint32_t write_offset_offset;
+
bool write_offset_load;
bool write_offset_save;
@@ -126,14 +126,10 @@ struct ilo_state_sol_buffer_info {
};
struct ilo_state_sol_buffer {
- uint32_t so_buf[4];
+ uint32_t so_buf[5];
- bool need_bo;
- bool need_write_offset_bo;
-
- /* managed by users */
- struct intel_bo *bo;
- struct intel_bo *write_offset_bo;
+ const struct ilo_vma *vma;
+ const struct ilo_vma *write_offset_vma;
};
static inline size_t
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 5be9f8f6270..402bbf4b52a 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -26,8 +26,8 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_surface.h"
static bool
@@ -104,7 +104,7 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
if (ilo_dev_gen(dev) >= ILO_GEN(7))
assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB);
- if (info->offset + info->size > info->buf->bo_size) {
+ if (info->offset + info->size > info->vma->vm_size) {
ilo_warn("invalid buffer range\n");
return false;
}
@@ -155,7 +155,8 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) {
assert(info->struct_size % info->format_size == 0);
- if (info->offset % info->struct_size) {
+ if (info->offset % info->struct_size ||
+ info->vma->vm_alignment % info->struct_size) {
ilo_warn("bad buffer offset\n");
return false;
}
@@ -177,7 +178,7 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
* Nothing is said about Untyped* messages, but I guess they require the
* base address to be DWord aligned.
*/
- if (info->offset % 4) {
+ if (info->offset % 4 || info->vma->vm_alignment % 4) {
ilo_warn("bad RAW buffer offset\n");
return false;
}
@@ -408,6 +409,17 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
break;
}
+ assert(info->img && info->vma);
+
+ if (info->img->tiling != GEN6_TILING_NONE)
+ assert(info->vma->vm_alignment % 4096 == 0);
+
+ if (info->aux_vma) {
+ assert(ilo_image_can_enable_aux(info->img, info->level_base));
+ /* always tiled */
+ assert(info->aux_vma->vm_alignment % 4096 == 0);
+ }
+
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 78:
*
@@ -1107,6 +1119,7 @@ ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev);
+ surf->vma = NULL;
surf->type = GEN6_SURFTYPE_NULL;
surf->readonly = true;
@@ -1129,6 +1142,7 @@ ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
surf->readonly = info->readonly;
assert(ret);
@@ -1150,6 +1164,9 @@ ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
+ surf->aux_vma = info->aux_vma;
+
surf->is_integer = info->is_integer;
surf->readonly = info->readonly;
surf->scanout = info->img->scanout;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index 9c025428d50..b9921134a1e 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -29,14 +29,10 @@
#define ILO_STATE_SURFACE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
-struct ilo_buffer;
-struct ilo_image;
-
enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */
ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */
@@ -46,8 +42,13 @@ enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_DP_SVB,
};
+struct ilo_vma;
+struct ilo_image;
+
struct ilo_state_surface_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
+ uint32_t offset;
+ uint32_t size;
enum ilo_state_surface_access access;
@@ -56,13 +57,17 @@ struct ilo_state_surface_buffer_info {
bool readonly;
uint16_t struct_size;
-
- uint32_t offset;
- uint32_t size;
};
struct ilo_state_surface_image_info {
const struct ilo_image *img;
+ uint8_t level_base;
+ uint8_t level_count;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
enum ilo_state_surface_access access;
@@ -72,16 +77,14 @@ struct ilo_state_surface_image_info {
bool readonly;
bool is_cube_map;
bool is_array;
-
- uint8_t level_base;
- uint8_t level_count;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_surface {
uint32_t surface[13];
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
+
enum gen_surface_type type;
uint8_t min_lod;
uint8_t mip_count;
@@ -89,9 +92,6 @@ struct ilo_state_surface {
bool readonly;
bool scanout;
-
- /* managed by users */
- struct intel_bo *bo;
};
bool
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
index ddc75428ed7..2dd72276e63 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_vf.h"
static bool
@@ -479,8 +479,8 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 6, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma)
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 86:
@@ -500,6 +500,9 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
* aligned address, and BufferPitch must be a multiple of 64-bits."
*/
if (info->cv_has_double) {
+ if (info->vma)
+ assert(info->vma->vm_alignment % 8 == 0);
+
assert(info->stride % 8 == 0);
assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0);
}
@@ -512,12 +515,7 @@ vertex_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info)
{
ILO_DEV_ASSERT(dev, 6, 8);
-
- if (!info->buf)
- return 0;
-
- return (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
+ return (info->vma) ? info->size : 0;
}
static bool
@@ -537,7 +535,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
if (ilo_dev_gen(dev) >= ILO_GEN(7))
dw0 |= GEN7_VB_DW0_ADDR_MODIFIED;
- if (!info->buf)
+ if (!info->vma)
dw0 |= GEN6_VB_DW0_IS_NULL;
STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3);
@@ -551,7 +549,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
vb->vb[2] = (size) ? info->offset + size - 1 : 0;
}
- vb->need_bo = (info->buf != NULL);
+ vb->vma = info->vma;
return true;
}
@@ -586,8 +584,10 @@ index_buffer_validate_gen6(const struct ilo_dev *dev,
*/
assert(info->offset % format_size == 0);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % format_size == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
return true;
}
@@ -600,12 +600,10 @@ index_buffer_get_gen6_size(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
+ if (!info->vma)
return 0;
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
+ size = info->size;
if (ilo_dev_gen(dev) < ILO_GEN(8)) {
const uint32_t format_size = get_index_format_size(info->format);
size -= (size % format_size);
@@ -638,7 +636,7 @@ index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib,
ib->ib[2] = (size) ? info->offset + size - 1 : 0;
}
- ib->need_bo = (info->buf != NULL);
+ ib->vma = info->vma;
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h
index f15c63a248a..30734476435 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h
@@ -126,10 +126,10 @@ struct ilo_state_vf_delta {
uint32_t dirty;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_vertex_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -143,14 +143,11 @@ struct ilo_state_vertex_buffer_info {
struct ilo_state_vertex_buffer {
uint32_t vb[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
struct ilo_state_index_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -160,10 +157,7 @@ struct ilo_state_index_buffer_info {
struct ilo_state_index_buffer {
uint32_t ib[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
static inline size_t
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
index 901fedb5599..7b82f1acf6f 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -25,10 +25,9 @@
* Chia-I Wu
*/
-#include "intel_winsys.h"
-
#include "ilo_debug.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_zs.h"
static bool
@@ -128,6 +127,24 @@ zs_validate_gen6(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
+ assert(!info->z_img == !info->z_vma);
+ assert(!info->s_img == !info->s_vma);
+
+ /* all tiled */
+ if (info->z_img) {
+ assert(info->z_img->tiling == GEN6_TILING_Y);
+ assert(info->z_vma->vm_alignment % 4096 == 0);
+ }
+ if (info->s_img) {
+ assert(info->s_img->tiling == GEN8_TILING_W);
+ assert(info->s_vma->vm_alignment % 4096 == 0);
+ }
+ if (info->hiz_vma) {
+ assert(info->z_img &&
+ ilo_image_can_enable_aux(info->z_img, info->level));
+ assert(info->hiz_vma->vm_alignment % 4096 == 0);
+ }
+
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
@@ -146,11 +163,6 @@ zs_validate_gen6(const struct ilo_dev *dev,
assert(info->level < img->level_count);
assert(img->bo_stride);
- if (info->hiz_enable) {
- assert(info->z_img &&
- ilo_image_can_enable_aux(info->z_img, info->level));
- }
-
if (info->is_cube_map) {
assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D);
@@ -162,11 +174,6 @@ zs_validate_gen6(const struct ilo_dev *dev,
assert(img->width0 == img->height0);
}
- if (info->z_img)
- assert(info->z_img->tiling == GEN6_TILING_Y);
- if (info->s_img)
- assert(info->s_img->tiling == GEN8_TILING_W);
-
return true;
}
@@ -274,7 +281,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
w = img->width0;
h = img->height0;
- if (info->hiz_enable) {
+ if (info->hiz_vma) {
uint16_t align_w, align_h;
get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h);
@@ -439,7 +446,7 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
* to the same value (enabled or disabled) as Hierarchical Depth
* Buffer Enable."
*/
- if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
+ if (!info->hiz_vma && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
/* info->z_readonly and info->s_readonly are ignored on Gen6 */
@@ -450,7 +457,7 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
if (info->z_img)
dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
- if (info->hiz_enable || !info->z_img) {
+ if (info->hiz_vma || !info->z_img) {
dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
GEN6_DEPTH_DW1_SEPARATE_STENCIL;
}
@@ -508,7 +515,7 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
if (info->z_img) {
if (!info->z_readonly)
dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
- if (info->hiz_enable)
+ if (info->hiz_vma)
dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT;
@@ -683,11 +690,15 @@ ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev,
else
ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev);
- if (info->z_img && info->hiz_enable)
+ if (info->z_img && info->hiz_vma)
ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info);
else
ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->z_vma = info->z_vma;
+ zs->s_vma = info->s_vma;
+ zs->hiz_vma = info->hiz_vma;
+
zs->z_readonly = info->z_readonly;
zs->s_readonly = info->s_readonly;
@@ -720,8 +731,11 @@ ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
*/
assert(ilo_dev_gen(dev) >= ILO_GEN(7));
- zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
- zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ if (zs->hiz_vma) {
+ zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
+ zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->hiz_vma = NULL;
+ }
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
index 98212daf74f..6f32b7e2efe 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -29,28 +29,30 @@
#define ILO_STATE_ZS_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+struct ilo_vma;
struct ilo_image;
struct ilo_state_zs_info {
- /* both are optional */
+ /* both optional */
const struct ilo_image *z_img;
const struct ilo_image *s_img;
+ uint8_t level;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
/* ignored prior to Gen7 */
bool z_readonly;
bool s_readonly;
- bool hiz_enable;
bool is_cube_map;
-
- uint8_t level;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_zs {
@@ -58,16 +60,15 @@ struct ilo_state_zs {
uint32_t stencil[3];
uint32_t hiz[3];
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
+
/* TODO move this to ilo_image */
enum gen_depth_format depth_format;
bool z_readonly;
bool s_readonly;
-
- /* managed by users */
- struct intel_bo *depth_bo;
- struct intel_bo *stencil_bo;
- struct intel_bo *hiz_bo;
};
bool
diff --git a/src/gallium/drivers/ilo/core/ilo_vma.h b/src/gallium/drivers/ilo/core/ilo_vma.h
new file mode 100644
index 00000000000..ad2a1d4b33e
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_vma.h
@@ -0,0 +1,73 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu
+ */
+
+#ifndef ILO_VMA_H
+#define ILO_VMA_H
+
+#include "ilo_core.h"
+#include "ilo_debug.h"
+#include "ilo_dev.h"
+
+struct intel_bo;
+
+/**
+ * A virtual memory area.
+ */
+struct ilo_vma {
+ /* address space */
+ uint32_t vm_size;
+ uint32_t vm_alignment;
+
+ /* backing storage */
+ struct intel_bo *bo;
+ uint32_t bo_offset;
+};
+
+static inline bool
+ilo_vma_init(struct ilo_vma *vma, const struct ilo_dev *dev,
+ uint32_t size, uint32_t alignment)
+{
+ assert(ilo_is_zeroed(vma, sizeof(*vma)));
+ assert(size && alignment);
+
+ vma->vm_alignment = alignment;
+ vma->vm_size = size;
+
+ return true;
+}
+
+static inline void
+ilo_vma_set_bo(struct ilo_vma *vma, const struct ilo_dev *dev,
+ struct intel_bo *bo, uint32_t offset)
+{
+ assert(offset % vma->vm_alignment == 0);
+
+ vma->bo = bo;
+ vma->bo_offset = offset;
+}
+
+#endif /* ILO_VMA_H */
diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c
index d55dc35e360..52b4b25d827 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_blt.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c
@@ -127,7 +127,7 @@ ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl)
static bool
buf_clear_region(struct ilo_blitter *blitter,
- struct ilo_buffer *buf, unsigned offset,
+ struct ilo_buffer_resource *buf, unsigned offset,
uint32_t val, unsigned size,
enum gen6_blt_mask value_mask,
enum gen6_blt_mask write_mask)
@@ -140,8 +140,8 @@ buf_clear_region(struct ilo_blitter *blitter,
if (offset % cpp || size % cpp)
return false;
- dst.bo = buf->bo;
- dst.offset = offset;
+ dst.bo = buf->vma.bo;
+ dst.offset = buf->vma.bo_offset + offset;
ilo_blitter_blt_begin(blitter, GEN6_COLOR_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
@@ -179,25 +179,26 @@ buf_clear_region(struct ilo_blitter *blitter,
static bool
buf_copy_region(struct ilo_blitter *blitter,
- struct ilo_buffer *dst_buf, unsigned dst_offset,
- struct ilo_buffer *src_buf, unsigned src_offset,
+ struct ilo_buffer_resource *dst_buf, unsigned dst_offset,
+ struct ilo_buffer_resource *src_buf, unsigned src_offset,
unsigned size)
{
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
struct gen6_blt_bo dst, src;
- dst.bo = dst_buf->bo;
- dst.offset = dst_offset;
+ dst.bo = dst_buf->vma.bo;
+ dst.offset = dst_buf->vma.bo_offset + dst_offset;
dst.pitch = 0;
- src.bo = src_buf->bo;
- src.offset = src_offset;
+ src.bo = src_buf->vma.bo;
+ src.offset = src_buf->vma.bo_offset + src_offset;
src.pitch = 0;
ilo_blitter_blt_begin(blitter, GEN6_SRC_COPY_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
- dst_buf->bo, GEN6_TILING_NONE, src_buf->bo, GEN6_TILING_NONE);
+ dst_buf->vma.bo, GEN6_TILING_NONE,
+ src_buf->vma.bo, GEN6_TILING_NONE);
while (size) {
unsigned width, height;
@@ -258,14 +259,14 @@ tex_clear_region(struct ilo_blitter *blitter,
if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline)
return false;
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
swctrl = ilo_blitter_blt_begin(blitter,
GEN6_XY_COLOR_BLT__SIZE * dst_box->depth,
- dst_tex->image.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
+ dst_tex->vma.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
for (slice = 0; slice < dst_box->depth; slice++) {
unsigned x, y;
@@ -347,13 +348,13 @@ tex_copy_region(struct ilo_blitter *blitter,
break;
}
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
- src.bo = src_tex->image.bo;
- src.offset = 0;
+ src.bo = src_tex->vma.bo;
+ src.offset = src_tex->vma.bo_offset;
src.pitch = src_tex->image.bo_stride;
src.tiling = src_tex->image.tiling;
@@ -423,8 +424,8 @@ ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
src_box->height == 1 &&
src_box->depth == 1);
- success = buf_copy_region(blitter,
- ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
+ success = buf_copy_region(blitter, ilo_buffer_resource(dst), dst_offset,
+ ilo_buffer_resource(src), src_offset, size);
}
else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
success = tex_copy_region(blitter,
@@ -488,7 +489,7 @@ ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
if (offset + size > end)
size = end - offset;
- success = buf_clear_region(blitter, ilo_buffer(rt->texture),
+ success = buf_clear_region(blitter, ilo_buffer_resource(rt->texture),
offset, packed.ui[0], size, mask, mask);
}
else {
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index e8e1a4cd14c..433348d9326 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -444,6 +444,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
const struct pipe_draw_info *info)
{
const struct ilo_ib_state *ib = &ilo->state_vector.ib;
+ const struct ilo_vma *vma;
union {
const void *ptr;
const uint8_t *u8;
@@ -453,10 +454,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
/* we will draw with IB mapped */
if (ib->state.buffer) {
- u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false);
+ vma = ilo_resource_get_vma(ib->state.buffer);
+ u.ptr = intel_bo_map(vma->bo, false);
if (u.ptr)
- u.u8 += ib->state.offset;
+ u.u8 += vma->bo_offset + ib->state.offset;
} else {
+ vma = NULL;
u.ptr = ib->state.user_buffer;
}
@@ -500,8 +503,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
#undef DRAW_VBO_WITH_SW_RESTART
- if (ib->state.buffer)
- intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo);
+ if (vma)
+ intel_bo_unmap(vma->bo);
}
static bool
diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c
index ad053564294..3bf8646b344 100644
--- a/src/gallium/drivers/ilo/ilo_render_surface.c
+++ b/src/gallium/drivers/ilo/ilo_render_surface.c
@@ -42,14 +42,17 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
const struct pipe_stream_output_info *so_info,
int so_index)
{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
ILO_DEV_ASSERT(builder->dev, 6, 6);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(so->buffer);
+ info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB;
switch (so_info->output[so_index].num_components) {
@@ -78,12 +81,9 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
info.struct_size =
so_info->stride[so_info->output[so_index].output_buffer] * 4;
- info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, builder->dev, &info);
- surf.bo = info.buf->bo;
return gen6_SURFACE_STATE(builder, &surf);
}
@@ -482,18 +482,19 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r,
return;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(session->input->buffer);
+
+ info.vma = ilo_resource_get_vma(session->input->buffer);
+ info.offset = session->input->buffer_offset;
+ info.size = session->input->buffer_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
info.readonly = true;
- info.offset = session->input->buffer_offset;
- info.size = session->input->buffer_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
assert(count == 1 && session->input->buffer);
surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf);
@@ -538,23 +539,23 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r,
surface_state += base;
for (i = 0; i < count; i++) {
if (i < vec->global_binding.count && bindings[i].resource) {
- const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
assert(bindings[i].resource->target == PIPE_BUFFER);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(bindings[i].resource);
+ info.size = info.vma->vm_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
- info.size = buf->bo_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf);
} else {
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index be9fd10a84c..065e665d895 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -178,8 +178,8 @@ tex_create_bo(struct ilo_texture *tex)
if (!bo)
return false;
- intel_bo_unref(tex->image.bo);
- tex->image.bo = bo;
+ intel_bo_unref(tex->vma.bo);
+ ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
return true;
}
@@ -215,15 +215,16 @@ static bool
tex_create_hiz(struct ilo_texture *tex)
{
const struct pipe_resource *templ = &tex->base;
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
if (tex->imported) {
unsigned lv;
@@ -246,17 +247,18 @@ tex_create_hiz(struct ilo_texture *tex)
static bool
tex_create_mcs(struct ilo_texture *tex)
{
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
assert(tex->image.aux.enables == (1 << (tex->base.last_level + 1)) - 1);
- bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
return true;
}
@@ -267,8 +269,8 @@ tex_destroy(struct ilo_texture *tex)
if (tex->separate_s8)
tex_destroy(tex->separate_s8);
- intel_bo_unref(tex->image.bo);
- intel_bo_unref(tex->image.aux.bo);
+ intel_bo_unref(tex->vma.bo);
+ intel_bo_unref(tex->aux_vma.bo);
tex_free_slices(tex);
FREE(tex);
@@ -327,7 +329,9 @@ tex_import_handle(struct ilo_texture *tex,
return false;
}
- tex->image.bo = bo;
+ ilo_vma_init(&tex->vma, &is->dev,
+ tex->image.bo_stride * tex->image.bo_height, 4096);
+ ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
tex->imported = true;
@@ -347,6 +351,8 @@ tex_init_image(struct ilo_texture *tex,
return false;
} else {
ilo_image_init(img, &is->dev, templ);
+ ilo_vma_init(&tex->vma, &is->dev,
+ img->bo_stride * img->bo_height, 4096);
}
if (img->bo_height > ilo_max_resource_size / img->bo_stride)
@@ -406,7 +412,7 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
else
tiling = surface_to_winsys_tiling(tex->image.tiling);
- err = intel_winsys_export_handle(is->dev.winsys, tex->image.bo, tiling,
+ err = intel_winsys_export_handle(is->dev.winsys, tex->vma.bo, tiling,
tex->image.bo_stride, tex->image.bo_height, handle);
return !err;
@@ -425,8 +431,8 @@ buf_create_bo(struct ilo_buffer_resource *buf)
if (!bo)
return false;
- intel_bo_unref(buf->buffer.bo);
- buf->buffer.bo = bo;
+ intel_bo_unref(buf->vma.bo);
+ ilo_vma_set_bo(&buf->vma, &is->dev, bo, 0);
return true;
}
@@ -434,7 +440,7 @@ buf_create_bo(struct ilo_buffer_resource *buf)
static void
buf_destroy(struct ilo_buffer_resource *buf)
{
- intel_bo_unref(buf->buffer.bo);
+ intel_bo_unref(buf->vma.bo);
FREE(buf);
}
@@ -472,6 +478,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
size = align(size, 4096);
ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags);
+ ilo_vma_init(&buf->vma, &is->dev, buf->buffer.bo_size, 4096);
if (buf->buffer.bo_size < templ->width0 ||
buf->buffer.bo_size > ilo_max_resource_size ||
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index d602e0cbf70..0357499f44a 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -31,6 +31,7 @@
#include "core/intel_winsys.h"
#include "core/ilo_buffer.h"
#include "core/ilo_image.h"
+#include "core/ilo_vma.h"
#include "ilo_common.h"
#include "ilo_screen.h"
@@ -93,6 +94,8 @@ struct ilo_texture {
bool imported;
struct ilo_image image;
+ struct ilo_vma vma;
+ struct ilo_vma aux_vma;
/* XXX thread-safety */
struct ilo_texture_slice *slices[PIPE_MAX_TEXTURE_LEVELS];
@@ -104,13 +107,14 @@ struct ilo_buffer_resource {
struct pipe_resource base;
struct ilo_buffer buffer;
+ struct ilo_vma vma;
};
-static inline struct ilo_buffer *
-ilo_buffer(struct pipe_resource *res)
+static inline struct ilo_buffer_resource *
+ilo_buffer_resource(struct pipe_resource *res)
{
- return (res && res->target == PIPE_BUFFER) ?
- &((struct ilo_buffer_resource *) res)->buffer : NULL;
+ return (struct ilo_buffer_resource *)
+ ((res && res->target == PIPE_BUFFER) ? res : NULL);
}
static inline struct ilo_texture *
@@ -127,13 +131,14 @@ bool
ilo_resource_rename_bo(struct pipe_resource *res);
/**
- * Return the bo of the resource.
+ * Return the VMA of the resource.
*/
-static inline struct intel_bo *
-ilo_resource_get_bo(struct pipe_resource *res)
+static inline const struct ilo_vma *
+ilo_resource_get_vma(struct pipe_resource *res)
{
return (res->target == PIPE_BUFFER) ?
- ilo_buffer(res)->bo : ilo_texture(res)->image.bo;
+ &((struct ilo_buffer_resource *) res)->vma :
+ &((struct ilo_texture *) res)->vma;
}
static inline struct ilo_texture_slice *
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 63534f33fa7..24ab59aa32b 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -379,13 +379,12 @@ finalize_cbuf_state(struct ilo_context *ilo,
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
- cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource);
+ cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
cbuf->cso[i].info.offset = offset;
memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface));
ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface,
ilo->dev, &cbuf->cso[i].info);
- cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo;
ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
}
@@ -466,11 +465,9 @@ finalize_index_buffer(struct ilo_context *ilo)
memset(&info, 0, sizeof(info));
if (vec->ib.hw_resource) {
- info.buf = ilo_buffer(vec->ib.hw_resource);
- info.size = info.buf->bo_size;
+ info.vma = ilo_resource_get_vma(vec->ib.hw_resource);
+ info.size = info.vma->vm_size;
info.format = ilo_translate_index_size(vec->ib.hw_index_size);
-
- vec->ib.ib.bo = info.buf->bo;
}
ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info);
@@ -532,13 +529,11 @@ finalize_vertex_buffers(struct ilo_context *ilo)
const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx];
if (cso->buffer) {
- info.buf = ilo_buffer(cso->buffer);
+ info.vma = ilo_resource_get_vma(cso->buffer);
info.offset = cso->buffer_offset;
- info.size = info.buf->bo_size;
+ info.size = info.vma->vm_size - cso->buffer_offset;
info.stride = cso->stride;
-
- vec->vb.vb[i].bo = info.buf->bo;
} else {
memset(&info, 0, sizeof(info));
}
@@ -1566,24 +1561,23 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
cso->info.size = buf[i].buffer_size;
if (buf[i].buffer) {
- cso->info.buf = ilo_buffer(buf[i].buffer);
+ cso->info.vma = ilo_resource_get_vma(buf[i].buffer);
cso->info.offset = buf[i].buffer_offset;
memset(&cso->surface, 0, sizeof(cso->surface));
ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info);
- cso->surface.bo = cso->info.buf->bo;
cso->user_buffer = NULL;
cbuf->enabled_mask |= 1 << (index + i);
} else if (buf[i].user_buffer) {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
/* buffer_offset does not apply for user buffer */
cso->user_buffer = buf[i].user_buffer;
cbuf->enabled_mask |= 1 << (index + i);
} else {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1596,7 +1590,7 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
pipe_resource_reference(&cso->resource, NULL);
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1706,7 +1700,7 @@ ilo_set_framebuffer_state(struct pipe_context *pipe,
const struct ilo_surface_cso *cso =
(const struct ilo_surface_cso *) state->zsbuf;
- fb->has_hiz = cso->u.zs.hiz_bo;
+ fb->has_hiz = cso->u.zs.hiz_vma;
fb->depth_offset_format =
ilo_state_zs_get_depth_format(&cso->u.zs, dev);
} else {
@@ -1945,12 +1939,11 @@ ilo_create_stream_output_target(struct pipe_context *pipe,
target->base.buffer_size = buffer_size;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+ info.vma = ilo_resource_get_vma(res);
info.offset = buffer_offset;
info.size = buffer_size;
ilo_state_sol_buffer_init(&target->sb, dev, &info);
- target->sb.bo = info.buf->bo;
return &target->base;
}
@@ -2018,18 +2011,17 @@ ilo_create_sampler_view(struct pipe_context *pipe,
struct ilo_state_surface_buffer_info info;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+ info.vma = ilo_resource_get_vma(res);
info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
info.format = ilo_format_translate_color(dev, templ->format);
info.format_size = util_format_get_blocksize(templ->format);
info.struct_size = info.format_size;
info.readonly = true;
info.offset = templ->u.buf.first_element * info.struct_size;
info.size = (templ->u.buf.last_element -
templ->u.buf.first_element + 1) * info.struct_size;
ilo_state_surface_init_for_buffer(&view->surface, dev, &info);
- view->surface.bo = info.buf->bo;
} else {
struct ilo_texture *tex = ilo_texture(res);
struct ilo_state_surface_image_info info;
@@ -2042,8 +2034,16 @@ ilo_create_sampler_view(struct pipe_context *pipe,
}
memset(&info, 0, sizeof(info));
- info.img = &tex->image;
+ info.img = &tex->image;
+ info.level_base = templ->u.tex.first_level;
+ info.level_count = templ->u.tex.last_level -
+ templ->u.tex.first_level + 1;
+ info.slice_base = templ->u.tex.first_layer;
+ info.slice_count = templ->u.tex.last_layer -
+ templ->u.tex.first_layer + 1;
+
+ info.vma = &tex->vma;
info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
@@ -2059,15 +2059,7 @@ ilo_create_sampler_view(struct pipe_context *pipe,
info.is_array = util_resource_is_array_texture(&tex->base);
info.readonly = true;
- info.level_base = templ->u.tex.first_level;
- info.level_count = templ->u.tex.last_level -
- templ->u.tex.first_level + 1;
- info.slice_base = templ->u.tex.first_layer;
- info.slice_count = templ->u.tex.last_layer -
- templ->u.tex.first_layer + 1;
-
ilo_state_surface_init_for_image(&view->surface, dev, &info);
- view->surface.bo = info.img->bo;
}
return &view->base;
@@ -2111,18 +2103,23 @@ ilo_create_surface(struct pipe_context *pipe,
assert(tex->base.target != PIPE_BUFFER);
memset(&info, 0, sizeof(info));
+
info.img = &tex->image;
- info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
- info.format = ilo_format_translate_render(dev, templ->format);
- info.is_array = util_resource_is_array_texture(&tex->base);
info.level_base = templ->u.tex.level;
info.level_count = 1;
info.slice_base = templ->u.tex.first_layer;
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.vma = &tex->vma;
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.aux_vma = &tex->aux_vma;
+
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+ info.format = ilo_format_translate_render(dev, templ->format);
+ info.is_array = util_resource_is_array_texture(&tex->base);
+
ilo_state_surface_init_for_image(&surf->u.rt, dev, &info);
- surf->u.rt.bo = info.img->bo;
} else {
struct ilo_state_zs_info info;
@@ -2131,13 +2128,19 @@ ilo_create_surface(struct pipe_context *pipe,
memset(&info, 0, sizeof(info));
if (templ->format == PIPE_FORMAT_S8_UINT) {
+ info.s_vma = &tex->vma;
info.s_img = &tex->image;
} else {
+ info.z_vma = &tex->vma;
info.z_img = &tex->image;
- info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL;
- info.hiz_enable =
- ilo_image_can_enable_aux(&tex->image, templ->u.tex.level);
+ if (tex->separate_s8) {
+ info.s_vma = &tex->separate_s8->vma;
+ info.s_img = &tex->separate_s8->image;
+ }
+
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.hiz_vma = &tex->aux_vma;
}
info.level = templ->u.tex.level;
@@ -2146,15 +2149,6 @@ ilo_create_surface(struct pipe_context *pipe,
templ->u.tex.first_layer + 1;
ilo_state_zs_init(&surf->u.zs, dev, &info);
-
- if (info.z_img) {
- surf->u.zs.depth_bo = info.z_img->bo;
- if (info.hiz_enable)
- surf->u.zs.hiz_bo = info.z_img->aux.bo;
- }
-
- if (info.s_img)
- surf->u.zs.stencil_bo = info.s_img->bo;
}
return &surf->base;
@@ -2451,7 +2445,6 @@ void
ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct pipe_resource *res)
{
- struct intel_bo *bo = ilo_resource_get_bo(res);
uint32_t states = 0;
unsigned sh, i;
@@ -2482,10 +2475,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
for (i = 0; i < vec->so.count; i++) {
if (vec->so.states[i]->buffer == res) {
- struct ilo_stream_output_target *target =
- (struct ilo_stream_output_target *) vec->so.states[i];
-
- target->sb.bo = ilo_buffer(res)->bo;
states |= ILO_DIRTY_SO;
break;
}
@@ -2503,7 +2492,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
[PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS,
[PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS,
};
- cso->surface.bo = bo;
states |= view_dirty_bits[sh];
break;
@@ -2515,7 +2503,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
if (cbuf->resource == res) {
- cbuf->surface.bo = bo;
states |= ILO_DIRTY_CBUF;
break;
}
@@ -2528,7 +2515,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
(struct ilo_surface_cso *) vec->resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_RESOURCE;
break;
}
@@ -2540,27 +2526,19 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->fb.state.cbufs[i];
if (cso && cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_FB;
break;
}
}
- if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) {
- struct ilo_surface_cso *cso =
- (struct ilo_surface_cso *) vec->fb.state.zsbuf;
-
- cso->u.zs.depth_bo = bo;
-
+ if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res)
states |= ILO_DIRTY_FB;
- }
}
for (i = 0; i < vec->cs_resource.count; i++) {
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->cs_resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_CS_RESOURCE;
break;
}
diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c
index ec41473f94a..be5aeee8e23 100644
--- a/src/gallium/drivers/ilo/ilo_transfer.c
+++ b/src/gallium/drivers/ilo/ilo_transfer.c
@@ -268,23 +268,27 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer)
static void *
xfer_map(struct ilo_transfer *xfer)
{
+ const struct ilo_vma *vma;
void *ptr;
switch (xfer->method) {
case ILO_TRANSFER_MAP_CPU:
- ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
- xfer->base.usage & PIPE_TRANSFER_WRITE);
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
break;
case ILO_TRANSFER_MAP_GTT:
- ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt(vma->bo);
break;
case ILO_TRANSFER_MAP_GTT_ASYNC:
- ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt_async(vma->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
{
const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
- struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
+
+ vma = ilo_resource_get_vma(xfer->staging.res);
/*
* We want a writable, optionally persistent and coherent, mapping
@@ -292,25 +296,29 @@ xfer_map(struct ilo_transfer *xfer)
* this turns out to be fairly simple.
*/
if (is->dev.has_llc)
- ptr = intel_bo_map(bo, true);
+ ptr = intel_bo_map(vma->bo, true);
else
- ptr = intel_bo_map_gtt(bo);
+ ptr = intel_bo_map_gtt(vma->bo);
if (ptr && xfer->staging.res->target == PIPE_BUFFER)
ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
-
}
break;
case ILO_TRANSFER_MAP_SW_CONVERT:
case ILO_TRANSFER_MAP_SW_ZS:
+ vma = NULL;
ptr = xfer->staging.sys;
break;
default:
assert(!"unknown mapping method");
+ vma = NULL;
ptr = NULL;
break;
}
+ if (ptr && vma)
+ ptr = (void *) ((char *) ptr + vma->bo_offset);
+
return ptr;
}
@@ -324,10 +332,10 @@ xfer_unmap(struct ilo_transfer *xfer)
case ILO_TRANSFER_MAP_CPU:
case ILO_TRANSFER_MAP_GTT:
case ILO_TRANSFER_MAP_GTT_ASYNC:
- intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
- intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
break;
default:
break;
@@ -541,9 +549,12 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
!linear_view))
- ptr = intel_bo_map(tex->image.bo, !for_read_back);
+ ptr = intel_bo_map(tex->vma.bo, !for_read_back);
else
- ptr = intel_bo_map_gtt(tex->image.bo);
+ ptr = intel_bo_map_gtt(tex->vma.bo);
+
+ if (ptr)
+ ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
return ptr;
}
@@ -551,7 +562,7 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
- intel_bo_unmap(tex->image.bo);
+ intel_bo_unmap(tex->vma.bo);
}
static bool
@@ -1055,7 +1066,7 @@ choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
return false;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
+ if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
bool resource_renamed;
if (!xfer_unblock(xfer, &resource_renamed)) {
@@ -1078,11 +1089,11 @@ static void
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
unsigned usage, int offset, int size, const void *data)
{
- struct ilo_buffer *buf = ilo_buffer(res);
+ struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
bool need_submit;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, buf->bo, &need_submit)) {
+ if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
bool unblocked = false;
if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
@@ -1103,9 +1114,12 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
templ.bind = PIPE_BIND_TRANSFER_WRITE;
staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
if (staging) {
+ const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
struct pipe_box staging_box;
- intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
+ /* offset by staging_vma->bo_offset for pwrite */
+ intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
+ size, data);
u_box_1d(0, size, &staging_box);
ilo_blitter_blt_copy_resource(ilo->blitter,
@@ -1123,7 +1137,8 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
ilo_cp_submit(ilo->cp, "syncing for pwrites");
}
- intel_bo_pwrite(buf->bo, offset, size, data);
+ /* offset by buf->vma.bo_offset for pwrite */
+ intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
}
static void
From 9871646c132ba137709b0bfebfe285985dc351e6 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Fri, 26 Jun 2015 13:08:32 +0800
Subject: [PATCH 0037/1208] ilo: remove ilo_buffer
Since the addition of ilo_vma, ilo_buffer was used only to pad a bo for
sampling engine surfaces. Replace it entirely with these functions:
ilo_state_surface_buffer_size()
ilo_state_vertex_buffer_size()
ilo_state_index_buffer_size()
ilo_state_sol_buffer_size()
---
src/gallium/drivers/ilo/Makefile.sources | 1 -
src/gallium/drivers/ilo/core/ilo_buffer.h | 59 -------------------
src/gallium/drivers/ilo/core/ilo_state_sol.c | 9 +++
src/gallium/drivers/ilo/core/ilo_state_sol.h | 4 ++
.../drivers/ilo/core/ilo_state_surface.c | 48 +++++++++++++++
.../drivers/ilo/core/ilo_state_surface.h | 5 ++
src/gallium/drivers/ilo/core/ilo_state_vf.c | 18 ++++++
src/gallium/drivers/ilo/core/ilo_state_vf.h | 8 +++
src/gallium/drivers/ilo/ilo_resource.c | 22 +++++--
src/gallium/drivers/ilo/ilo_resource.h | 3 +-
10 files changed, 109 insertions(+), 68 deletions(-)
delete mode 100644 src/gallium/drivers/ilo/core/ilo_buffer.h
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index 35d76bd4948..7a7db938f92 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -1,5 +1,4 @@
C_SOURCES := \
- core/ilo_buffer.h \
core/ilo_builder.c \
core/ilo_builder.h \
core/ilo_builder_3d.h \
diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h
deleted file mode 100644
index f2fb63064c0..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_buffer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu
- */
-
-#ifndef ILO_BUFFER_H
-#define ILO_BUFFER_H
-
-#include "ilo_core.h"
-#include "ilo_debug.h"
-#include "ilo_dev.h"
-
-struct ilo_buffer {
- unsigned bo_size;
-};
-
-static inline void
-ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
- unsigned size, uint32_t bind, uint32_t flags)
-{
- assert(ilo_is_zeroed(buf, sizeof(*buf)));
-
- buf->bo_size = size;
-
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 118:
- *
- * "For buffers, which have no inherent "height," padding requirements
- * are different. A buffer must be padded to the next multiple of 256
- * array elements, with an additional 16 bytes added beyond that to
- * account for the L1 cache line."
- */
- if (bind & PIPE_BIND_SAMPLER_VIEW)
- buf->bo_size = align(buf->bo_size, 256) + 16;
-}
-
-#endif /* ILO_BUFFER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c
index dd1ef5e7887..6ef2c91a592 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c
@@ -424,6 +424,15 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
return ilo_state_sol_init(sol, dev, &info);
}
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* DWord aligned without padding */
+ *alignment = 4;
+ return size;
+}
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h
index f0968b39e27..92c5f94725b 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h
@@ -150,6 +150,10 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
const struct ilo_dev *dev,
bool render_disable);
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 402bbf4b52a..d3581749e56 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -1106,6 +1106,54 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ uint32_t size, uint32_t *alignment)
+{
+ switch (access) {
+ case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+ *
+ * "For buffers, which have no inherent "height," padding
+ * requirements are different. A buffer must be padded to the next
+ * multiple of 256 array elements, with an additional 16 bytes
+ * added beyond that to account for the L1 cache line."
+ *
+ * Assuming tightly packed GEN6_FORMAT_R32G32B32A32_FLOAT, the size
+ * needs to be padded to 4096 (= 16 * 256).
+ */
+ *alignment = 1;
+ size = align(size, 4096) + 16;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ /* element-size aligned for worst cases */
+ *alignment = 16;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /* DWord aligned? */
+ *alignment = 4;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /* OWord aligned */
+ *alignment = 16;
+ size = align(size, 16);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+ /* always DWord aligned */
+ *alignment = 4;
+ break;
+ default:
+ assert(!"unknown access");
+ *alignment = 1;
+ break;
+ }
+
+ return size;
+}
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index b9921134a1e..0cda08eb031 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -99,6 +99,11 @@ ilo_state_surface_valid_format(const struct ilo_dev *dev,
enum ilo_state_surface_access access,
enum gen_surface_format format);
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ uint32_t size, uint32_t *alignment);
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
index 2dd72276e63..9faf835fef2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -947,6 +947,15 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
}
}
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* align for doubles without padding */
+ *alignment = 8;
+ return size;
+}
+
/**
* No need to initialize first.
*/
@@ -964,6 +973,15 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
return ret;
}
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* align for the worst case without padding */
+ *alignment = get_index_format_size(GEN6_INDEX_DWORD);
+ return size;
+}
+
/**
* No need to initialize first.
*/
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h
index 30734476435..16b128bf63c 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h
@@ -209,11 +209,19 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
const struct ilo_state_vf *old,
struct ilo_state_vf_delta *delta);
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info);
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 065e665d895..a0074e57e99 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -25,6 +25,10 @@
* Chia-I Wu
*/
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+
#include "ilo_screen.h"
#include "ilo_resource.h"
@@ -426,8 +430,7 @@ buf_create_bo(struct ilo_buffer_resource *buf)
const bool cpu_init = resource_get_cpu_init(&buf->base);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, name,
- buf->buffer.bo_size, cpu_init);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, name, buf->bo_size, cpu_init);
if (!bo)
return false;
@@ -449,6 +452,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
const struct ilo_screen *is = ilo_screen(screen);
struct ilo_buffer_resource *buf;
+ uint32_t alignment;
unsigned size;
buf = CALLOC_STRUCT(ilo_buffer_resource);
@@ -477,11 +481,17 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
ilo_dev_gen(&is->dev) < ILO_GEN(7.5))
size = align(size, 4096);
- ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags);
- ilo_vma_init(&buf->vma, &is->dev, buf->buffer.bo_size, 4096);
+ if (templ->bind & PIPE_BIND_VERTEX_BUFFER)
+ size = ilo_state_vertex_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_INDEX_BUFFER)
+ size = ilo_state_index_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_STREAM_OUTPUT)
+ size = ilo_state_sol_buffer_size(&is->dev, size, &alignment);
- if (buf->buffer.bo_size < templ->width0 ||
- buf->buffer.bo_size > ilo_max_resource_size ||
+ buf->bo_size = size;
+ ilo_vma_init(&buf->vma, &is->dev, buf->bo_size, 4096);
+
+ if (buf->bo_size < templ->width0 || buf->bo_size > ilo_max_resource_size ||
!buf_create_bo(buf)) {
FREE(buf);
return NULL;
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index 0357499f44a..c28c05abcfe 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -29,7 +29,6 @@
#define ILO_RESOURCE_H
#include "core/intel_winsys.h"
-#include "core/ilo_buffer.h"
#include "core/ilo_image.h"
#include "core/ilo_vma.h"
@@ -106,7 +105,7 @@ struct ilo_texture {
struct ilo_buffer_resource {
struct pipe_resource base;
- struct ilo_buffer buffer;
+ uint32_t bo_size;
struct ilo_vma vma;
};
From 07acf9cb167d4e1f7aebd6837d22e3523ad63109 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Wed, 24 Jun 2015 12:57:57 +0800
Subject: [PATCH 0038/1208] ilo: improve SURFTYPE_BUFFER validations
Reorganize the validations to make them more systematic.
---
.../drivers/ilo/core/ilo_state_surface.c | 223 +++++++++++-------
.../drivers/ilo/core/ilo_state_surface.h | 1 +
2 files changed, 141 insertions(+), 83 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index d3581749e56..2caba6df46e 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -94,15 +94,127 @@ surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
+static uint32_t
+surface_get_gen6_buffer_offset_alignment(const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info)
+{
+ uint32_t alignment;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+ *
+ * "The Base Address for linear render target surfaces and surfaces
+ * accessed with the typed surface read/write data port messages must
+ * be element-size aligned, for non-YUV surface formats, or a multiple
+ * of 2 element-sizes for YUV surface formats. Other linear surfaces
+ * have no alignment requirements (byte alignment is sufficient)."
+ *
+ * "Certain message types used to access surfaces have more stringent
+ * alignment requirements. Please refer to the specific message
+ * documentation for additional restrictions."
+ */
+ switch (info->access) {
+ case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+ /* no alignment requirements */
+ alignment = 1;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ /* element-size aligned */
+ alignment = info->format_size;
+
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /*
+ * Nothing is said about Untyped* messages, but I think they require the
+ * base address to be DWord aligned.
+ */
+ alignment = 4;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+ *
+ * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
+ * pitch must be a multiple of 4 bytes."
+ */
+ if (info->struct_size > 1)
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
+ *
+ * "the surface base address must be OWord aligned"
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord
+ * Dual Block Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
+ *
+ * "The surface base address must be DWord aligned"
+ *
+ * for DWord Scattered Read/Write and Byte Scattered Read/Write.
+ */
+ alignment = (info->format_size > 4) ? 16 : 4;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, 237, and
+ * 246:
+ *
+ * "the surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 16 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, OWord
+ * Dual Block Read/Write, and DWord Scattered Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 248:
+ *
+ * "The surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 4 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for Byte Scattered Read/Write.
+ *
+ * It is programmable on Gen7.5+.
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5)) {
+ const int fixed = (info->format_size > 1) ? 16 : 4;
+ assert(info->struct_size == fixed);
+ }
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 259:
+ *
+ * "Both the surface base address and surface pitch must be DWord
+ * aligned."
+ */
+ alignment = 4;
+
+ assert(info->struct_size % alignment == 0);
+ break;
+ default:
+ assert(!"unknown access");
+ alignment = 1;
+ break;
+ }
+
+ return alignment;
+}
+
static bool
surface_validate_gen6_buffer(const struct ilo_dev *dev,
const struct ilo_state_surface_buffer_info *info)
{
- ILO_DEV_ASSERT(dev, 6, 8);
+ uint32_t alignment;
- /* SVB writes are Gen6-only */
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB);
+ ILO_DEV_ASSERT(dev, 6, 8);
if (info->offset + info->size > info->vma->vm_size) {
ilo_warn("invalid buffer range\n");
@@ -120,88 +232,34 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
return false;
}
+ alignment = surface_get_gen6_buffer_offset_alignment(dev, info);
+ if (info->offset % alignment || info->vma->vm_alignment % alignment) {
+ ilo_warn("bad buffer offset\n");
+ return false;
+ }
+
+ /* no STRBUF on Gen6 */
+ if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1)
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+
+ /* SVB writes are Gen6 only */
+ if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB)
+ assert(ilo_dev_gen(dev) == ILO_GEN(6));
+
/*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+ * From the Ivy Bridge PRM, volume 4 part 1, page 83:
*
- * "The Base Address for linear render target surfaces and surfaces
- * accessed with the typed surface read/write data port messages must
- * be element-size aligned, for non-YUV surface formats, or a multiple
- * of 2 element-sizes for YUV surface formats. Other linear surfaces
- * have no alignment requirements (byte alignment is sufficient)."
+ * "NOTE: "RAW" is supported only with buffers and structured buffers
+ * accessed via the untyped surface read/write and untyped atomic
+ * operation messages, which do not have a column in the table."
*
- * "Certain message types used to access surfaces have more stringent
- * alignment requirements. Please refer to the specific message
- * documentation for additional restrictions."
+ * From the Ivy Bridge PRM, volume 4 part 1, page 252:
*
- * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
- *
- * "the surface base address must be OWord aligned"
- *
- * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual
- * Block Read/Write.
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
- *
- * "The surface base address must be DWord aligned"
- *
- * for DWord Scattered Read/Write and Byte Scattered Read/Write.
- *
- * We have to rely on users to correctly set info->struct_size here. DWord
- * Scattered Read/Write has conflicting pitch and alignment, but we do not
- * use them yet so we are fine.
- *
- * It is unclear if sampling engine surfaces require aligned offsets.
+ * "For untyped messages, the Surface Format must be RAW and the
+ * Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF."
*/
- if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) {
- assert(info->struct_size % info->format_size == 0);
-
- if (info->offset % info->struct_size ||
- info->vma->vm_alignment % info->struct_size) {
- ilo_warn("bad buffer offset\n");
- return false;
- }
- }
-
- if (info->format == GEN6_FORMAT_RAW) {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 97:
- *
- * ""RAW" is supported only with buffers and structured buffers
- * accessed via the untyped surface read/write and untyped atomic
- * operation messages, which do not have a column in the table."
- *
- * We do not have a specific access mode for untyped messages.
- */
- assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED);
-
- /*
- * Nothing is said about Untyped* messages, but I guess they require the
- * base address to be DWord aligned.
- */
- if (info->offset % 4 || info->vma->vm_alignment % 4) {
- ilo_warn("bad RAW buffer offset\n");
- return false;
- }
-
- if (info->struct_size > 1) {
- /* no STRBUF on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6)) {
- ilo_warn("no STRBUF support\n");
- return false;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
- * pitch must be a multiple of 4 bytes."
- */
- if (info->struct_size % 4) {
- ilo_warn("bad STRBUF pitch\n");
- return false;
- }
- }
- }
+ assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) ==
+ (info->format == GEN6_FORMAT_RAW));
return true;
}
@@ -216,8 +274,7 @@ surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
c = info->size / info->struct_size;
- if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB &&
- info->format_size < info->size - info->struct_size * c)
+ if (info->format_size < info->size - info->struct_size * c)
c++;
/*
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index 0cda08eb031..835df69882e 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -52,6 +52,7 @@ struct ilo_state_surface_buffer_info {
enum ilo_state_surface_access access;
+ /* format_size may be less than, equal to, or greater than struct_size */
enum gen_surface_format format;
uint8_t format_size;
From f825fe8e13adfec4cd488bac3663b7e9c90a8c06 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Thu, 25 Jun 2015 07:18:31 +0800
Subject: [PATCH 0039/1208] ilo: remove ilo_image_disable_aux()
Fail resource creation when aux bo allocation fails.
---
src/gallium/drivers/ilo/core/ilo_image.c | 19 -------------------
src/gallium/drivers/ilo/core/ilo_image.h | 3 ---
src/gallium/drivers/ilo/ilo_resource.c | 8 ++------
3 files changed, 2 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 0d837d8a9d5..ed9b2883ac0 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -1449,22 +1449,3 @@ ilo_image_init_for_imported(struct ilo_image *img,
return true;
}
-
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
-{
- /* HiZ is required for separate stencil on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6) &&
- img->aux.type == ILO_IMAGE_AUX_HIZ &&
- img->separate_stencil)
- return false;
-
- /* MCS is required for multisample images */
- if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- img->sample_count > 1)
- return false;
-
- img->aux.enables = 0x0;
-
- return true;
-}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 77747ed7492..e5dcc4319b6 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -157,9 +157,6 @@ ilo_image_init_for_imported(struct ilo_image *img,
enum gen_surface_tiling tiling,
unsigned bo_stride);
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev);
-
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
{
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index a0074e57e99..3b8e607862c 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -283,8 +283,6 @@ tex_destroy(struct ilo_texture *tex)
static bool
tex_alloc_bos(struct ilo_texture *tex)
{
- struct ilo_screen *is = ilo_screen(tex->base.screen);
-
if (!tex->imported && !tex_create_bo(tex))
return false;
@@ -294,13 +292,11 @@ tex_alloc_bos(struct ilo_texture *tex)
switch (tex->image.aux.type) {
case ILO_IMAGE_AUX_HIZ:
- if (!tex_create_hiz(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_hiz(tex))
return false;
break;
case ILO_IMAGE_AUX_MCS:
- if (!tex_create_mcs(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_mcs(tex))
return false;
break;
default:
From 934e4a469fd37dac03b8280cce41df4d9f4ed123 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Wed, 24 Jun 2015 22:46:36 +0800
Subject: [PATCH 0040/1208] ilo: initialize ilo_image from ilo_image_info
Convert pipe_resource to ilo_image_info for image initialization.
---
src/gallium/drivers/ilo/core/ilo_image.c | 283 +++++++++++------------
src/gallium/drivers/ilo/core/ilo_image.h | 43 +++-
src/gallium/drivers/ilo/ilo_resource.c | 99 ++++++--
3 files changed, 244 insertions(+), 181 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index ed9b2883ac0..39c6daaafd3 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -42,7 +42,7 @@ enum {
struct ilo_image_params {
const struct ilo_dev *dev;
- const struct pipe_resource *templ;
+ const struct ilo_image_info *info;
unsigned valid_tilings;
bool compressed;
@@ -56,7 +56,7 @@ img_get_slice_size(const struct ilo_image *img,
const struct ilo_image_params *params,
unsigned level, unsigned *width, unsigned *height)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
unsigned w, h;
w = u_minify(img->width0, level);
@@ -112,8 +112,7 @@ img_get_slice_size(const struct ilo_image *img,
* y = align(y, 2) * 2;
*/
if (img->interleaved_samples) {
- switch (templ->nr_samples) {
- case 0:
+ switch (info->sample_count) {
case 1:
break;
case 2:
@@ -157,12 +156,12 @@ static unsigned
img_get_num_layers(const struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
- unsigned num_layers = templ->array_size;
+ const struct ilo_image_info *info = params->info;
+ unsigned num_layers = info->array_size;
/* samples of the same index are stored in a layer */
- if (templ->nr_samples > 1 && !img->interleaved_samples)
- num_layers *= templ->nr_samples;
+ if (info->sample_count > 1 && !img->interleaved_samples)
+ num_layers *= info->sample_count;
return num_layers;
}
@@ -171,7 +170,7 @@ static void
img_init_layer_height(struct ilo_image *img,
struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
unsigned num_layers;
if (img->walk != ILO_IMAGE_WALK_LAYER)
@@ -218,7 +217,7 @@ img_init_layer_height(struct ilo_image *img,
img->walk_layer_height = params->h0 + params->h1 +
((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
+ if (ilo_dev_gen(params->dev) == ILO_GEN(6) && info->sample_count > 1 &&
img->height0 % 4 == 1)
img->walk_layer_height += 4;
@@ -229,13 +228,13 @@ static void
img_init_lods(struct ilo_image *img,
struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
unsigned cur_x, cur_y;
unsigned lv;
cur_x = 0;
cur_y = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
unsigned lod_w, lod_h;
img_get_slice_size(img, params, lv, &lod_w, &lod_h);
@@ -261,7 +260,7 @@ img_init_lods(struct ilo_image *img,
cur_y += lod_h;
/* every LOD begins at tile boundaries */
- if (templ->last_level > 0) {
+ if (info->level_count > 1) {
assert(img->format == PIPE_FORMAT_S8_UINT);
cur_x = align(cur_x, 64);
cur_y = align(cur_y, 64);
@@ -269,7 +268,7 @@ img_init_lods(struct ilo_image *img,
break;
case ILO_IMAGE_WALK_3D:
{
- const unsigned num_slices = u_minify(templ->depth0, lv);
+ const unsigned num_slices = u_minify(info->depth, lv);
const unsigned num_slices_per_row = 1 << lv;
const unsigned num_rows =
(num_slices + num_slices_per_row - 1) / num_slices_per_row;
@@ -291,7 +290,7 @@ img_init_lods(struct ilo_image *img,
if (img->walk == ILO_IMAGE_WALK_LAYER) {
params->h0 = img->lods[0].slice_height;
- if (templ->last_level > 0)
+ if (info->level_count > 1)
params->h1 = img->lods[1].slice_height;
else
img_get_slice_size(img, params, 1, &cur_x, &params->h1);
@@ -302,7 +301,7 @@ static void
img_init_alignments(struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 113:
@@ -396,7 +395,7 @@ img_init_alignments(struct ilo_image *img,
/* this happens to be the case */
img->align_i = img->block_width;
img->align_j = img->block_height;
- } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
+ } else if (info->bind_zs) {
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
switch (img->format) {
case PIPE_FORMAT_Z16_UNORM:
@@ -426,11 +425,11 @@ img_init_alignments(struct ilo_image *img,
}
} else {
const bool valign_4 =
- (templ->nr_samples > 1) ||
+ (info->sample_count > 1) ||
(ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
(ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
img->tiling == GEN6_TILING_Y &&
- (templ->bind & PIPE_BIND_RENDER_TARGET));
+ info->bind_surface_dp_render);
if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
@@ -460,14 +459,14 @@ static void
img_init_tiling(struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
unsigned preferred_tilings = params->valid_tilings;
/* no fencing nor BLT support */
if (preferred_tilings & ~IMAGE_TILING_W)
preferred_tilings &= ~IMAGE_TILING_W;
- if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
+ if (info->bind_surface_dp_render || info->bind_surface_sampler) {
/*
* heuristically set a minimum width/height for enabling tiling
*/
@@ -499,7 +498,7 @@ static void
img_init_walk_gen7(struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
/*
* It is not explicitly states, but render targets are expected to be
@@ -508,14 +507,14 @@ img_init_walk_gen7(struct ilo_image *img,
*
* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
*/
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (info->bind_zs) {
/*
* From the Ivy Bridge PRM, volume 1 part 1, page 111:
*
* "note that the depth buffer and stencil buffer have an implied
* value of ARYSPC_FULL"
*/
- img->walk = (templ->target == PIPE_TEXTURE_3D) ?
+ img->walk = (info->target == PIPE_TEXTURE_3D) ?
ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
img->interleaved_samples = true;
@@ -530,12 +529,12 @@ img_init_walk_gen7(struct ilo_image *img,
* As multisampled resources are not mipmapped, we never use
* ARYSPC_FULL for them.
*/
- if (templ->nr_samples > 1)
- assert(templ->last_level == 0);
+ if (info->sample_count > 1)
+ assert(info->level_count == 1);
img->walk =
- (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
- (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
+ (info->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
+ (info->level_count > 1) ? ILO_IMAGE_WALK_LAYER :
ILO_IMAGE_WALK_LOD;
img->interleaved_samples = false;
@@ -558,7 +557,7 @@ img_init_walk_gen6(struct ilo_image *img,
* GEN6 does not support compact spacing otherwise.
*/
img->walk =
- (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
+ (params->info->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
(img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
ILO_IMAGE_WALK_LAYER;
@@ -580,7 +579,7 @@ static unsigned
img_get_valid_tilings(const struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
const enum pipe_format format = img->format;
unsigned valid_tilings = params->valid_tilings;
@@ -590,7 +589,7 @@ img_get_valid_tilings(const struct ilo_image *img,
* "Display/Overlay Y-Major not supported.
* X-Major required for Async Flips"
*/
- if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
+ if (unlikely(info->bind_scanout))
valid_tilings &= IMAGE_TILING_X;
/*
@@ -599,7 +598,7 @@ img_get_valid_tilings(const struct ilo_image *img,
* "The cursor surface address must be 4K byte aligned. The cursor must
* be in linear memory, it cannot be tiled."
*/
- if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
+ if (unlikely(info->bind_cursor))
valid_tilings &= IMAGE_TILING_NONE;
/*
@@ -614,7 +613,7 @@ img_get_valid_tilings(const struct ilo_image *img,
*
* "W-Major Tile Format is used for separate stencil."
*/
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (info->bind_zs) {
switch (format) {
case PIPE_FORMAT_S8_UINT:
valid_tilings &= IMAGE_TILING_W;
@@ -625,7 +624,7 @@ img_get_valid_tilings(const struct ilo_image *img,
}
}
- if (templ->bind & PIPE_BIND_RENDER_TARGET) {
+ if (info->bind_surface_dp_render) {
/*
* From the Sandy Bridge PRM, volume 1 part 2, page 32:
*
@@ -656,7 +655,7 @@ img_get_valid_tilings(const struct ilo_image *img,
valid_tilings &= ~IMAGE_TILING_W;
}
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
+ if (info->bind_surface_sampler) {
if (ilo_dev_gen(params->dev) < ILO_GEN(8))
valid_tilings &= ~IMAGE_TILING_W;
}
@@ -671,17 +670,17 @@ static void
img_init_size_and_format(struct ilo_image *img,
struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
- enum pipe_format format = templ->format;
+ const struct ilo_image_info *info = params->info;
+ enum pipe_format format = info->format;
bool require_separate_stencil = false;
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
- img->level_count = templ->last_level + 1;
- img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
+ img->target = info->target;
+ img->width0 = info->width;
+ img->height0 = info->height;
+ img->depth0 = info->depth;
+ img->array_size = info->array_size;
+ img->level_count = info->level_count;
+ img->sample_count = info->sample_count;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 317:
@@ -691,7 +690,7 @@ img_init_size_and_format(struct ilo_image *img,
*
* GEN7+ requires separate stencil buffers.
*/
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (info->bind_zs) {
if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
require_separate_stencil = true;
else
@@ -731,15 +730,14 @@ static bool
img_want_mcs(const struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
bool want_mcs = false;
/* MCS is for RT on GEN7+ */
if (ilo_dev_gen(params->dev) < ILO_GEN(7))
return false;
- if (templ->target != PIPE_TEXTURE_2D ||
- !(templ->bind & PIPE_BIND_RENDER_TARGET))
+ if (info->target != PIPE_TEXTURE_2D || !info->bind_surface_dp_render)
return false;
/*
@@ -752,9 +750,9 @@ img_want_mcs(const struct ilo_image *img,
* "This field must be set to 0 for all SINT MSRTs when all RT channels
* are not written"
*/
- if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
+ if (info->sample_count > 1 && !util_format_is_pure_sint(info->format)) {
want_mcs = true;
- } else if (templ->nr_samples <= 1) {
+ } else if (info->sample_count == 1 && !info->aux_disable) {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 326:
*
@@ -770,7 +768,7 @@ img_want_mcs(const struct ilo_image *img,
* ..."
*/
if (img->tiling != GEN6_TILING_NONE &&
- templ->last_level == 0 && templ->array_size == 1) {
+ info->level_count == 1 && info->array_size == 1) {
switch (img->block_size) {
case 4:
case 8:
@@ -790,27 +788,26 @@ static bool
img_want_hiz(const struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
const struct util_format_description *desc =
- util_format_description(templ->format);
+ util_format_description(info->format);
if (ilo_debug & ILO_DEBUG_NOHIZ)
return false;
- /* we want 8x4 aligned levels */
- if (templ->target == PIPE_TEXTURE_1D)
+ if (info->aux_disable)
return false;
- if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
+ /* we want 8x4 aligned levels */
+ if (info->target == PIPE_TEXTURE_1D)
+ return false;
+
+ if (!info->bind_zs)
return false;
if (!util_format_has_depth(desc))
return false;
- /* no point in having HiZ */
- if (templ->usage == PIPE_USAGE_STAGING)
- return false;
-
/*
* As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
* for every level. This is generally fine except on GEN6, where HiZ and
@@ -819,8 +816,8 @@ img_want_hiz(const struct ilo_image *img,
* can result in incompatible formats.
*/
if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
- templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- templ->last_level)
+ info->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ info->level_count > 1)
return false;
return true;
@@ -839,7 +836,7 @@ img_init_aux(struct ilo_image *img,
static void
img_align(struct ilo_image *img, struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
int align_w = 1, align_h = 1, pad_h = 0;
/*
@@ -864,11 +861,11 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* padding purposes. The value of 4 for j still applies for mip level
* alignment and QPitch calculation."
*/
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
+ if (info->bind_surface_sampler) {
align_w = MAX2(align_w, img->align_i);
align_h = MAX2(align_h, img->align_j);
- if (templ->target == PIPE_TEXTURE_CUBE)
+ if (info->target == PIPE_TEXTURE_CUBE)
pad_h += 2;
if (params->compressed)
@@ -881,7 +878,7 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* "If the surface contains an odd number of rows of data, a final row
* below the surface must be allocated."
*/
- if (templ->bind & PIPE_BIND_RENDER_TARGET)
+ if (info->bind_surface_dp_render)
align_h = MAX2(align_h, 2);
/*
@@ -889,9 +886,9 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* for unaligned non-mipmapped and non-array images.
*/
if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
- templ->last_level == 0 &&
- templ->array_size == 1 &&
- templ->depth0 == 1) {
+ info->level_count == 1 &&
+ info->array_size == 1 &&
+ info->depth == 1) {
align_w = MAX2(align_w, 8);
align_h = MAX2(align_h, 4);
}
@@ -925,7 +922,7 @@ img_calculate_bo_size(struct ilo_image *img,
* required above."
*/
if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
- (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
+ params->info->bind_surface_sampler &&
img->tiling == GEN6_TILING_NONE)
h += (64 + img->bo_stride - 1) / img->bo_stride;
@@ -943,7 +940,7 @@ img_calculate_bo_size(struct ilo_image *img,
*
* Different requirements may exist when the bo is used in different
* places, but our alignments here should be good enough that we do not
- * need to check params->templ->bind.
+ * need to check params->info->bind_x.
*/
switch (img->tiling) {
case GEN6_TILING_X:
@@ -994,7 +991,7 @@ img_calculate_bo_size(struct ilo_image *img,
img->tiling = GEN6_TILING_NONE;
/* MCS support for non-MSRTs is limited to tiled RTs */
if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- params->templ->nr_samples <= 1)
+ params->info->sample_count == 1)
img->aux.type = ILO_IMAGE_AUX_NONE;
continue;
@@ -1014,7 +1011,7 @@ static void
img_calculate_hiz_size(struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
const unsigned hz_align_j = 8;
enum ilo_image_walk_type hz_walk;
unsigned hz_width, hz_height, lv;
@@ -1059,7 +1056,7 @@ img_calculate_hiz_size(struct ilo_image *img,
hz_width = align(img->lods[0].slice_width, 16);
- hz_height = hz_qpitch * templ->array_size / 2;
+ hz_height = hz_qpitch * info->array_size / 2;
if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
hz_height = align(hz_height, 8);
@@ -1077,7 +1074,7 @@ img_calculate_hiz_size(struct ilo_image *img,
hz_height = 0;
cur_tx = 0;
cur_ty = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
unsigned tw, th;
lod_tx[lv] = cur_tx;
@@ -1085,7 +1082,7 @@ img_calculate_hiz_size(struct ilo_image *img,
tw = align(img->lods[lv].slice_width, 16);
th = align(img->lods[lv].slice_height, hz_align_j) *
- templ->array_size / 2;
+ info->array_size / 2;
/* convert to Y-tiles */
tw = align(tw, 128) / 128;
th = align(th, 32) / 32;
@@ -1102,7 +1099,7 @@ img_calculate_hiz_size(struct ilo_image *img,
}
/* convert tile offsets to memory offsets */
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
img->aux.walk_lod_offsets[lv] =
(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
}
@@ -1114,10 +1111,10 @@ img_calculate_hiz_size(struct ilo_image *img,
hz_width = align(img->lods[0].slice_width, 16);
hz_height = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
/* according to the formula, slices are packed together vertically */
- hz_height += h * u_minify(templ->depth0, lv);
+ hz_height += h * u_minify(info->depth, lv);
}
hz_height /= 2;
break;
@@ -1136,8 +1133,7 @@ img_calculate_hiz_size(struct ilo_image *img,
*/
hz_clear_w = 8;
hz_clear_h = 4;
- switch (templ->nr_samples) {
- case 0:
+ switch (info->sample_count) {
case 1:
default:
break;
@@ -1158,7 +1154,7 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
}
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
if (u_minify(img->width0, lv) % hz_clear_w ||
u_minify(img->height0, lv) % hz_clear_h)
break;
@@ -1166,7 +1162,7 @@ img_calculate_hiz_size(struct ilo_image *img,
}
/* we padded to allow this in img_align() */
- if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
+ if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
img->aux.enables |= 0x1;
/* align to Y-tile */
@@ -1178,13 +1174,13 @@ static void
img_calculate_mcs_size(struct ilo_image *img,
const struct ilo_image_params *params)
{
- const struct pipe_resource *templ = params->templ;
+ const struct ilo_image_info *info = params->info;
int mcs_width, mcs_height, mcs_cpp;
int downscale_x, downscale_y;
assert(img->aux.type == ILO_IMAGE_AUX_MCS);
- if (templ->nr_samples > 1) {
+ if (info->sample_count > 1) {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
@@ -1198,7 +1194,7 @@ img_calculate_mcs_size(struct ilo_image *img,
* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
* pixel block in the RT.
*/
- switch (templ->nr_samples) {
+ switch (info->sample_count) {
case 2:
case 4:
downscale_x = 8;
@@ -1295,13 +1291,13 @@ img_calculate_mcs_size(struct ilo_image *img,
mcs_cpp = 16; /* an OWord */
}
- img->aux.enables = (1 << (templ->last_level + 1)) - 1;
+ img->aux.enables = (1 << info->level_count) - 1;
/* align to Y-tile */
img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
img->aux.bo_height = align(mcs_height, 32);
}
-static void
+static bool
img_init(struct ilo_image *img,
struct ilo_image_params *params)
{
@@ -1318,7 +1314,7 @@ img_init(struct ilo_image *img,
img_align(img, params);
img_calculate_bo_size(img, params);
- img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
+ img->scanout = params->info->bind_scanout;
switch (img->aux.type) {
case ILO_IMAGE_AUX_HIZ:
@@ -1330,6 +1326,8 @@ img_init(struct ilo_image *img,
default:
break;
}
+
+ return true;
}
/**
@@ -1339,29 +1337,29 @@ img_init(struct ilo_image *img,
static void
img_init_for_transfer(struct ilo_image *img,
const struct ilo_dev *dev,
- const struct pipe_resource *templ)
+ const struct ilo_image_info *info)
{
- const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
- templ->depth0 : templ->array_size;
+ const unsigned num_layers = (info->target == PIPE_TEXTURE_3D) ?
+ info->depth : info->array_size;
unsigned layer_width, layer_height;
- assert(templ->last_level == 0);
- assert(templ->nr_samples <= 1);
+ assert(info->level_count == 1);
+ assert(info->sample_count == 1);
img->aux.type = ILO_IMAGE_AUX_NONE;
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
+ img->target = info->target;
+ img->width0 = info->width;
+ img->height0 = info->height;
+ img->depth0 = info->depth;
+ img->array_size = info->array_size;
img->level_count = 1;
img->sample_count = 1;
- img->format = templ->format;
- img->block_width = util_format_get_blockwidth(templ->format);
- img->block_height = util_format_get_blockheight(templ->format);
- img->block_size = util_format_get_blocksize(templ->format);
+ img->format = info->format;
+ img->block_width = util_format_get_blockwidth(info->format);
+ img->block_height = util_format_get_blockheight(info->format);
+ img->block_size = util_format_get_blocksize(info->format);
img->walk = ILO_IMAGE_WALK_LOD;
@@ -1374,8 +1372,8 @@ img_init_for_transfer(struct ilo_image *img,
util_is_power_of_two(img->block_height));
/* use packed layout */
- layer_width = align(templ->width0, img->align_i);
- layer_height = align(templ->height0, img->align_j);
+ layer_width = align(info->width, img->align_i);
+ layer_height = align(info->height, img->align_j);
img->lods[0].slice_width = layer_width;
img->lods[0].slice_height = layer_height;
@@ -1386,66 +1384,57 @@ img_init_for_transfer(struct ilo_image *img,
img->bo_height = (layer_height / img->block_height) * num_layers;
}
+static bool
+img_is_bind_gpu(const struct ilo_image_info *info)
+{
+ return (info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed ||
+ info->bind_zs ||
+ info->bind_scanout ||
+ info->bind_cursor);
+}
+
/**
* Initialize the image. Callers should zero-initialize \p img first.
*/
-void ilo_image_init(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ)
+bool
+ilo_image_init(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
struct ilo_image_params params;
- bool transfer_only;
assert(ilo_is_zeroed(img, sizeof(*img)));
/* use transfer layout when the texture is never bound to GPU */
- transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
- PIPE_BIND_TRANSFER_READ));
- if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
- img_init_for_transfer(img, dev, templ);
- return;
+ if (!img_is_bind_gpu(info) &&
+ info->level_count == 1 &&
+ info->sample_count == 1) {
+ img_init_for_transfer(img, dev, info);
+ return true;
}
memset(&params, 0, sizeof(params));
params.dev = dev;
- params.templ = templ;
- params.valid_tilings = IMAGE_TILING_ALL;
+ params.info = info;
+ params.valid_tilings = (info->valid_tilings) ?
+ info->valid_tilings : IMAGE_TILING_ALL;
- img_init(img, &params);
-}
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride)
-{
- struct ilo_image_params params;
-
- assert(ilo_is_zeroed(img, sizeof(*img)));
-
- if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
- (tiling == GEN6_TILING_Y && bo_stride % 128) ||
- (tiling == GEN8_TILING_W && bo_stride % 64))
+ if (!img_init(img, &params))
return false;
- memset(&params, 0, sizeof(params));
- params.dev = dev;
- params.templ = templ;
- params.valid_tilings = 1 << tiling;
+ if (info->force_bo_stride) {
+ if ((img->tiling == GEN6_TILING_X && info->force_bo_stride % 512) ||
+ (img->tiling == GEN6_TILING_Y && info->force_bo_stride % 128) ||
+ (img->tiling == GEN8_TILING_W && info->force_bo_stride % 64))
+ return false;
- img_init(img, &params);
+ if (img->bo_stride > info->force_bo_stride)
+ return false;
- assert(img->tiling == tiling);
- if (img->bo_stride > bo_stride)
- return false;
-
- img->bo_stride = bo_stride;
-
- /* assume imported RTs are also scanouts */
- if (!img->scanout)
- img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ img->bo_stride = info->force_bo_stride;
+ }
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index e5dcc4319b6..e488bef0d3f 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -67,6 +67,36 @@ enum ilo_image_walk_type {
ILO_IMAGE_WALK_3D,
};
+struct ilo_image_info {
+ enum pipe_texture_target target;
+
+ enum pipe_format format;
+
+ /* image size */
+ uint16_t width;
+ uint16_t height;
+ uint16_t depth;
+ uint16_t array_size;
+ uint8_t level_count;
+ uint8_t sample_count;
+
+ /* disable optional aux */
+ bool aux_disable;
+
+ /* tilings to consider, if any bit is set */
+ uint8_t valid_tilings;
+
+ /* force a stride */
+ uint32_t force_bo_stride;
+
+ bool bind_surface_sampler;
+ bool bind_surface_dp_render;
+ bool bind_surface_dp_typed;
+ bool bind_zs;
+ bool bind_scanout;
+ bool bind_cursor;
+};
+
/*
* When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
* LOD and this is used to describe LODs in the first array layer. Otherwise,
@@ -143,19 +173,10 @@ struct ilo_image {
} aux;
};
-struct pipe_resource;
-
-void
+bool
ilo_image_init(struct ilo_image *img,
const struct ilo_dev *dev,
- const struct pipe_resource *templ);
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride);
+ const struct ilo_image_info *info);
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 3b8e607862c..0b0f69c30be 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -87,6 +87,38 @@ resource_get_cpu_init(const struct pipe_resource *templ)
PIPE_BIND_STREAM_OUTPUT)) ? false : true;
}
+static void
+resource_get_image_info(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ enum pipe_format image_format,
+ struct ilo_image_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->target = templ->target;
+ info->format = image_format;
+
+ info->width = templ->width0;
+ info->height = templ->height0;
+ info->depth = templ->depth0;
+ info->array_size = templ->array_size;
+ info->level_count = templ->last_level + 1;
+ info->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
+
+ info->aux_disable = (templ->usage == PIPE_USAGE_STAGING);
+
+ if (templ->bind & PIPE_BIND_LINEAR)
+ info->valid_tilings = 1 << GEN6_TILING_NONE;
+
+ info->bind_surface_sampler = (templ->bind & PIPE_BIND_SAMPLER_VIEW);
+ info->bind_surface_dp_render = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ info->bind_surface_dp_typed = (templ->bind &
+ (PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE));
+ info->bind_zs = (templ->bind & PIPE_BIND_DEPTH_STENCIL);
+ info->bind_scanout = (templ->bind & PIPE_BIND_SCANOUT);
+ info->bind_cursor = (templ->bind & PIPE_BIND_CURSOR);
+}
+
static enum gen_surface_tiling
winsys_to_surface_tiling(enum intel_tiling_mode tiling)
{
@@ -306,9 +338,10 @@ tex_alloc_bos(struct ilo_texture *tex)
return true;
}
-static bool
+static struct intel_bo *
tex_import_handle(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ struct ilo_image_info *info)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
@@ -319,23 +352,24 @@ tex_import_handle(struct ilo_texture *tex,
bo = intel_winsys_import_handle(is->dev.winsys, name, handle,
tex->image.bo_height, &tiling, &pitch);
- if (!bo)
- return false;
+ /* modify image info */
+ if (bo) {
+ const uint8_t valid_tilings = 1 << winsys_to_surface_tiling(tiling);
- if (!ilo_image_init_for_imported(&tex->image, &is->dev, templ,
- winsys_to_surface_tiling(tiling), pitch)) {
- ilo_err("failed to import handle for texture\n");
- intel_bo_unref(bo);
- return false;
+ if (info->valid_tilings && !(info->valid_tilings & valid_tilings)) {
+ intel_bo_unref(bo);
+ return NULL;
+ }
+
+ info->valid_tilings = valid_tilings;
+ info->force_bo_stride = pitch;
+
+ /* assume imported RTs are also scanouts */
+ if (!info->bind_scanout)
+ info->bind_scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
}
- ilo_vma_init(&tex->vma, &is->dev,
- tex->image.bo_stride * tex->image.bo_height, 4096);
- ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
-
- tex->imported = true;
-
- return true;
+ return bo;
}
static bool
@@ -345,18 +379,33 @@ tex_init_image(struct ilo_texture *tex,
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
struct ilo_image *img = &tex->image;
+ struct intel_bo *imported_bo = NULL;;
+ struct ilo_image_info info;
+
+ resource_get_image_info(templ, &is->dev, templ->format, &info);
if (handle) {
- if (!tex_import_handle(tex, handle))
+ imported_bo = tex_import_handle(tex, handle, &info);
+ if (!imported_bo)
return false;
- } else {
- ilo_image_init(img, &is->dev, templ);
- ilo_vma_init(&tex->vma, &is->dev,
- img->bo_stride * img->bo_height, 4096);
}
- if (img->bo_height > ilo_max_resource_size / img->bo_stride)
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
return false;
+ }
+
+ if (img->bo_height > ilo_max_resource_size / img->bo_stride ||
+ !ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height,
+ 4096)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+
+ if (imported_bo) {
+ ilo_vma_set_bo(&tex->vma, &is->dev, imported_bo, 0);
+ tex->imported = true;
+ }
if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* require on-the-fly tiling/untiling or format conversion */
@@ -500,13 +549,17 @@ static boolean
ilo_can_create_resource(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
+ struct ilo_screen *is = ilo_screen(screen);
+ struct ilo_image_info info;
struct ilo_image img;
if (templ->target == PIPE_BUFFER)
return (templ->width0 <= ilo_max_resource_size);
+ resource_get_image_info(templ, &is->dev, templ->format, &info);
+
memset(&img, 0, sizeof(img));
- ilo_image_init(&img, &ilo_screen(screen)->dev, templ);
+ ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
return (img.bo_height <= ilo_max_resource_size / img.bo_stride);
}
From dc2e92b2d3d216fc9657f2ef594d7c5d0b03370e Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Wed, 24 Jun 2015 22:47:30 +0800
Subject: [PATCH 0041/1208] ilo: replace pipe_texture_target by
gen_surface_type
Replace pipe_texture_target by gen_surface_type in ilo_image. Change how
GEN6_SURFTYPE_CUBE is specified in ilo_state_surface and ilo_state_zs.
---
src/gallium/drivers/ilo/core/ilo_image.c | 18 ++--
src/gallium/drivers/ilo/core/ilo_image.h | 4 +-
.../drivers/ilo/core/ilo_state_surface.c | 79 ++++++------------
.../drivers/ilo/core/ilo_state_surface.h | 3 +-
src/gallium/drivers/ilo/core/ilo_state_zs.c | 83 ++++++-------------
src/gallium/drivers/ilo/core/ilo_state_zs.h | 4 +-
src/gallium/drivers/ilo/ilo_resource.c | 24 +++++-
src/gallium/drivers/ilo/ilo_state.c | 10 ++-
8 files changed, 99 insertions(+), 126 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 39c6daaafd3..9ec6792146f 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -514,7 +514,7 @@ img_init_walk_gen7(struct ilo_image *img,
* "note that the depth buffer and stencil buffer have an implied
* value of ARYSPC_FULL"
*/
- img->walk = (info->target == PIPE_TEXTURE_3D) ?
+ img->walk = (info->type == GEN6_SURFTYPE_3D) ?
ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
img->interleaved_samples = true;
@@ -533,7 +533,7 @@ img_init_walk_gen7(struct ilo_image *img,
assert(info->level_count == 1);
img->walk =
- (info->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
+ (info->type == GEN6_SURFTYPE_3D) ? ILO_IMAGE_WALK_3D :
(info->level_count > 1) ? ILO_IMAGE_WALK_LAYER :
ILO_IMAGE_WALK_LOD;
@@ -557,7 +557,7 @@ img_init_walk_gen6(struct ilo_image *img,
* GEN6 does not support compact spacing otherwise.
*/
img->walk =
- (params->info->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
+ (params->info->type == GEN6_SURFTYPE_3D) ? ILO_IMAGE_WALK_3D :
(img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
ILO_IMAGE_WALK_LAYER;
@@ -674,7 +674,7 @@ img_init_size_and_format(struct ilo_image *img,
enum pipe_format format = info->format;
bool require_separate_stencil = false;
- img->target = info->target;
+ img->type = info->type;
img->width0 = info->width;
img->height0 = info->height;
img->depth0 = info->depth;
@@ -737,7 +737,7 @@ img_want_mcs(const struct ilo_image *img,
if (ilo_dev_gen(params->dev) < ILO_GEN(7))
return false;
- if (info->target != PIPE_TEXTURE_2D || !info->bind_surface_dp_render)
+ if (info->type != GEN6_SURFTYPE_2D || !info->bind_surface_dp_render)
return false;
/*
@@ -799,7 +799,7 @@ img_want_hiz(const struct ilo_image *img,
return false;
/* we want 8x4 aligned levels */
- if (info->target == PIPE_TEXTURE_1D)
+ if (info->type == GEN6_SURFTYPE_1D)
return false;
if (!info->bind_zs)
@@ -865,7 +865,7 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
align_w = MAX2(align_w, img->align_i);
align_h = MAX2(align_h, img->align_j);
- if (info->target == PIPE_TEXTURE_CUBE)
+ if (info->type == GEN6_SURFTYPE_CUBE)
pad_h += 2;
if (params->compressed)
@@ -1339,7 +1339,7 @@ img_init_for_transfer(struct ilo_image *img,
const struct ilo_dev *dev,
const struct ilo_image_info *info)
{
- const unsigned num_layers = (info->target == PIPE_TEXTURE_3D) ?
+ const unsigned num_layers = (info->type == GEN6_SURFTYPE_3D) ?
info->depth : info->array_size;
unsigned layer_width, layer_height;
@@ -1348,7 +1348,7 @@ img_init_for_transfer(struct ilo_image *img,
img->aux.type = ILO_IMAGE_AUX_NONE;
- img->target = info->target;
+ img->type = info->type;
img->width0 = info->width;
img->height0 = info->height;
img->depth0 = info->depth;
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index e488bef0d3f..1c4f86c78da 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -68,7 +68,7 @@ enum ilo_image_walk_type {
};
struct ilo_image_info {
- enum pipe_texture_target target;
+ enum gen_surface_type type;
enum pipe_format format;
@@ -117,7 +117,7 @@ struct ilo_image_lod {
* Texture layout.
*/
struct ilo_image {
- enum pipe_texture_target target;
+ enum gen_surface_type type;
/* size, format, etc for programming hardware states */
unsigned width0;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 2caba6df46e..40fe15f316f 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -425,29 +425,6 @@ surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
static bool
surface_validate_gen6_image(const struct ilo_dev *dev,
const struct ilo_state_surface_image_info *info)
@@ -487,17 +464,19 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 &&
info->img->width0 <= info->img->bo_stride);
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 78:
- *
- * "For cube maps, Width must be set equal to the Height."
- */
- assert(info->img->width0 == info->img->height0);
+ if (info->type != info->img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ info->img->type == GEN6_SURFTYPE_CUBE);
}
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+ *
+ * "For cube maps, Width must be set equal to the Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
+ assert(info->img->width0 == info->img->height0);
+
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 72:
*
@@ -532,20 +511,21 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+surface_get_gen6_image_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -573,7 +553,7 @@ surface_get_gen6_image_extent(const struct ilo_dev *dev,
w = info->img->width0;
h = info->img->height0;
- get_gen6_max_extent(dev, info->img, &max_w, &max_h);
+ surface_get_gen6_image_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -624,16 +604,17 @@ surface_get_gen6_image_slices(const struct ilo_dev *dev,
* layers to (86 * 6), about 512.
*/
- switch (get_gen6_surface_type(dev, info->img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512;
assert(info->img->array_size <= max_slice);
max_slice = info->img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
if (!d || d % 6) {
ilo_warn("invalid cube slice count\n");
@@ -946,7 +927,6 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
enum gen_sample_count sample_count;
uint32_t alignments;
- enum gen_surface_type type;
uint32_t dw0, dw2, dw3, dw4, dw5;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -966,10 +946,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
if (info->img->sample_count > 1)
assert(info->img->interleaved_samples);
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN6_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
@@ -996,7 +973,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
* "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
* field must be programmed to 111111b (all faces enabled)."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE |
GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -1025,7 +1002,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[4] = dw4;
surf->surface[5] = dw5;
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
@@ -1041,7 +1018,6 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
uint32_t alignments;
enum gen_sample_count sample_count;
- enum gen_surface_type type;
uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7;
ILO_DEV_ASSERT(dev, 7, 8);
@@ -1055,10 +1031,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
!surface_get_gen6_image_alignments(dev, info, &alignments))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN7_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
alignments;
@@ -1092,7 +1065,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
* field must be programmed to 111111b (all faces enabled). This field
* is ignored unless the Surface Type is SURFTYPE_CUBE."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER)
dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -1156,7 +1129,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[12] = 0;
}
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index 835df69882e..e78c7c97db1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -72,11 +72,12 @@ struct ilo_state_surface_image_info {
enum ilo_state_surface_access access;
+ enum gen_surface_type type;
+
enum gen_surface_format format;
bool is_integer;
bool readonly;
- bool is_cube_map;
bool is_array;
};
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
index 7b82f1acf6f..306e6d9aedc 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -60,29 +60,6 @@ zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
static enum gen_depth_format
get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img)
{
@@ -148,50 +125,52 @@ zs_validate_gen6(const struct ilo_dev *dev,
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
- * The stencil buffer has a format of S8_UINT, and shares Surface
+ * "The stencil buffer has a format of S8_UINT, and shares Surface
* Type, Height, Width, and Depth, Minimum Array Element, Render
* Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth
- * Buffer Object Control State fields of the depth buffer.
+ * Buffer Object Control State fields of the depth buffer."
*/
- if (info->z_img == info->s_img) {
- assert(info->z_img->target == info->s_img->target &&
- info->z_img->width0 == info->s_img->width0 &&
+ if (info->z_img && info->s_img && info->z_img != info->s_img) {
+ assert(info->z_img->type == info->s_img->type &&
info->z_img->height0 == info->s_img->height0 &&
info->z_img->depth0 == info->s_img->depth0);
}
+ if (info->type != img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ img->type == GEN6_SURFTYPE_CUBE);
+ }
+
assert(info->level < img->level_count);
assert(img->bo_stride);
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 323:
- *
- * "For cube maps, Width must be set equal to Height."
- */
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+ *
+ * "For cube maps, Width must be set equal to Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
assert(img->width0 == img->height0);
- }
return true;
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+zs_get_gen6_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -297,7 +276,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
h = align(h, align_h);
}
- get_gen6_max_extent(dev, img, &max_w, &max_h);
+ zs_get_gen6_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -326,16 +305,17 @@ zs_get_gen6_depth_slices(const struct ilo_dev *dev,
* surfaces. If the volume texture is MIP-mapped, this field specifies
* the depth of the base MIP level."
*/
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
assert(img->array_size <= max_slice);
max_slice = img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
/*
* Minimum Array Element and Depth must be 0; Render Target View
* Extent is ignored.
@@ -415,7 +395,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_state_zs_info *info)
{
uint16_t width, height, depth, array_base, view_extent;
- enum gen_surface_type type;
enum gen_depth_format format;
uint32_t dw1, dw2, dw3, dw4;
@@ -427,10 +406,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
GEN6_ZFORMAT_D32_FLOAT;
@@ -450,7 +425,7 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
/* info->z_readonly and info->s_readonly are ignored on Gen6 */
- dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+ dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT |
GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
@@ -488,7 +463,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
{
- enum gen_surface_type type;
enum gen_depth_format format;
uint16_t width, height, depth;
uint16_t array_base, view_extent;
@@ -502,14 +476,10 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
GEN6_ZFORMAT_D32_FLOAT;
- dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT |
+ dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT |
format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img) {
@@ -714,6 +684,7 @@ ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
struct ilo_state_zs_info info;
memset(&info, 0, sizeof(info));
+ info.type = GEN6_SURFTYPE_NULL;
return ilo_state_zs_init(zs, dev, &info);
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
index 6f32b7e2efe..d78a12ad516 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -48,11 +48,11 @@ struct ilo_state_zs_info {
const struct ilo_vma *s_vma;
const struct ilo_vma *hiz_vma;
+ enum gen_surface_type type;
+
/* ignored prior to Gen7 */
bool z_readonly;
bool s_readonly;
-
- bool is_cube_map;
};
struct ilo_state_zs {
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 0b0f69c30be..9ef53213b17 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -87,6 +87,28 @@ resource_get_cpu_init(const struct pipe_resource *templ)
PIPE_BIND_STREAM_OUTPUT)) ? false : true;
}
+static enum gen_surface_type
+get_surface_type(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return GEN6_SURFTYPE_1D;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return GEN6_SURFTYPE_2D;
+ case PIPE_TEXTURE_3D:
+ return GEN6_SURFTYPE_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return GEN6_SURFTYPE_CUBE;
+ default:
+ assert(!"unknown texture target");
+ return GEN6_SURFTYPE_NULL;
+ }
+}
+
static void
resource_get_image_info(const struct pipe_resource *templ,
const struct ilo_dev *dev,
@@ -95,7 +117,7 @@ resource_get_image_info(const struct pipe_resource *templ,
{
memset(info, 0, sizeof(*info));
- info->target = templ->target;
+ info->type = get_surface_type(templ->target);
info->format = image_format;
info->width = templ->width0;
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 24ab59aa32b..445df1966f9 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -2045,6 +2045,7 @@ ilo_create_sampler_view(struct pipe_context *pipe,
info.vma = &tex->vma;
info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+ info.type = tex->image.type;
if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
tex->image.separate_stencil) {
@@ -2054,8 +2055,6 @@ ilo_create_sampler_view(struct pipe_context *pipe,
info.format = ilo_format_translate_texture(dev, templ->format);
}
- info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE ||
- tex->image.target == PIPE_TEXTURE_CUBE_ARRAY);
info.is_array = util_resource_is_array_texture(&tex->base);
info.readonly = true;
@@ -2116,6 +2115,10 @@ ilo_create_surface(struct pipe_context *pipe,
info.aux_vma = &tex->aux_vma;
info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
+
info.format = ilo_format_translate_render(dev, templ->format);
info.is_array = util_resource_is_array_texture(&tex->base);
@@ -2148,6 +2151,9 @@ ilo_create_surface(struct pipe_context *pipe,
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
+
ilo_state_zs_init(&surf->u.zs, dev, &info);
}
From 2ee95f6d64aca9e9490c1ac293dd711b5f60a16b Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Thu, 25 Jun 2015 07:43:47 +0800
Subject: [PATCH 0042/1208] ilo: always use the specified image format
Move silent promotion of PIPE_FORMAT_ETC1_RGB8 or combined depth/stencil out
of core.
---
src/gallium/drivers/ilo/core/ilo_image.c | 80 ++++++------------
src/gallium/drivers/ilo/core/ilo_image.h | 4 +-
src/gallium/drivers/ilo/ilo_resource.c | 100 ++++++++++++++++++++---
src/gallium/drivers/ilo/ilo_state.c | 2 +-
4 files changed, 116 insertions(+), 70 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 9ec6792146f..7da5debcb37 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -671,10 +671,30 @@ img_init_size_and_format(struct ilo_image *img,
struct ilo_image_params *params)
{
const struct ilo_image_info *info = params->info;
- enum pipe_format format = info->format;
- bool require_separate_stencil = false;
img->type = info->type;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "The separate stencil buffer is always enabled, thus the field in
+ * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
+ * buffer has been removed. Surface formats with interleaved depth and
+ * stencil are no longer supported."
+ */
+ if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && info->bind_zs) {
+ const struct util_format_description *desc =
+ util_format_description(info->format);
+
+ assert(info->format == PIPE_FORMAT_S8_UINT ||
+ !util_format_has_stencil(desc));
+ }
+
+ img->format = info->format;
+ img->block_width = util_format_get_blockwidth(info->format);
+ img->block_height = util_format_get_blockheight(info->format);
+ img->block_size = util_format_get_blocksize(info->format);
+
img->width0 = info->width;
img->height0 = info->height;
img->depth0 = info->depth;
@@ -682,46 +702,6 @@ img_init_size_and_format(struct ilo_image *img,
img->level_count = info->level_count;
img->sample_count = info->sample_count;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Separate Stencil Buffer Enable) must be set to the same
- * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
- *
- * GEN7+ requires separate stencil buffers.
- */
- if (info->bind_zs) {
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- require_separate_stencil = true;
- else
- require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
- }
-
- switch (format) {
- case PIPE_FORMAT_ETC1_RGB8:
- format = PIPE_FORMAT_R8G8B8X8_UNORM;
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z24X8_UNORM;
- img->separate_stencil = true;
- }
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z32_FLOAT;
- img->separate_stencil = true;
- }
- break;
- default:
- break;
- }
-
- img->format = format;
- img->block_width = util_format_get_blockwidth(format);
- img->block_height = util_format_get_blockheight(format);
- img->block_size = util_format_get_blocksize(format);
-
params->valid_tilings = img_get_valid_tilings(img, params);
params->compressed = util_format_is_compressed(img->format);
}
@@ -805,19 +785,7 @@ img_want_hiz(const struct ilo_image *img,
if (!info->bind_zs)
return false;
- if (!util_format_has_depth(desc))
- return false;
-
- /*
- * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
- * for every level. This is generally fine except on GEN6, where HiZ and
- * separate stencil are enabled and disabled at the same time. When the
- * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
- * can result in incompatible formats.
- */
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
- info->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- info->level_count > 1)
+ if (!util_format_has_depth(desc) || util_format_has_stencil(desc))
return false;
return true;
@@ -1303,8 +1271,8 @@ img_init(struct ilo_image *img,
{
/* there are hard dependencies between every function here */
- img_init_aux(img, params);
img_init_size_and_format(img, params);
+ img_init_aux(img, params);
img_init_walk(img, params);
img_init_tiling(img, params);
img_init_alignments(img, params);
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 1c4f86c78da..8a2e1438158 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -119,6 +119,8 @@ struct ilo_image_lod {
struct ilo_image {
enum gen_surface_type type;
+ enum pipe_format format;
+
/* size, format, etc for programming hardware states */
unsigned width0;
unsigned height0;
@@ -126,8 +128,6 @@ struct ilo_image {
unsigned array_size;
unsigned level_count;
unsigned sample_count;
- enum pipe_format format;
- bool separate_stencil;
/*
* width, height, and size of pixel blocks for conversion between pixel
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 9ef53213b17..11833e0c599 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -109,6 +109,45 @@ get_surface_type(enum pipe_texture_target target)
}
}
+static enum pipe_format
+resource_get_image_format(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ bool *separate_stencil_ret)
+{
+ enum pipe_format format = templ->format;
+ bool separate_stencil;
+
+ /* silently promote ETC1 to PIPE_FORMAT_R8G8B8X8_UNORM */
+ if (templ->format == PIPE_FORMAT_ETC1_RGB8)
+ format = PIPE_FORMAT_R8G8B8X8_UNORM;
+
+ /* depth/stencil binds: use a depth-only format plus separate stencil */
+ separate_stencil = false;
+ if ((templ->bind & PIPE_BIND_DEPTH_STENCIL) &&
+ util_format_is_depth_and_stencil(templ->format)) {
+ switch (templ->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ /* Gen6 requires HiZ for all levels; split only if Gen7+ or one level */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) || templ->last_level == 0) {
+ format = PIPE_FORMAT_Z32_FLOAT;
+ separate_stencil = true;
+ }
+ break;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ format = PIPE_FORMAT_Z24X8_UNORM;
+ separate_stencil = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (separate_stencil_ret)
+ *separate_stencil_ret = separate_stencil;
+
+ return format;
+}
+
static void
resource_get_image_info(const struct pipe_resource *templ,
const struct ilo_dev *dev,
@@ -340,10 +379,6 @@ tex_alloc_bos(struct ilo_texture *tex)
if (!tex->imported && !tex_create_bo(tex))
return false;
- /* allocate separate stencil resource */
- if (tex->image.separate_stencil && !tex_create_separate_stencil(tex))
- return false;
-
switch (tex->image.aux.type) {
case ILO_IMAGE_AUX_HIZ:
if (!tex_create_hiz(tex))
@@ -396,15 +431,19 @@ tex_import_handle(struct ilo_texture *tex,
static bool
tex_init_image(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ bool *separate_stencil)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
struct ilo_image *img = &tex->image;
struct intel_bo *imported_bo = NULL;;
+ enum pipe_format image_format;
struct ilo_image_info info;
- resource_get_image_info(templ, &is->dev, templ->format, &info);
+ image_format = resource_get_image_format(templ,
+ &is->dev, separate_stencil);
+ resource_get_image_info(templ, &is->dev, image_format, &info);
if (handle) {
imported_bo = tex_import_handle(tex, handle, &info);
@@ -417,6 +456,31 @@ tex_init_image(struct ilo_texture *tex,
return false;
}
+ /*
+ * HiZ requires 8x4 alignment and some levels might need HiZ disabled. It
+ * is generally fine except on Gen6, where HiZ and separate stencil must be
+ * enabled together. For PIPE_FORMAT_Z24X8_UNORM with separate stencil, we
+ * can live with stencil values being interleaved for levels where HiZ is
+ * disabled. But it is not the case for PIPE_FORMAT_Z32_FLOAT with
+ * separate stencil. If HiZ were disabled for a level, we would have to
+ * change the format to PIPE_FORMAT_Z32_FLOAT_S8X24_UINT for that level,
+ * and that format has a different bpp. In other words, HiZ has to be
+ * available for all levels.
+ */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img->aux.enables != (1 << templ->last_level)) {
+ image_format = templ->format;
+ info.format = image_format;
+
+ memset(img, 0, sizeof(*img));
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+ }
+
if (img->bo_height > ilo_max_resource_size / img->bo_stride ||
!ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height,
4096)) {
@@ -431,8 +495,8 @@ tex_init_image(struct ilo_texture *tex,
if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* require on-the-fly tiling/untiling or format conversion */
- if (img->tiling == GEN8_TILING_W || img->separate_stencil ||
- img->format != templ->format)
+ if (img->tiling == GEN8_TILING_W || *separate_stencil ||
+ image_format != templ->format)
return false;
}
@@ -448,6 +512,7 @@ tex_create(struct pipe_screen *screen,
const struct winsys_handle *handle)
{
struct ilo_texture *tex;
+ bool separate_stencil;
tex = CALLOC_STRUCT(ilo_texture);
if (!tex)
@@ -457,12 +522,13 @@ tex_create(struct pipe_screen *screen,
tex->base.screen = screen;
pipe_reference_init(&tex->base.reference, 1);
- if (!tex_init_image(tex, handle)) {
+ if (!tex_init_image(tex, handle, &separate_stencil)) {
FREE(tex);
return NULL;
}
- if (!tex_alloc_bos(tex)) {
+ if (!tex_alloc_bos(tex) ||
+ (separate_stencil && !tex_create_separate_stencil(tex))) {
tex_destroy(tex);
return NULL;
}
@@ -572,17 +638,29 @@ ilo_can_create_resource(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct ilo_screen *is = ilo_screen(screen);
+ enum pipe_format image_format;
struct ilo_image_info info;
struct ilo_image img;
if (templ->target == PIPE_BUFFER)
return (templ->width0 <= ilo_max_resource_size);
- resource_get_image_info(templ, &is->dev, templ->format, &info);
+ image_format = resource_get_image_format(templ, &is->dev, NULL);
+ resource_get_image_info(templ, &is->dev, image_format, &info);
memset(&img, 0, sizeof(img));
ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+ /* as in tex_init_image() */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img.aux.enables != (1 << templ->last_level)) {
+ info.format = templ->format;
+ memset(&img, 0, sizeof(img));
+ ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+ }
+
return (img.bo_height <= ilo_max_resource_size / img.bo_stride);
}
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 445df1966f9..20d3c001188 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -2048,7 +2048,7 @@ ilo_create_sampler_view(struct pipe_context *pipe,
info.type = tex->image.type;
if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- tex->image.separate_stencil) {
+ tex->separate_s8) {
info.format = ilo_format_translate_texture(dev,
PIPE_FORMAT_Z32_FLOAT);
} else {
From cbdc26aa3f76dc20285caa7e62ca8809cb2fe638 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Thu, 25 Jun 2015 22:27:04 +0800
Subject: [PATCH 0043/1208] ilo: replace pipe_format by gen_surface_format
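Replace pipe_format with gen_surface_format in ilo_image; the pipe_format of
a texture is now kept in ilo_texture::image_format. The depth format is no
longer derived from the image inside ilo_state_zs; callers specify it
explicitly in ilo_state_zs_info::format.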
---
src/gallium/drivers/ilo/core/ilo_core.h | 2 -
src/gallium/drivers/ilo/core/ilo_image.c | 70 +++++++------
src/gallium/drivers/ilo/core/ilo_image.h | 12 ++-
src/gallium/drivers/ilo/core/ilo_state_zs.c | 99 ++++++-------------
src/gallium/drivers/ilo/core/ilo_state_zs.h | 11 +--
src/gallium/drivers/ilo/ilo_blitter_blt.c | 2 +-
.../drivers/ilo/ilo_blitter_rectlist.c | 4 +-
src/gallium/drivers/ilo/ilo_common.h | 3 +
src/gallium/drivers/ilo/ilo_format.h | 35 +++++++
src/gallium/drivers/ilo/ilo_resource.c | 49 +++++++--
src/gallium/drivers/ilo/ilo_resource.h | 1 +
src/gallium/drivers/ilo/ilo_state.c | 8 +-
src/gallium/drivers/ilo/ilo_transfer.c | 20 ++--
13 files changed, 174 insertions(+), 142 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index 0a7f7d9d3fe..d95a80aabd3 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -30,11 +30,9 @@
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
#include "util/u_debug.h"
#include "util/list.h"
-#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 7da5debcb37..7a1100288d3 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -45,8 +45,6 @@ struct ilo_image_params {
const struct ilo_image_info *info;
unsigned valid_tilings;
- bool compressed;
-
unsigned h0, h1;
unsigned max_x, max_y;
};
@@ -261,7 +259,7 @@ img_init_lods(struct ilo_image *img,
/* every LOD begins at tile boundaries */
if (info->level_count > 1) {
- assert(img->format == PIPE_FORMAT_S8_UINT);
+ assert(img->format == GEN6_FORMAT_R8_UINT);
cur_x = align(cur_x, 64);
cur_y = align(cur_y, 64);
}
@@ -334,7 +332,7 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_S8_UINT 4 2
+ * GEN6_FORMAT_R8_UINT 4 2
* other depth/stencil formats 4 4
* 4x multisampled 4 4
* bpp 96 4 2
@@ -382,27 +380,27 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_Z16_UNORM 8 4
- * PIPE_FORMAT_S8_UINT 8 8
+ * GEN6_FORMAT_R16_UNORM 8 4
+ * GEN6_FORMAT_R8_UINT 8 8
* other depth/stencil formats 4 4
* 2x or 4x multisampled 4 or 8 4
* tiled Y 4 or 8 4 (if rt)
- * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
+ * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2
* others 4 or 8 2 or 4
*/
- if (params->compressed) {
+ if (info->compressed) {
/* this happens to be the case */
img->align_i = img->block_width;
img->align_j = img->block_height;
} else if (info->bind_zs) {
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
switch (img->format) {
- case PIPE_FORMAT_Z16_UNORM:
+ case GEN6_FORMAT_R16_UNORM:
img->align_i = 8;
img->align_j = 4;
break;
- case PIPE_FORMAT_S8_UINT:
+ case GEN6_FORMAT_R8_UINT:
img->align_i = 8;
img->align_j = 8;
break;
@@ -413,7 +411,7 @@ img_init_alignments(struct ilo_image *img,
}
} else {
switch (img->format) {
- case PIPE_FORMAT_S8_UINT:
+ case GEN6_FORMAT_R8_UINT:
img->align_i = 4;
img->align_j = 2;
break;
@@ -433,7 +431,7 @@ img_init_alignments(struct ilo_image *img,
if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
- assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
+ assert(img->format != GEN6_FORMAT_R32G32B32_FLOAT);
img->align_i = 4;
img->align_j = (valign_4) ? 4 : 2;
@@ -558,7 +556,7 @@ img_init_walk_gen6(struct ilo_image *img,
*/
img->walk =
(params->info->type == GEN6_SURFTYPE_3D) ? ILO_IMAGE_WALK_3D :
- (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
+ (img->format == GEN6_FORMAT_R8_UINT) ? ILO_IMAGE_WALK_LOD :
ILO_IMAGE_WALK_LAYER;
/* GEN6 supports only interleaved samples */
@@ -580,7 +578,6 @@ img_get_valid_tilings(const struct ilo_image *img,
const struct ilo_image_params *params)
{
const struct ilo_image_info *info = params->info;
- const enum pipe_format format = img->format;
unsigned valid_tilings = params->valid_tilings;
/*
@@ -614,8 +611,8 @@ img_get_valid_tilings(const struct ilo_image *img,
* "W-Major Tile Format is used for separate stencil."
*/
if (info->bind_zs) {
- switch (format) {
- case PIPE_FORMAT_S8_UINT:
+ switch (info->format) {
+ case GEN6_FORMAT_R8_UINT:
valid_tilings &= IMAGE_TILING_W;
break;
default:
@@ -649,7 +646,7 @@ img_get_valid_tilings(const struct ilo_image *img,
*/
if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
- img->format == PIPE_FORMAT_R32G32B32_FLOAT)
+ img->format == GEN6_FORMAT_R32G32B32_FLOAT)
valid_tilings &= ~IMAGE_TILING_Y;
valid_tilings &= ~IMAGE_TILING_W;
@@ -682,18 +679,13 @@ img_init_size_and_format(struct ilo_image *img,
* buffer has been removed Surface formats with interleaved depth and
* stencil are no longer supported"
*/
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && info->bind_zs) {
- const struct util_format_description *desc =
- util_format_description(info->format);
-
- assert(info->format == PIPE_FORMAT_S8_UINT ||
- !util_format_has_stencil(desc));
- }
+ if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && info->bind_zs)
+ assert(!info->interleaved_stencil);
img->format = info->format;
- img->block_width = util_format_get_blockwidth(info->format);
- img->block_height = util_format_get_blockheight(info->format);
- img->block_size = util_format_get_blocksize(info->format);
+ img->block_width = info->block_width;
+ img->block_height = info->block_height;
+ img->block_size = info->block_size;
img->width0 = info->width;
img->height0 = info->height;
@@ -703,7 +695,6 @@ img_init_size_and_format(struct ilo_image *img,
img->sample_count = info->sample_count;
params->valid_tilings = img_get_valid_tilings(img, params);
- params->compressed = util_format_is_compressed(img->format);
}
static bool
@@ -730,7 +721,7 @@ img_want_mcs(const struct ilo_image *img,
* "This field must be set to 0 for all SINT MSRTs when all RT channels
* are not written"
*/
- if (info->sample_count > 1 && !util_format_is_pure_sint(info->format)) {
+ if (info->sample_count > 1 && !info->is_integer) {
want_mcs = true;
} else if (info->sample_count == 1 && !info->aux_disable) {
/*
@@ -769,8 +760,6 @@ img_want_hiz(const struct ilo_image *img,
const struct ilo_image_params *params)
{
const struct ilo_image_info *info = params->info;
- const struct util_format_description *desc =
- util_format_description(info->format);
if (ilo_debug & ILO_DEBUG_NOHIZ)
return false;
@@ -785,10 +774,17 @@ img_want_hiz(const struct ilo_image *img,
if (!info->bind_zs)
return false;
- if (!util_format_has_depth(desc) || util_format_has_stencil(desc))
+ if (info->interleaved_stencil)
return false;
- return true;
+ switch (info->format) {
+ case GEN6_FORMAT_R32_FLOAT:
+ case GEN6_FORMAT_R24_UNORM_X8_TYPELESS:
+ case GEN6_FORMAT_R16_UNORM:
+ return true;
+ default:
+ return false;
+ }
}
static void
@@ -836,7 +832,7 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
if (info->type == GEN6_SURFTYPE_CUBE)
pad_h += 2;
- if (params->compressed)
+ if (info->compressed)
align_h = MAX2(align_h, img->align_j * 2);
}
@@ -1325,9 +1321,9 @@ img_init_for_transfer(struct ilo_image *img,
img->sample_count = 1;
img->format = info->format;
- img->block_width = util_format_get_blockwidth(info->format);
- img->block_height = util_format_get_blockheight(info->format);
- img->block_size = util_format_get_blocksize(info->format);
+ img->block_width = info->block_width;
+ img->block_height = info->block_height;
+ img->block_size = info->block_size;
img->walk = ILO_IMAGE_WALK_LOD;
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 8a2e1438158..0fe0d4da75a 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -70,7 +70,14 @@ enum ilo_image_walk_type {
struct ilo_image_info {
enum gen_surface_type type;
- enum pipe_format format;
+ enum gen_surface_format format;
+ bool interleaved_stencil;
+ bool is_integer;
+ /* width, height and size of pixel blocks */
+ bool compressed;
+ unsigned block_width;
+ unsigned block_height;
+ unsigned block_size;
/* image size */
uint16_t width;
@@ -119,7 +126,8 @@ struct ilo_image_lod {
struct ilo_image {
enum gen_surface_type type;
- enum pipe_format format;
+ enum gen_surface_format format;
+ bool interleaved_stencil;
/* size, format, etc for programming hardware states */
unsigned width0;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
index 306e6d9aedc..827632764b2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -55,47 +55,9 @@ zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = 0;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
-static enum gen_depth_format
-get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- } else {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- }
-}
-
static bool
zs_validate_gen6(const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
@@ -141,6 +103,35 @@ zs_validate_gen6(const struct ilo_dev *dev,
img->type == GEN6_SURFTYPE_CUBE);
}
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (info->format) {
+ case GEN6_ZFORMAT_D32_FLOAT:
+ case GEN6_ZFORMAT_D24_UNORM_X8_UINT:
+ case GEN6_ZFORMAT_D16_UNORM:
+ break;
+ default:
+ assert(!"unknown depth format");
+ break;
+ }
+ } else {
+ /*
+ * From the Ironlake PRM, volume 2 part 1, page 330:
+ *
+ * "If this field (Separate Stencil Buffer Enable) is disabled, the
+ * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+ *
+ * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be
+ * set to the same value (enabled or disabled) as Hierarchical
+ * Depth Buffer Enable."
+ */
+ if (info->hiz_vma)
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_S8_UINT);
+ else
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_X8_UINT);
+ }
+
assert(info->level < img->level_count);
assert(img->bo_stride);
@@ -395,7 +386,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_state_zs_info *info)
{
uint16_t width, height, depth, array_base, view_extent;
- enum gen_depth_format format;
uint32_t dw1, dw2, dw3, dw4;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -406,28 +396,10 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
- /*
- * From the Ironlake PRM, volume 2 part 1, page 330:
- *
- * "If this field (Separate Stencil Buffer Enable) is disabled, the
- * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 321:
- *
- * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set
- * to the same value (enabled or disabled) as Hierarchical Depth
- * Buffer Enable."
- */
- if (!info->hiz_vma && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
- format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
-
/* info->z_readonly and info->s_readonly are ignored on Gen6 */
dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT |
GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
- format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+ info->format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img)
dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
@@ -453,8 +425,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
@@ -463,7 +433,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
{
- enum gen_depth_format format;
uint16_t width, height, depth;
uint16_t array_base, view_extent;
uint32_t dw1, dw2, dw3, dw4, dw6;
@@ -476,11 +445,8 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT |
- format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+ info->format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img) {
if (!info->z_readonly)
@@ -516,8 +482,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = dw6;
- zs->depth_format = format;
-
return true;
}
@@ -685,6 +649,7 @@ ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
memset(&info, 0, sizeof(info));
info.type = GEN6_SURFTYPE_NULL;
+ info.format = GEN6_ZFORMAT_D32_FLOAT;
return ilo_state_zs_init(zs, dev, &info);
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
index d78a12ad516..6a25a873897 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -49,6 +49,7 @@ struct ilo_state_zs_info {
const struct ilo_vma *hiz_vma;
enum gen_surface_type type;
+ enum gen_depth_format format;
/* ignored prior to Gen7 */
bool z_readonly;
@@ -64,9 +65,6 @@ struct ilo_state_zs {
const struct ilo_vma *s_vma;
const struct ilo_vma *hiz_vma;
- /* TODO move this to ilo_image */
- enum gen_depth_format depth_format;
-
bool z_readonly;
bool s_readonly;
};
@@ -84,11 +82,4 @@ bool
ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
const struct ilo_dev *dev);
-static inline enum gen_depth_format
-ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs,
- const struct ilo_dev *dev)
-{
- return zs->depth_format;
-}
-
#endif /* ILO_STATE_ZS_H */
diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c
index 52b4b25d827..66203e86137 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_blt.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c
@@ -300,7 +300,7 @@ tex_copy_region(struct ilo_blitter *blitter,
const struct pipe_box *src_box)
{
const struct util_format_description *desc =
- util_format_description(dst_tex->image.format);
+ util_format_description(dst_tex->image_format);
const unsigned max_extent = 32767; /* INT16_MAX */
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 13c8f500680..86e67084d6e 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -318,7 +318,7 @@ hiz_can_clear_zs(const struct ilo_blitter *blitter,
* The truth is when HiZ is enabled, separate stencil is also enabled on
* all GENs. The depth buffer format cannot be combined depth/stencil.
*/
- switch (tex->image.format) {
+ switch (tex->image_format) {
case PIPE_FORMAT_Z16_UNORM:
if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) &&
tex->base.width0 % 16)
@@ -355,7 +355,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8))
clear_value = fui(depth);
else
- clear_value = util_pack_z(tex->image.format, depth);
+ clear_value = util_pack_z(tex->image_format, depth);
ilo_blit_resolve_surface(blitter->ilo, zs,
ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR);
diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h
index 9ebbf76e81e..4d6604b29b2 100644
--- a/src/gallium/drivers/ilo/ilo_common.h
+++ b/src/gallium/drivers/ilo/ilo_common.h
@@ -28,6 +28,9 @@
#ifndef ILO_COMMON_H
#define ILO_COMMON_H
+#include "pipe/p_format.h"
+#include "util/u_format.h"
+
#include "core/ilo_core.h"
#include "core/ilo_debug.h"
#include "core/ilo_dev.h"
diff --git a/src/gallium/drivers/ilo/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h
index 4e955c09c14..0a19c02659e 100644
--- a/src/gallium/drivers/ilo/ilo_format.h
+++ b/src/gallium/drivers/ilo/ilo_format.h
@@ -165,4 +165,39 @@ ilo_format_translate_vertex(const struct ilo_dev *dev,
return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
}
+static inline enum gen_depth_format
+ilo_format_translate_depth(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ }
+}
+
#endif /* ILO_FORMAT_H */
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 11833e0c599..962c710a57a 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -30,6 +30,7 @@
#include "core/ilo_state_surface.h"
#include "ilo_screen.h"
+#include "ilo_format.h"
#include "ilo_resource.h"
/*
@@ -148,6 +149,26 @@ resource_get_image_format(const struct pipe_resource *templ,
return format;
}
+static inline enum gen_surface_format
+pipe_to_surface_format(const struct ilo_dev *dev, enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_FORMAT_R32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_FORMAT_R24_UNORM_X8_TYPELESS;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_FORMAT_R16_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return GEN6_FORMAT_R8_UINT;
+ default:
+ return ilo_format_translate_color(dev, format);
+ }
+}
+
static void
resource_get_image_info(const struct pipe_resource *templ,
const struct ilo_dev *dev,
@@ -157,7 +178,14 @@ resource_get_image_info(const struct pipe_resource *templ,
memset(info, 0, sizeof(*info));
info->type = get_surface_type(templ->target);
- info->format = image_format;
+
+ info->format = pipe_to_surface_format(dev, image_format);
+ info->interleaved_stencil = util_format_is_depth_and_stencil(image_format);
+ info->is_integer = util_format_is_pure_integer(image_format);
+ info->compressed = util_format_is_compressed(image_format);
+ info->block_width = util_format_get_blockwidth(image_format);
+ info->block_height = util_format_get_blockheight(image_format);
+ info->block_size = util_format_get_blocksize(image_format);
info->width = templ->width0;
info->height = templ->height0;
@@ -303,7 +331,7 @@ tex_create_separate_stencil(struct ilo_texture *tex)
tex->separate_s8 = ilo_texture(s8);
- assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->separate_s8->image_format == PIPE_FORMAT_S8_UINT);
return true;
}
@@ -438,12 +466,11 @@ tex_init_image(struct ilo_texture *tex,
const struct pipe_resource *templ = &tex->base;
struct ilo_image *img = &tex->image;
struct intel_bo *imported_bo = NULL;;
- enum pipe_format image_format;
struct ilo_image_info info;
- image_format = resource_get_image_format(templ,
+ tex->image_format = resource_get_image_format(templ,
&is->dev, separate_stencil);
- resource_get_image_info(templ, &is->dev, image_format, &info);
+ resource_get_image_info(templ, &is->dev, tex->image_format, &info);
if (handle) {
imported_bo = tex_import_handle(tex, handle, &info);
@@ -469,10 +496,11 @@ tex_init_image(struct ilo_texture *tex,
*/
if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- image_format == PIPE_FORMAT_Z32_FLOAT &&
+ tex->image_format == PIPE_FORMAT_Z32_FLOAT &&
img->aux.enables != (1 << templ->last_level)) {
- image_format = templ->format;
- info.format = image_format;
+ tex->image_format = templ->format;
+ info.format = pipe_to_surface_format(&is->dev, tex->image_format);
+ info.interleaved_stencil = true;
memset(img, 0, sizeof(*img));
if (!ilo_image_init(img, &is->dev, &info)) {
@@ -496,7 +524,7 @@ tex_init_image(struct ilo_texture *tex,
if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* require on-the-fly tiling/untiling or format conversion */
if (img->tiling == GEN8_TILING_W || *separate_stencil ||
- image_format != templ->format)
+ tex->image_format != templ->format)
return false;
}
@@ -656,7 +684,8 @@ ilo_can_create_resource(struct pipe_screen *screen,
templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
image_format == PIPE_FORMAT_Z32_FLOAT &&
img.aux.enables != (1 << templ->last_level)) {
- info.format = templ->format;
+ info.format = pipe_to_surface_format(&is->dev, templ->format);
+ info.interleaved_stencil = true;
memset(&img, 0, sizeof(img));
ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
}
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index c28c05abcfe..8378af54741 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -92,6 +92,7 @@ struct ilo_texture {
bool imported;
+ enum pipe_format image_format;
struct ilo_image image;
struct ilo_vma vma;
struct ilo_vma aux_vma;
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 20d3c001188..c13fa9c2e18 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -1699,10 +1699,11 @@ ilo_set_framebuffer_state(struct pipe_context *pipe,
if (state->zsbuf) {
const struct ilo_surface_cso *cso =
(const struct ilo_surface_cso *) state->zsbuf;
+ const struct ilo_texture *tex = ilo_texture(cso->base.texture);
fb->has_hiz = cso->u.zs.hiz_vma;
fb->depth_offset_format =
- ilo_state_zs_get_depth_format(&cso->u.zs, dev);
+ ilo_format_translate_depth(dev, tex->image_format);
} else {
fb->has_hiz = false;
fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
@@ -2154,6 +2155,11 @@ ilo_create_surface(struct pipe_context *pipe,
info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
GEN6_SURFTYPE_2D : tex->image.type;
+ info.format = ilo_format_translate_depth(dev, tex->image_format);
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && !info.hiz_vma &&
+ tex->image_format == PIPE_FORMAT_Z24X8_UNORM)
+ info.format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+
ilo_state_zs_init(&surf->u.zs, dev, &info);
}
diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c
index be5aeee8e23..5abd3bebf68 100644
--- a/src/gallium/drivers/ilo/ilo_transfer.c
+++ b/src/gallium/drivers/ilo/ilo_transfer.c
@@ -100,7 +100,7 @@ resource_get_transfer_method(struct pipe_resource *res,
m = ILO_TRANSFER_MAP_SW_ZS;
need_convert = true;
}
- } else if (tex->image.format != tex->base.format) {
+ } else if (tex->image_format != tex->base.format) {
m = ILO_TRANSFER_MAP_SW_CONVERT;
need_convert = true;
}
@@ -601,7 +601,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
dst_cpp = 4;
dst_s8_pos = 3;
@@ -609,7 +609,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
dst_cpp = 8;
dst_s8_pos = 4;
@@ -655,7 +655,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -728,7 +728,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
src_cpp = 4;
src_s8_pos = 3;
@@ -736,7 +736,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
src_cpp = 8;
src_s8_pos = 4;
@@ -782,7 +782,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -840,8 +840,8 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
else
dst_slice_stride = 0;
- if (unlikely(tex->image.format == tex->base.format)) {
- util_copy_box(dst, tex->image.format, tex->image.bo_stride,
+ if (unlikely(tex->image_format == tex->base.format)) {
+ util_copy_box(dst, tex->image_format, tex->image.bo_stride,
dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
0, 0, 0);
@@ -853,7 +853,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
switch (tex->base.format) {
case PIPE_FORMAT_ETC1_RGB8:
- assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
for (slice = 0; slice < box->depth; slice++) {
const void *src =
From 7de85694fa606b112b8badd4f07969aef782efb8 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Fri, 26 Jun 2015 11:38:46 +0800
Subject: [PATCH 0044/1208] ilo: define ILO_IMAGE_MAX_LEVEL_COUNT
Define ILO_IMAGE_MAX_LEVEL_COUNT for ilo_image and use it in place of
PIPE_MAX_TEXTURE_LEVELS. Move the header includes that core no longer needs
from ilo_core.h to ilo_common.h.
---
src/gallium/drivers/ilo/core/ilo_core.h | 4 ----
src/gallium/drivers/ilo/core/ilo_image.c | 4 ++--
src/gallium/drivers/ilo/core/ilo_image.h | 11 +++++++++--
src/gallium/drivers/ilo/ilo_common.h | 5 +++++
4 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index d95a80aabd3..da7db90a54b 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -29,13 +29,9 @@
#define ILO_CORE_H
#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
#include "util/u_debug.h"
-#include "util/list.h"
-#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pointer.h"
#endif /* ILO_CORE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 7a1100288d3..3209674154b 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -1029,8 +1029,8 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
case ILO_IMAGE_WALK_LOD:
{
- unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
- unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
+ unsigned lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
unsigned cur_tx, cur_ty;
/* figure out the tile offsets of LODs */
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 0fe0d4da75a..cfe18b9e8d1 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -33,6 +33,13 @@
#include "ilo_core.h"
#include "ilo_dev.h"
+/*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 75:
+ *
+ * "(MIP Count / LOD) representing [1,15] MIP levels"
+ */
+#define ILO_IMAGE_MAX_LEVEL_COUNT 15
+
enum ilo_image_aux_type {
ILO_IMAGE_AUX_NONE,
ILO_IMAGE_AUX_HIZ,
@@ -154,7 +161,7 @@ struct ilo_image {
unsigned align_i;
unsigned align_j;
- struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS];
+ struct ilo_image_lod lods[ILO_IMAGE_MAX_LEVEL_COUNT];
/* physical layer height for ILO_IMAGE_WALK_LAYER */
unsigned walk_layer_height;
@@ -173,7 +180,7 @@ struct ilo_image {
unsigned enables;
/* LOD offsets for ILO_IMAGE_WALK_LOD */
- unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned walk_lod_offsets[ILO_IMAGE_MAX_LEVEL_COUNT];
unsigned walk_layer_height;
unsigned bo_stride;
diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h
index 4d6604b29b2..3dbe79fb872 100644
--- a/src/gallium/drivers/ilo/ilo_common.h
+++ b/src/gallium/drivers/ilo/ilo_common.h
@@ -29,7 +29,12 @@
#define ILO_COMMON_H
#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
+
+#include "util/list.h"
#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
#include "core/ilo_core.h"
#include "core/ilo_debug.h"
From 229450520a23ba211fd9f7b3c9bc80f291229ec1 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 24 Jun 2015 14:06:33 +0100
Subject: [PATCH 0045/1208] mesa: fold duplicated GL/GL_CORE/GLES3 entry in
get_hash_params.py
Signed-off-by: Emil Velikov
Reviewed-by: Matt Turner
---
src/mesa/main/get_hash_params.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 74ff3ba6619..c25e1b6555f 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -351,6 +351,9 @@ descriptor=[
# GL_ARB_framebuffer_object
[ "MAX_SAMPLES", "CONTEXT_INT(Const.MaxSamples), extra_ARB_framebuffer_object_EXT_framebuffer_multisample" ],
+# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
+ [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+
# GL_ARB_sync
[ "MAX_SERVER_WAIT_TIMEOUT", "CONTEXT_INT64(Const.MaxServerWaitTimeout), extra_ARB_sync" ],
@@ -404,11 +407,6 @@ descriptor=[
[ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ],
]},
-{ "apis": ["GL", "GL_CORE", "GLES3"], "params": [
-# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
- [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
-]},
-
# Enums in OpenGL Core profile and ES 3.1
{ "apis": ["GL_CORE", "GLES3"], "params": [
# GL_ARB_draw_indirect / GLES 3.1
From 404a90b82786080564fe32716f83ce055b9a934f Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Wed, 10 Jun 2015 16:30:56 -0700
Subject: [PATCH 0046/1208] mesa: Enable subdir-objects globally.
Reviewed-by: Emil Velikov
---
configure.ac | 2 +-
src/Makefile.am | 2 --
src/gallium/auxiliary/Makefile.am | 2 --
src/gallium/drivers/freedreno/Makefile.am | 2 --
src/gallium/drivers/ilo/Makefile.am | 2 --
src/gallium/drivers/nouveau/Makefile.am | 2 --
src/gallium/drivers/r300/Makefile.am | 2 --
src/gallium/drivers/r600/Makefile.am | 2 --
src/gallium/drivers/svga/Makefile.am | 2 --
src/gallium/drivers/vc4/Makefile.am | 2 --
src/gallium/state_trackers/clover/Makefile.am | 2 --
src/gallium/state_trackers/xvmc/Makefile.am | 1 -
src/gallium/targets/opencl/Makefile.am | 2 --
src/gbm/Makefile.am | 2 --
src/glsl/Makefile.am | 2 --
src/gtest/Makefile.am | 2 --
src/mapi/Makefile.am | 2 --
src/mesa/Makefile.am | 2 --
18 files changed, 1 insertion(+), 34 deletions(-)
diff --git a/configure.ac b/configure.ac
index ddc757e1629..af61aa2018c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -44,7 +44,7 @@ AC_INIT([Mesa], [MESA_VERSION],
AC_CONFIG_AUX_DIR([bin])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz])
+AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz subdir-objects])
dnl We only support native Windows builds (MinGW/MSVC) through SCons.
case "$host_os" in
diff --git a/src/Makefile.am b/src/Makefile.am
index 5d69abd996d..90bf94737cf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
SUBDIRS = . gtest util mapi/glapi/gen mapi
if NEED_OPENGL_COMMON
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 89c7a13e913..ab91062ef1b 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
if HAVE_LOADER_GALLIUM
SUBDIRS := pipe-loader
endif
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index cbf62c6daae..dff95ba5270 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/ilo/Makefile.am b/src/gallium/drivers/ilo/Makefile.am
index a8785a5e8c4..1f14153748e 100644
--- a/src/gallium/drivers/ilo/Makefile.am
+++ b/src/gallium/drivers/ilo/Makefile.am
@@ -21,8 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am
index d05f0a17ab4..c52d62e54a2 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r300/Makefile.am b/src/gallium/drivers/r300/Makefile.am
index dd1a5ede19b..081f332683e 100644
--- a/src/gallium/drivers/r300/Makefile.am
+++ b/src/gallium/drivers/r300/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index dc0d90d759b..8317da727a2 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am
index e0a8cad7208..d46de95e4b4 100644
--- a/src/gallium/drivers/svga/Makefile.am
+++ b/src/gallium/drivers/svga/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index 3f62ce21a9f..774463138d0 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am
index f46d9ef457d..fd0ccf88cc5 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
AM_CPPFLAGS = \
diff --git a/src/gallium/state_trackers/xvmc/Makefile.am b/src/gallium/state_trackers/xvmc/Makefile.am
index 047d05b3719..3c7c35c8c37 100644
--- a/src/gallium/state_trackers/xvmc/Makefile.am
+++ b/src/gallium/state_trackers/xvmc/Makefile.am
@@ -20,7 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index 5daf327fb47..70e60e20052 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include $(top_srcdir)/src/gallium/Automake.inc
lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
diff --git a/src/gbm/Makefile.am b/src/gbm/Makefile.am
index 918fdf7d6ad..da4195df31c 100644
--- a/src/gbm/Makefile.am
+++ b/src/gbm/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = main/gbm.pc
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index fa8c9f5d3ca..98dcb37fc74 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
diff --git a/src/gtest/Makefile.am b/src/gtest/Makefile.am
index 47d392bc705..29d6c6d1998 100644
--- a/src/gtest/Makefile.am
+++ b/src/gtest/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
AM_CFLAGS = $(DEFINES) -I$(top_srcdir)/src/gtest/include
AM_CXXFLAGS = $(DEFINES) -I$(top_srcdir)/src/gtest/include
diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
index 50c5b2ebba3..160a255af6a 100644
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
SUBDIRS =
TESTS =
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 71794b5dada..c86ded979b9 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
SUBDIRS = . main/tests
if HAVE_X11_DRIVER
From 3cf90bb183c7f403ded4c069a78eae1fd71f8eab Mon Sep 17 00:00:00 2001
From: Neil Roberts
Date: Tue, 16 Jun 2015 13:53:40 +0100
Subject: [PATCH 0047/1208] i965/skl: Fix aligning mt->total_width to the block
size
brw_miptree_layout_2d tries to ensure that mt->total_width is a
multiple of the compressed block size, presumably because it wouldn't
be possible to make an image that has a fraction of a block. However
it was doing this by aligning mt->total_width to align_w. Previously
align_w has been used as a shortcut for getting the block width
because before Gen9 the block width was always equal to the alignment.
Commit 4ab8d59a2 tried to fix these cases to use the block width
instead of the alignment but it missed this case.
I think in practice this probably won't make any difference because
the buffer for the texture will be allocated to be large enough to
contain the entire pitch and libdrm aligns the pitch to the tile width
anyway. However I think the patch is worth having to make the
intention clearer.
Reviewed-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_tex_layout.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 998d8c42770..455984309fa 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -366,9 +366,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
mt->total_width = mt->physical_width0;
- if (mt->compressed) {
- mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
- }
+ if (mt->compressed)
+ mt->total_width = ALIGN(mt->total_width, bw);
/* May need to adjust width to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
From 052b3d4e2f159038137504f01e9ff2380a67af8b Mon Sep 17 00:00:00 2001
From: Boyan Ding
Date: Sat, 13 Jun 2015 15:36:27 +0800
Subject: [PATCH 0048/1208] egl_dri2: Remove trailing whitespaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyan Ding
Reviewed-by: Marek Olšák
---
src/egl/drivers/dri2/egl_dri2.c | 10 +++++-----
src/egl/drivers/dri2/egl_dri2.h | 6 +++---
src/egl/drivers/dri2/platform_wayland.c | 8 ++++----
src/egl/drivers/dri2/platform_x11.c | 2 +-
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index a1cbd437f53..223cc5c5fbf 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -139,7 +139,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
dri2_dpy = disp->DriverData;
_eglInitConfig(&base, disp, id);
-
+
i = 0;
double_buffer = 0;
bind_to_texture_rgb = 0;
@@ -155,7 +155,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
else
return NULL;
_eglSetConfigKey(&base, EGL_COLOR_BUFFER_TYPE, value);
- break;
+ break;
case __DRI_ATTRIB_CONFIG_CAVEAT:
if (value & __DRI_ATTRIB_NON_CONFORMANT_CONFIG)
@@ -365,7 +365,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
}
}
}
-
+
for (j = 0; matches[j].name; j++) {
field = ((char *) dri2_dpy + matches[j].offset);
if (*(const __DRIextension **) field == NULL) {
@@ -624,7 +624,7 @@ dri2_create_screen(_EGLDisplay *disp)
dri2_dpy->own_dri_screen = 1;
extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);
-
+
if (dri2_dpy->dri2) {
if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
goto cleanup_dri_screen;
@@ -1970,7 +1970,7 @@ dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_CURSOR_MESA)
dri_use |= __DRI_IMAGE_USE_CURSOR;
- dri2_img->dri_image =
+ dri2_img->dri_image =
dri2_dpy->image->createImage(dri2_dpy->dri_screen,
attrs.Width, attrs.Height,
format, dri_use, dri2_img);
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 9985c49f984..f0cc6da1867 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -120,9 +120,9 @@ struct dri2_egl_display_vtbl {
EGLBoolean (*swap_buffers)(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf);
- EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,
- _EGLSurface *surface,
- const EGLint *rects, EGLint n_rects);
+ EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,
+ _EGLSurface *surface,
+ const EGLint *rects, EGLint n_rects);
EGLBoolean (*swap_buffers_region)(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf, EGLint numRects,
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index 1c985523862..160fa8ce8d7 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -138,7 +138,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
_eglError(EGL_BAD_ALLOC, "dri2_create_surface");
return NULL;
}
-
+
if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
goto cleanup_surf;
@@ -157,7 +157,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
dri2_surf->base.Width = -1;
dri2_surf->base.Height = -1;
- dri2_surf->dri_drawable =
+ dri2_surf->dri_drawable =
(*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
dri2_conf->dri_double_config,
dri2_surf);
@@ -361,7 +361,7 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
}
if (dri2_surf->back->dri_image == NULL) {
- dri2_surf->back->dri_image =
+ dri2_surf->back->dri_image =
dri2_dpy->image->createImage(dri2_dpy->dri_screen,
dri2_surf->base.Width,
dri2_surf->base.Height,
@@ -1220,7 +1220,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
-
+
return EGL_FALSE;
}
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 56c14288204..0fbf4e40f2f 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -56,7 +56,7 @@ swrastCreateDrawable(struct dri2_egl_display * dri2_dpy,
uint32_t mask;
const uint32_t function = GXcopy;
uint32_t valgc[2];
-
+
/* create GC's */
dri2_surf->gc = xcb_generate_id(dri2_dpy->conn);
mask = XCB_GC_FUNCTION;
From ad62ec8316a926682958e7ab52639992867c3755 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Fri, 26 Jun 2015 15:01:22 -0400
Subject: [PATCH 0049/1208] nv50/ir: propagate modifier to right arg when
const-folding mad
An immediate has to be the second arg of an ADD operation. However we
were mistakenly propagating the modifier of the non-folded value to the
folded immediate argument.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91117
Signed-off-by: Ilia Mirkin
Cc: "10.5 10.6"
---
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index ae739eeda83..ad9bf6f4aa9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -608,9 +608,12 @@ ConstantFolding::expr(Instruction *i,
case OP_FMA: {
i->op = OP_ADD;
+ /* Move the immediate to the second arg, otherwise the ADD operation
+ * won't be emittable
+ */
i->setSrc(1, i->getSrc(0));
- i->src(1).mod = i->src(2).mod;
i->setSrc(0, i->getSrc(2));
+ i->src(0).mod = i->src(2).mod;
i->setSrc(2, NULL);
ImmediateValue src0;
From 35d83793047b3de31a706fa2a62a233090ea7cfc Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 18 Jun 2015 13:55:52 -0700
Subject: [PATCH 0050/1208] i965/fs: Fix ir_txs in emit_texture_gen4_simd16().
We were not emitting the LOD, which led to message lengths of 1 instead
of 3. Setting has_lod makes us emit the LOD, but I had to make changes
to avoid emitting the non-existent coordinate as well.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91022
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke
Reviewed-by: Jordan Justen
---
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9a4bad6bcf5..4e13199eec0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -247,7 +247,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
uint32_t sampler)
{
fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
- bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf;
+ bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs;
if (has_lod && shadow_c.file != BAD_FILE)
no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
@@ -264,14 +264,15 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
fs_reg msg_end = offset(message, vector_elements);
/* Messages other than sample and ld require all three components */
- if (has_lod || shadow_c.file != BAD_FILE) {
+ if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) {
for (int i = vector_elements; i < 3; i++) {
bld.MOV(offset(message, i), fs_reg(0.0f));
}
+ msg_end = offset(message, 3);
}
if (has_lod) {
- fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
+ fs_reg msg_lod = retype(msg_end, op == ir_txf ?
BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
From 7e5064360c03b8dbdd60298b46e1595418c6cea3 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 25 Jun 2015 03:36:23 +0100
Subject: [PATCH 0051/1208] radeonsi: add support for viewport array (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This isn't pretty and I'd suggest that the pm4 interface builder
could be tweaked to do this more efficiently, but I'd need
guidance on how that would look.
This seems to pass the few piglit tests I threw at it.
v2: handle passing layer/viewport index to fragment shader.
fix crash in blit changes,
add support to io_get_unique_index for layer/viewport index
update docs.
v3: avoid looking up viewport index and layer in es (Marek).
Reviewed-by: Marek Olšák
Signed-off-by: Dave Airlie
---
docs/GL3.txt | 4 +-
docs/relnotes/10.7.0.html | 3 +
src/gallium/drivers/radeonsi/si_blit.c | 8 +--
src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
src/gallium/drivers/radeonsi/si_shader.c | 27 ++++++--
src/gallium/drivers/radeonsi/si_state.c | 66 ++++++++++++-------
src/gallium/drivers/radeonsi/si_state.h | 4 +-
.../drivers/radeonsi/si_state_shaders.c | 2 -
8 files changed, 74 insertions(+), 42 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 220bcc8742f..df913bdd8c9 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_shader_precision started (Micah)
GL_ARB_vertex_attrib_64bit DONE (nvc0, softpipe)
- GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, llvmpipe)
+ GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
GL 4.2, GLSL 4.20:
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
- GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe)
+ GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdata DONE (all drivers)
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index e089889667d..fcc50811b69 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -44,8 +44,11 @@ Note: some of the new features are only available with certain drivers.
+GL_AMD_vertex_shader_viewport_index on radeonsi
GL_ARB_framebuffer_no_attachments on i965
GL_ARB_shader_stencil_export on llvmpipe
+GL_ARB_viewport_array on radeonsi
+GL_ARB_fragment_layer_viewport on radeonsi
Bug fixes
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c4082dbc..6c7b383a4a3 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_sample_mask(sctx->blitter,
sctx->queued.named.sample_mask->sample_mask);
}
- if (sctx->queued.named.viewport) {
- util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
+ if (sctx->queued.named.viewport[0]) {
+ util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
}
- if (sctx->queued.named.scissor) {
- util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
+ if (sctx->queued.named.scissor[0]) {
+ util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a8c92..480a3010d31 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
+ return 16;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a293ef36fbb..4ca31728dff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1132,7 +1132,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
LLVMValueRef args[9];
LLVMValueRef pos_args[4][9] = { { 0 } };
- LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
+ LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
unsigned semantic_name, semantic_index;
unsigned target;
unsigned param_count = 0;
@@ -1158,7 +1158,12 @@ handle_semantic:
continue;
case TGSI_SEMANTIC_LAYER:
layer_value = outputs[i].values[0];
- continue;
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ viewport_index_value = outputs[i].values[0];
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
case TGSI_SEMANTIC_POSITION:
target = V_008DFC_SQ_EXP_POS;
break;
@@ -1224,11 +1229,13 @@ handle_semantic:
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (shader->selector->info.writes_psize ||
shader->selector->info.writes_edgeflag ||
+ shader->selector->info.writes_viewport_index ||
shader->selector->info.writes_layer) {
pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
shader->selector->info.writes_psize |
(shader->selector->info.writes_edgeflag << 1) |
- (shader->selector->info.writes_layer << 2));
+ (shader->selector->info.writes_layer << 2) |
+ (shader->selector->info.writes_viewport_index << 3));
pos_args[1][1] = uint->zero; /* EXEC mask */
pos_args[1][2] = uint->zero; /* last export? */
pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
@@ -1259,6 +1266,9 @@ handle_semantic:
if (shader->selector->info.writes_layer)
pos_args[1][7] = layer_value;
+
+ if (shader->selector->info.writes_viewport_index)
+ pos_args[1][8] = viewport_index_value;
}
for (i = 0; i < 4; i++)
@@ -1299,10 +1309,15 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
si_shader_ctx->radeon_bld.soa.outputs[i];
- int param_index = get_param_index(info->output_semantic_name[i],
- info->output_semantic_index[i],
- es->key.vs.gs_used_inputs);
+ int param_index;
+ if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
+ info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
+ continue;
+
+ param_index = get_param_index(info->output_semantic_name[i],
+ info->output_semantic_index[i],
+ es->key.vs.gs_used_inputs);
if (param_index < 0)
continue;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6c18836d189..752467bcfd7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
info->writes_edgeflag ||
- info->writes_layer) |
+ info->writes_layer ||
+ info->writes_viewport_index) |
(sctx->queued.named.rasterizer->clip_plane_enable &
clipdist_mask));
r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -509,20 +511,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
- struct si_pm4_state *pm4 = &scissor->pm4;
+ struct si_state_scissor *scissor;
+ struct si_pm4_state *pm4;
+ int i;
- if (scissor == NULL)
- return;
+ for (i = start_slot; i < start_slot + num_scissors; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 2;
- scissor->scissor = *state;
- si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
- S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
- S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
-
- si_pm4_set_state(sctx, scissor, scissor);
+ scissor = CALLOC_STRUCT(si_state_scissor);
+ if (scissor == NULL)
+ return;
+ pm4 = &scissor->pm4;
+ scissor->scissor = state[idx];
+ si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
+ S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
+ S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
+ si_pm4_set_state(sctx, scissor[i], scissor);
+ }
}
static void si_set_viewport_states(struct pipe_context *ctx,
@@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
const struct pipe_viewport_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
- struct si_pm4_state *pm4 = &viewport->pm4;
+ struct si_state_viewport *viewport;
+ struct si_pm4_state *pm4;
+ int i;
- if (viewport == NULL)
- return;
+ for (i = start_slot; i < start_slot + num_viewports; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 6;
- viewport->viewport = *state;
- si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
- si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
- si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
- si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
- si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
- si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
+ viewport = CALLOC_STRUCT(si_state_viewport);
+ if (!viewport)
+ return;
+ pm4 = &viewport->pm4;
- si_pm4_set_state(sctx, viewport, viewport);
+ viewport->viewport = state[idx];
+ si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, fui(state[idx].scale[0]));
+ si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, fui(state[idx].translate[0]));
+ si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, fui(state[idx].scale[1]));
+ si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, fui(state[idx].translate[1]));
+ si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, fui(state[idx].scale[2]));
+ si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, fui(state[idx].translate[2]));
+
+ si_pm4_set_state(sctx, viewport[i], viewport);
+ }
}
/*
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 5e68b162137..d1f2dff2c3f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,8 +92,8 @@ union si_state {
struct si_pm4_state *blend_color;
struct si_pm4_state *clip;
struct si_state_sample_mask *sample_mask;
- struct si_state_scissor *scissor;
- struct si_state_viewport *viewport;
+ struct si_state_scissor *scissor[16];
+ struct si_state_viewport *viewport[16];
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
struct si_pm4_state *fb_rs;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 208c8523ef1..48128fa44e1 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader)
case TGSI_SEMANTIC_POSITION:
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_EDGEFLAG:
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- case TGSI_SEMANTIC_LAYER:
break;
default:
nparams++;
From 556dd4af76ca0be9b0698139c06e6d12d52e8ff3 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 25 Jun 2015 03:55:54 +0100
Subject: [PATCH 0052/1208] radeonsi: add support for geometry shader
invocations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Marek Olšák
Signed-off-by: Dave Airlie
---
docs/GL3.txt | 2 +-
src/gallium/drivers/radeonsi/si_shader.c | 5 +++++
src/gallium/drivers/radeonsi/si_shader.h | 1 +
src/gallium/drivers/radeonsi/si_state.c | 1 -
src/gallium/drivers/radeonsi/si_state_shaders.c | 7 +++++++
5 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index df913bdd8c9..81014a5f9b5 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -104,7 +104,7 @@ GL 4.0, GLSL 4.00:
- Fused multiply-add DONE ()
- Packing/bitfield/conversion functions DONE (r600, radeonsi, softpipe)
- Enhanced textureGather DONE (r600, radeonsi, softpipe)
- - Geometry shader instancing DONE (r600, llvmpipe, softpipe)
+ - Geometry shader instancing DONE (r600, radeonsi, llvmpipe, softpipe)
- Geometry shader multiple streams DONE ()
- Enhanced per-sample shading DONE (r600, radeonsi)
- Interpolation functions DONE (r600)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4ca31728dff..4d97b58aec8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -630,6 +630,11 @@ static void declare_system_value(
SI_PARAM_BASE_VERTEX);
break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ value = LLVMGetParam(radeon_bld->main_fn,
+ SI_PARAM_GS_INSTANCE_ID);
+ break;
+
case TGSI_SEMANTIC_SAMPLEID:
value = get_sample_id(radeon_bld);
break;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 51055afe36a..b4339ae2b36 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -115,6 +115,7 @@ struct si_shader_selector {
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
+ unsigned gs_num_invocations;
uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 752467bcfd7..0dd08a248f4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3078,7 +3078,6 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
- si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 48128fa44e1..eef3baad164 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -76,6 +76,7 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
@@ -118,6 +119,10 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
+ si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+ S_028B90_ENABLE(gs_num_invocations > 0));
+
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
@@ -490,6 +495,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
sel->gs_max_out_vertices =
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+ sel->gs_num_invocations =
+ sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
From 2a210b797eacd27a556af9c5e0edca940f9486c5 Mon Sep 17 00:00:00 2001
From: Mike Stroyan
Date: Fri, 26 Jun 2015 15:15:46 -0600
Subject: [PATCH 0053/1208] meta: Only change and restore viewport 0 in mesa
meta mode
The meta code was setting a default depth range for all viewports
and 'restoring' all viewports to depth range values saved from viewport 0.
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/common/meta.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 214a68a9129..9a75019d059 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -728,7 +728,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
save->DepthNear = ctx->ViewportArray[0].Near;
save->DepthFar = ctx->ViewportArray[0].Far;
/* set depth range to default */
- _mesa_DepthRange(0.0, 1.0);
+ _mesa_set_depth_range(ctx, 0, 0.0, 1.0);
}
if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
@@ -1129,7 +1129,7 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_set_viewport(ctx, 0, save->ViewportX, save->ViewportY,
save->ViewportW, save->ViewportH);
}
- _mesa_DepthRange(save->DepthNear, save->DepthFar);
+ _mesa_set_depth_range(ctx, 0, save->DepthNear, save->DepthFar);
}
if (state & MESA_META_CLAMP_FRAGMENT_COLOR &&
From a98600b0ebdfc8481c168aae6c5670071e22fc29 Mon Sep 17 00:00:00 2001
From: Mario Kleiner
Date: Fri, 5 Jun 2015 15:36:52 +0200
Subject: [PATCH 0054/1208] nouveau: Use dup fd as key in drm-winsys hash table
to fix ZaphodHeads.
The dup'ed fd owned by the nouveau_screen for a device node
must also be used as key for the winsys hash table, instead
of using the original fd passed in for a screen, to make
multi-x-screen ZaphodHeads configurations work on nouveau.
The original fd's lifetime differs from that of the nouveau_screen stored
in the hash. The hash key is the fd, and in order to compare hash entries
we fstat them, so the fd must be around for as long as the screen is.
This is an extension of the fix in commit a59f2bb1 (nouveau: dup fd
before passing it to device).
Cc: "10.3 10.4 10.5 10.6"
Signed-off-by: Mario Kleiner
Reviewed-by: Ilia Mirkin
---
src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 063524655b6..5a4c256539d 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -120,7 +120,11 @@ nouveau_drm_screen_create(int fd)
if (!screen)
goto err;
- util_hash_table_set(fd_tab, intptr_to_pointer(fd), screen);
+ /* Use dupfd in hash table, to avoid errors if the original fd gets
+ * closed by its owner. The hash key needs to live at least as long as
+ * the screen.
+ */
+ util_hash_table_set(fd_tab, intptr_to_pointer(dupfd), screen);
screen->refcount = 1;
pipe_mutex_unlock(nouveau_screen_mutex);
return &screen->base;
From b4b4406e1e8dcf577551087cc6eb068e5303efdf Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 24 Jun 2015 21:11:27 +0200
Subject: [PATCH 0055/1208] gallium/hud: prevent NULL pointer dereference with
pipe_query functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The HUD doesn't check whether create_query() fails, and it then calls other
pipe_query functions with a NULL pointer instead of a valid query object.
Signed-off-by: Samuel Pitoiset
Reviewed-by: Marek Olšák
---
src/gallium/auxiliary/hud/hud_driver_query.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index 603aba7e8cd..ee71678e894 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -62,7 +62,8 @@ query_new_value(struct hud_graph *gr)
uint64_t now = os_time_get();
if (info->last_time) {
- pipe->end_query(pipe, info->query[info->head]);
+ if (info->query[info->head])
+ pipe->end_query(pipe, info->query[info->head]);
/* read query results */
while (1) {
@@ -70,7 +71,7 @@ query_new_value(struct hud_graph *gr)
union pipe_query_result result;
uint64_t *res64 = (uint64_t *)&result;
- if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+ if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
info->results_cumulative += res64[info->result_index];
info->num_results++;
@@ -88,7 +89,8 @@ query_new_value(struct hud_graph *gr)
"gallium_hud: all queries are busy after %i frames, "
"can't add another query\n",
NUM_QUERIES);
- pipe->destroy_query(pipe, info->query[info->head]);
+ if (info->query[info->head])
+ pipe->destroy_query(pipe, info->query[info->head]);
info->query[info->head] =
pipe->create_query(pipe, info->query_type, 0);
}
@@ -113,15 +115,15 @@ query_new_value(struct hud_graph *gr)
info->results_cumulative = 0;
info->num_results = 0;
}
-
- pipe->begin_query(pipe, info->query[info->head]);
}
else {
/* initialize */
info->last_time = now;
info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
- pipe->begin_query(pipe, info->query[info->head]);
}
+
+ if (info->query[info->head])
+ pipe->begin_query(pipe, info->query[info->head]);
}
static void
From 17e8fca626c908dcbedabf57ce175113840e65c2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 29 May 2015 22:40:07 -0700
Subject: [PATCH 0056/1208] i965: Write at least some data in SIMD8 URB write
messages.
According to the "URB SIMD8 Write > Write Data Payload" documentation,
"The write data payload can be between 1 and 8 message phases long."
Apparently, the simulator considers it an error if you issue an URB
SIMD8 message with only a header and no actual data to write.
v2: Try to put in a better PRM citation, now that the Broadwell docs
actually exist (requested by Jordan).
Signed-off-by: Kenneth Graunke
Reviewed-by: Jordan Justen
---
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4e13199eec0..0cbaf17f1e4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1799,16 +1799,23 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
compute_clip_distance(clip_planes);
/* If we don't have any valid slots to write, just do a minimal urb write
- * send to terminate the shader. */
+ * send to terminate the shader. This includes 1 slot of undefined data,
+ * because it's invalid to write 0 data:
+ *
+ * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
+ * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
+ * Write Data Payload:
+ *
+ * "The write data payload can be between 1 and 8 message phases long."
+ */
if (vue_map->slots_valid == 0) {
-
- fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD)));
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
inst->eot = true;
- inst->mlen = 1;
+ inst->mlen = 2;
inst->offset = 1;
return;
}
From 19a0ba130fd0d0f3b86181a8d05cf5391420360d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 26 Jun 2015 15:05:13 -0700
Subject: [PATCH 0057/1208] i965/vs: Move compute_clip_distance() out of
emit_urb_writes().
Legacy user clipping (using gl_Position or gl_ClipVertex) is handled by
turning those into the modern gl_ClipDistance equivalents.
This is unnecessary in Core Profile: if user clipping is enabled, but
the shader doesn't write the corresponding gl_ClipDistance entry,
results are undefined. Hence, it is also unnecessary for geometry
shaders.
This patch moves the call up to run_vs(). This is equivalent for VS,
but removes the need to pass clip distances into emit_urb_writes().
Signed-off-by: Kenneth Graunke
Reviewed-by: Jason Ekstrand
Reviewed-by: Chris Forbes
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +++-
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 16 +++++++++++-----
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4292aa6b9fb..8658554e96b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3816,7 +3816,9 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
if (failed)
return false;
- emit_urb_writes(clip_planes);
+ compute_clip_distance(clip_planes);
+
+ emit_urb_writes();
if (shader_time_index >= 0)
emit_shader_time_end();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 243baf688de..d08d438a40e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -271,7 +271,7 @@ public:
fs_reg src0_alpha, unsigned components,
unsigned exec_size, bool use_2nd_half = false);
void emit_fb_writes();
- void emit_urb_writes(gl_clip_plane *clip_planes);
+ void emit_urb_writes();
void emit_cs_terminate();
void emit_barrier();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 0cbaf17f1e4..34bf32d7ab3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1731,6 +1731,12 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
}
}
+/**
+ * Lower legacy fixed-function and gl_ClipVertex clipping to clip distances.
+ *
+ * This does nothing if the shader uses gl_ClipDistance or user clipping is
+ * disabled altogether.
+ */
void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
{
struct brw_vue_prog_data *vue_prog_data =
@@ -1738,6 +1744,10 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
const struct brw_vue_prog_key *key =
(const struct brw_vue_prog_key *) this->key;
+ /* Bail unless some sort of legacy clipping is enabled */
+ if (!key->userclip_active || prog->UsesClipDistanceOut)
+ return;
+
/* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
*
* "If a linked set of shaders forming the vertex stage contains no
@@ -1781,7 +1791,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
}
void
-fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
+fs_visitor::emit_urb_writes()
{
int slot, urb_offset, length;
struct brw_vs_prog_data *vs_prog_data =
@@ -1794,10 +1804,6 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
bool flush;
fs_reg sources[8];
- /* Lower legacy ff and ClipVertex clipping to clip distances */
- if (key->base.userclip_active && !prog->UsesClipDistanceOut)
- compute_clip_distance(clip_planes);
-
/* If we don't have any valid slots to write, just do a minimal urb write
* send to terminate the shader. This includes 1 slot of undefined data,
* because it's invalid to write 0 data:
From b5622313ea2e070cc0c20c7cdccd844d383713d0 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sun, 28 Jun 2015 22:30:27 -0400
Subject: [PATCH 0058/1208] nv40: enable base vertex
Still appears to have issues with negative indices less than -1M, but
that's a corner case of a corner case.
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h | 2 ++
src/gallium/drivers/nouveau/nv30/nv30_context.h | 1 +
src/gallium/drivers/nouveau/nv30/nv30_vbo.c | 6 ++----
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
index 447f4b3b7ae..95468e580dd 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
@@ -1459,6 +1459,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40_3D_VTX_CACHE_INVALIDATE 0x00001714
+#define NV40_3D_VB_ELEMENT_BASE 0x0000173c
+
#define NV30_3D_VTXFMT(i0) (0x00001740 + 0x4*(i0))
#define NV30_3D_VTXFMT__ESIZE 0x00000004
#define NV30_3D_VTXFMT__LEN 0x00000010
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h
index 592cdbe24f9..7181336b562 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -51,6 +51,7 @@ struct nv30_context {
unsigned rt_enable;
unsigned scissor_off;
unsigned num_vtxelts;
+ int index_bias;
boolean prim_restart;
struct nv30_fragprog *fragprog;
} state;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index d4e384b21d2..faa8812528a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -461,13 +461,11 @@ nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
struct nouveau_object *eng3d = nv30->screen->eng3d;
unsigned prim = nv30_prim_gl(mode);
-#if 0 /*XXX*/
- if (index_bias != nv30->state.index_bias) {
- BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1);
+ if (eng3d->oclass >= NV40_3D_CLASS && index_bias != nv30->state.index_bias) {
+ BEGIN_NV04(push, NV40_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
nv30->state.index_bias = index_bias;
}
-#endif
if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&
nv30->idxbuf.buffer) {
From 61912036d1cb67e52b1cc191bdff8ebded439e8c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 00:23:55 -0400
Subject: [PATCH 0059/1208] nv30: avoid leaking blit fp/vp
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30_context.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index 617b0887810..ef035e58f3c 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -165,6 +165,12 @@ nv30_context_destroy(struct pipe_context *pipe)
if (nv30->draw)
draw_destroy(nv30->draw);
+ if (nv30->blit_vp)
+ nouveau_heap_free(&nv30->blit_vp);
+
+ if (nv30->blit_fp)
+ pipe_resource_reference(&nv30->blit_fp, NULL);
+
if (nv30->screen->base.pushbuf->user_priv == &nv30->bufctx)
nv30->screen->base.pushbuf->user_priv = NULL;
From cae701fc8ed0faeaaaafd1cf57f6143031edcab2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Sun, 28 Jun 2015 22:17:09 -0700
Subject: [PATCH 0060/1208] Revert "i965: Delete linked GLSL IR when using
NIR."
This reverts commit 104c8fc2c2aa5621261f80aa6b4f76c3163078f1.
---
src/mesa/drivers/dri/i965/brw_shader.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 5653d6ba1e4..32c40131434 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -387,11 +387,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
brw_add_texrect_params(prog);
- if (options->NirOptions) {
+ if (options->NirOptions)
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
- ralloc_free(shader->ir);
- shader->ir = NULL;
- }
_mesa_reference_program(ctx, &prog, NULL);
}
From 6218c68bece0cea671f2940a651119a87ab8b24e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Sun, 28 Jun 2015 22:17:16 -0700
Subject: [PATCH 0061/1208] Revert "glsl: clone inputs and outputs during
linking"
This reverts commit c2ff3485b3d48749ea9dcad07bc1a691627dc3e5.
Ilia and I noticed a memory leak caused by this patch: at least with
fixed-function programs, we clone things using ProgramResourceList as
the context before reralloc makes it non-NULL.
I believe Tapani found other bugs with these patches, so I'm just going
to revert them for now and let him pursue them further.
---
src/glsl/linker.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 5da9cadcb08..4a726d4e2e7 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2637,9 +2637,7 @@ add_interface_variables(struct gl_shader_program *shProg,
continue;
};
- /* Clone ir_variable data so that backend is able to free memory. */
- if (!add_program_resource(shProg, programInterface,
- var->clone(shProg->ProgramResourceList, NULL),
+ if (!add_program_resource(shProg, programInterface, var,
build_stageref(shProg, var->name) | mask))
return false;
}
From 07158c508ac9b933d60dd3e2cd1e748601c44b68 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 29 Jun 2015 08:23:14 +0100
Subject: [PATCH 0062/1208] Add release notes for the 10.6.1 release
Signed-off-by: Emil Velikov
(cherry picked from commit a871e80fc6237fa029d6970f7e9b414fd097bd98)
---
docs/relnotes/10.6.1.html | 103 ++++++++++++++++++++++++++++++++++++++
1 file changed, 103 insertions(+)
create mode 100644 docs/relnotes/10.6.1.html
diff --git a/docs/relnotes/10.6.1.html b/docs/relnotes/10.6.1.html
new file mode 100644
index 00000000000..03b5b086eb1
--- /dev/null
+++ b/docs/relnotes/10.6.1.html
@@ -0,0 +1,103 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.6.1 Release Notes / June 29, 2015
+
+
+Mesa 10.6.1 is a bug fix release which fixes bugs found since the 10.6.0 release.
+
+
+Mesa 10.6.1 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+TBD
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+Bug 90347 - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)
+
+
+
+
+
Changes
+
+
Anuj Phogat (4):
+
+ mesa: Handle integer formats in need_rgb_to_luminance_conversion()
+ mesa: Use helper function need_rgb_to_luminance_conversion()
+ mesa: Turn need_rgb_to_luminance_conversion() in to a global function
+ meta: Abort meta path if ReadPixels need rgb to luminance conversion
+
+
+
Ben Widawsky (1):
+
+ i965/gen9: Implement Push Constant Buffer workaround
+
+
+
Boyan Ding (2):
+
+ egl/x11: Set version of swrastLoader to 2
+ egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals
+
+
+
Emil Velikov (6):
+
+ docs: Add sha256sums for the 10.6.0 release
+ configure: warn about shared_glapi & xlib-glx only when both are set
+ configure: error out when building backend-less libEGL
+ configure: error out when building libEGL without shared-glapi
+ gbm: do not (over)link against libglapi.so
+ Update version to 10.6.1
+
+
+
Frank Henigman (1):
+
+ gbm: dlopen libglapi so gbm_create_device works
+
+
+
Ilia Mirkin (9):
+
+ nvc0/ir: fix collection of first uses for texture barrier insertion
+ nv50,nvc0: clamp uniform size to 64k
+ nvc0/ir: can't have a join on a load with an indirect source
+ glsl: handle conversions to double when comparing param matches
+ glsl: add version checks to conditionals for builtin variable enablement
+ mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls
+ glsl: binding point is a texture unit, which is a combined space
+ nvc0: always put all tfb bufs into bufctx
+ nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data
+
+
+
+
+
+
From 24df6cd0f7723e163d75ed3eb0b7e22adc3ffd7f Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 29 Jun 2015 09:00:24 +0100
Subject: [PATCH 0063/1208] docs: Add sha256 checksums for the 10.6.1 release
Signed-off-by: Emil Velikov
(cherry picked from commit 6ff3ae8deb1d99037f2f8e5890b09bd984059cf0)
---
docs/relnotes/10.6.1.html | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/docs/relnotes/10.6.1.html b/docs/relnotes/10.6.1.html
index 03b5b086eb1..f197b0f3a42 100644
--- a/docs/relnotes/10.6.1.html
+++ b/docs/relnotes/10.6.1.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
SHA256 checksums
-TBD
+b4cccd4d0eabcc2bca00c3175d3ad88fdda57ffdb883a7998525b873a21fe607 mesa-10.6.1.tar.gz
+6c80a2b647e57c85dc36e609d9aed17f878f0d8e0cf9ace86d14cf604101e1eb mesa-10.6.1.tar.xz
From dd9ceb0219f6ca7864940ee1961f1b1890d27cea Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 29 Jun 2015 09:03:19 +0100
Subject: [PATCH 0064/1208] docs: add news item and link release notes for mesa
10.6.1
Signed-off-by: Emil Velikov
---
docs/index.html | 6 ++++++
docs/relnotes.html | 1 +
2 files changed, 7 insertions(+)
diff --git a/docs/index.html b/docs/index.html
index 80c6e03e3f1..29e62791131 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,12 @@
News
+June 29, 2015
+
+Mesa 10.6.1 is released.
+This is a bug-fix release.
+
+
June 20, 2015
Mesa 10.5.8 is released.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 5fd80025a39..3e2d13c84ed 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
+10.6.1 release notes
10.5.8 release notes
10.6.0 release notes
10.5.7 release notes
From c0ca6c30eaf7f488f154c462a01a8945cb4a3103 Mon Sep 17 00:00:00 2001
From: Neil Roberts
Date: Fri, 26 Jun 2015 17:54:15 +0100
Subject: [PATCH 0065/1208] i965: Don't try to print the GLSL IR if it has been
freed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Since commit 104c8fc2c2aa5621261f8 the GLSL IR is freed when NIR is
being used. This caused brw_dump_ir() to segfault when INTEL_DEBUG=wm is set.
This patch just makes it skip dumping the GLSL IR in that case.
Reviewed-by: Ben Widawsky
Reviewed-by: Tapani Pälli
---
src/mesa/drivers/dri/i965/brw_program.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 2327af77ad3..85e271d2351 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -574,10 +574,13 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
struct gl_shader *shader, struct gl_program *prog)
{
if (shader_prog) {
- fprintf(stderr,
- "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
- _mesa_print_ir(stderr, shader->ir, NULL);
- fprintf(stderr, "\n\n");
+ if (shader->ir) {
+ fprintf(stderr,
+ "GLSL IR for native %s shader %d:\n",
+ stage, shader_prog->Name);
+ _mesa_print_ir(stderr, shader->ir, NULL);
+ fprintf(stderr, "\n\n");
+ }
} else {
fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
stage, prog->Id, stage);
From 249a9df7fce0a6bebc70852ab583c5324208bf06 Mon Sep 17 00:00:00 2001
From: Grigori Goronzy
Date: Thu, 28 May 2015 12:40:29 +0200
Subject: [PATCH 0066/1208] gallium: add PIPE_COMPUTE_CAP_SUBGROUP_SIZE
We need this to implement OpenCL's
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.
Reviewed-by: Francisco Jerez
---
src/gallium/docs/source/screen.rst | 2 ++
src/gallium/drivers/ilo/ilo_screen.c | 8 ++++++++
.../drivers/nouveau/nvc0/nvc0_screen.c | 4 ++++
src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++++++
src/gallium/drivers/radeon/r600_pipe_common.h | 20 +++++++++++++++++++
src/gallium/include/pipe/p_defines.h | 3 ++-
6 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 8f64817fe5f..74636207d06 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -384,6 +384,8 @@ pipe_screen::get_compute_param.
Value type: ``uint32_t``
* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
non-zero means yes, zero means no. Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
+ threads. Also known as wavefront size, warp size or SIMD width.
.. _pipe_bind:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 94105559b80..faebb9279b3 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
uint32_t max_clock_frequency;
uint32_t max_compute_units;
uint32_t images_supported;
+ uint32_t subgroup_size;
} val;
const void *ptr;
int size;
@@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
ptr = &val.images_supported;
size = sizeof(val.images_supported);
break;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ /* best case is actually SIMD32 */
+ val.subgroup_size = 16;
+
+ ptr = &val.subgroup_size;
+ size = sizeof(val.subgroup_size);
+ break;
default:
ptr = NULL;
size = 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 56c230e42fc..4c53106289c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -341,6 +341,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
{
uint64_t *data64 = (uint64_t *)data;
+ uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
switch (param) {
@@ -372,6 +373,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
data64[0] = 4096;
return 8;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ data32[0] = 32;
+ return 4;
default:
return 0;
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3def4446882..775cf53ba88 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -636,6 +636,12 @@ static int r600_get_compute_param(struct pipe_screen *screen,
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ if (ret) {
+ uint32_t *subgroup_size = ret;
+ *subgroup_size = r600_wavefront_size(rscreen->family);
+ }
+ return sizeof(uint32_t);
}
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 6ce81d33ddd..51fd016229c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -570,6 +570,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter)
/* else */ return 4;
}
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+ switch (family) {
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ case CHIP_RS880:
+ return 16;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return 32;
+ default:
+ return 64;
+ }
+}
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 88b7b7699c1..153897af754 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -700,7 +700,8 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
- PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
+ PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
+ PIPE_COMPUTE_CAP_SUBGROUP_SIZE
};
/**
From d15b32ebded278243eb648bb9ecd4c5f5d6d0569 Mon Sep 17 00:00:00 2001
From: Grigori Goronzy
Date: Thu, 28 May 2015 13:01:51 +0200
Subject: [PATCH 0067/1208] clover: implement
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
Work-group size should always be aligned to subgroup size; this is a
basic requirement, otherwise some work-items will be no-ops.
It might make sense to refine the value according to a kernel's
resource usage, but that's a possible optimization for the future.
Reviewed-by: Francisco Jerez
---
src/gallium/state_trackers/clover/api/kernel.cpp | 2 +-
src/gallium/state_trackers/clover/core/device.cpp | 5 +++++
src/gallium/state_trackers/clover/core/device.hpp | 1 +
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp
index 05cc392a914..857a152b554 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -169,7 +169,7 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
break;
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
- buf.as_scalar() = 1;
+ buf.as_scalar() = dev.subgroup_size();
break;
case CL_KERNEL_PRIVATE_MEM_SIZE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 42b45b7f2b8..c42d1d26004 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -185,6 +185,11 @@ device::max_block_size() const {
return { v.begin(), v.end() };
}
+cl_uint
+device::subgroup_size() const {
+ return get_compute_param(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
+}
+
std::string
device::device_name() const {
return pipe->get_name(pipe);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
index de5fc6bb9c4..285784744f3 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -67,6 +67,7 @@ namespace clover {
bool has_doubles() const;
std::vector max_block_size() const;
+ cl_uint subgroup_size() const;
std::string device_name() const;
std::string vendor_name() const;
enum pipe_shader_ir ir_format() const;
From 73d2b5af526676fd3f34243cdc155b3e1341b988 Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:04 +0200
Subject: [PATCH 0068/1208] mesa/main: Get rid of outdated GDB-hack
All of these enums are now used throughout the code, so there's no need
to explicitly reference them here any more.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/mesa/main/context.c | 27 ---------------------------
1 file changed, 27 deletions(-)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 79fa01849e0..265f98aea46 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -337,31 +337,6 @@ _mesa_destroy_visual( struct gl_config *vis )
/*@{*/
-/**
- * This is lame. gdb only seems to recognize enum types that are
- * actually used somewhere. We want to be able to print/use enum
- * values such as TEXTURE_2D_INDEX in gdb. But we don't actually use
- * the gl_texture_index type anywhere. Thus, this lame function.
- */
-static void
-dummy_enum_func(void)
-{
- gl_buffer_index bi = BUFFER_FRONT_LEFT;
- gl_face_index fi = FACE_POS_X;
- gl_frag_result fr = FRAG_RESULT_DEPTH;
- gl_texture_index ti = TEXTURE_2D_ARRAY_INDEX;
- gl_vert_attrib va = VERT_ATTRIB_POS;
- gl_varying_slot vs = VARYING_SLOT_POS;
-
- (void) bi;
- (void) fi;
- (void) fr;
- (void) ti;
- (void) va;
- (void) vs;
-}
-
-
/**
* One-time initialization mutex lock.
*
@@ -434,8 +409,6 @@ one_time_init( struct gl_context *ctx )
* #ifdef tests here.
*/
atexit(_mesa_destroy_shader_compiler);
-
- dummy_enum_func();
}
From ba5e1612c892282b930e278b5b98f1578cbe7dbb Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:05 +0200
Subject: [PATCH 0069/1208] dri: don't touch the shader compiler
This function is for deleting per-screen resources, and the shader
compiler resources are not of such nature. Besides, dri shouldn't
need to even know about the presence of a shader compiler.
These resources will already be released when mesa gets unloaded,
and that should be sufficient.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/mesa/drivers/dri/common/dri_util.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index e7ababe0b67..ae4592c739d 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -46,7 +46,6 @@
#include "dri_util.h"
#include "utils.h"
#include "xmlpool.h"
-#include "../glsl/glsl_parser_extras.h"
#include "main/mtypes.h"
#include "main/version.h"
#include "main/errors.h"
@@ -238,8 +237,6 @@ static void driDestroyScreen(__DRIscreen *psp)
* stream open to the X-server anymore.
*/
- _mesa_destroy_shader_compiler();
-
psp->driver->DestroyScreen(psp);
driDestroyOptionCache(&psp->optionCache);
From 195ab79ddecbdbf1f1714c233df278bff46c13e8 Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:06 +0200
Subject: [PATCH 0070/1208] mesa/main: only call _mesa_destroy_shader_compiler
once on exit
There's no point in calling _mesa_destroy_shader_compiler multiple
times on exit; the resources will only be released once anyway.
So let's move the atexit-call into the part that is only called
once.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/mesa/main/context.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 265f98aea46..c4af8ea16db 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -382,6 +382,8 @@ one_time_init( struct gl_context *ctx )
_mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;
}
+ atexit(_mesa_destroy_shader_compiler);
+
#if defined(DEBUG) && defined(__DATE__) && defined(__TIME__)
if (MESA_VERBOSE != 0) {
_mesa_debug(ctx, "Mesa %s DEBUG build %s %s\n",
@@ -404,11 +406,6 @@ one_time_init( struct gl_context *ctx )
api_init_mask |= 1 << ctx->API;
mtx_unlock(&OneTimeLock);
-
- /* Hopefully atexit() is widely available. If not, we may need some
- * #ifdef tests here.
- */
- atexit(_mesa_destroy_shader_compiler);
}
From de3e323be1bdc40a2a7d724d0f3db7a81a93bbbb Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:07 +0200
Subject: [PATCH 0071/1208] glsl: No need to lock in _mesa_glsl_release_types
This function only gets called while mesa is unloading, so there's
no potential of racing or multiple calls at the same time. So let's
just get rid of the locking.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/glsl/glsl_types.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index f675e90cb0d..c6223808bb8 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -324,8 +324,10 @@ const glsl_type *glsl_type::get_scalar_type() const
void
_mesa_glsl_release_types(void)
{
- mtx_lock(&glsl_type::mutex);
-
+ /* Should only be called during atexit (either when unloading shared
+ * object, or if process terminates), so no mutex-locking should be
+ * necessary.
+ */
if (glsl_type::array_types != NULL) {
hash_table_dtor(glsl_type::array_types);
glsl_type::array_types = NULL;
@@ -335,8 +337,6 @@ _mesa_glsl_release_types(void)
hash_table_dtor(glsl_type::record_types);
glsl_type::record_types = NULL;
}
-
- mtx_unlock(&glsl_type::mutex);
}
From c61bc6ed844b39e600cc64e3e552c7bf1894d7ba Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:08 +0200
Subject: [PATCH 0072/1208] util: port _mesa_strto[df] to C
_mesa_strtod and _mesa_strtof are only used from the GLSL compiler and
the ARB_[vertex|fragment]_program code, meaning that the locale doesn't
need to be initialized before the first OpenGL context gets initialized.
So let's use explicit initialization from the one-time init code instead
of depending on a C++ compiler to initialize at image-load time.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/glsl/glcpp/glcpp.c | 3 +++
src/glsl/main.cpp | 3 +++
src/mesa/main/context.c | 3 +++
src/util/Makefile.sources | 2 +-
src/util/{strtod.cpp => strtod.c} | 14 ++++++++------
src/util/strtod.h | 3 +++
6 files changed, 21 insertions(+), 7 deletions(-)
rename src/util/{strtod.cpp => strtod.c} (89%)
diff --git a/src/glsl/glcpp/glcpp.c b/src/glsl/glcpp/glcpp.c
index 5144516a69c..c62f4efec9d 100644
--- a/src/glsl/glcpp/glcpp.c
+++ b/src/glsl/glcpp/glcpp.c
@@ -29,6 +29,7 @@
#include "glcpp.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
+#include "util/strtod.h"
extern int glcpp_parser_debug;
@@ -168,6 +169,8 @@ main (int argc, char *argv[])
if (shader == NULL)
return 1;
+ _mesa_locale_init();
+
ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
printf("%s", shader);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 23412980dce..58651df10a0 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -38,6 +38,7 @@
#include "program/hash_table.h"
#include "loop_analysis.h"
#include "standalone_scaffolding.h"
+#include "util/strtod.h"
static int glsl_version = 330;
@@ -46,6 +47,8 @@ initialize_context(struct gl_context *ctx, gl_api api)
{
initialize_context_to_defaults(ctx, api);
+ _mesa_locale_init();
+
/* The standalone compiler needs to claim support for almost
* everything in order to compile the built-in functions.
*/
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index c4af8ea16db..e68de68d645 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -120,6 +120,7 @@
#include "shaderobj.h"
#include "shaderimage.h"
#include "util/simple_list.h"
+#include "util/strtod.h"
#include "state.h"
#include "stencil.h"
#include "texcompress_s3tc.h"
@@ -374,6 +375,8 @@ one_time_init( struct gl_context *ctx )
assert( sizeof(GLint) == 4 );
assert( sizeof(GLuint) == 4 );
+ _mesa_locale_init();
+
_mesa_one_time_init_extension_overrides();
_mesa_get_cpu_features();
diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index dc559391823..82df3bcb00a 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -19,7 +19,7 @@ MESA_UTIL_FILES := \
set.c \
set.h \
simple_list.h \
- strtod.cpp \
+ strtod.c \
strtod.h \
texcompress_rgtc_tmp.h \
u_atomic.h
diff --git a/src/util/strtod.cpp b/src/util/strtod.c
similarity index 89%
rename from src/util/strtod.cpp
rename to src/util/strtod.c
index 2b4dd982a80..a4a60e0404a 100644
--- a/src/util/strtod.cpp
+++ b/src/util/strtod.c
@@ -30,18 +30,20 @@
#include
#ifdef HAVE_XLOCALE_H
#include
+static locale_t loc;
#endif
#endif
#include "strtod.h"
+void
+_mesa_locale_init(void)
+{
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
-static struct locale_initializer {
- locale_initializer() { loc = newlocale(LC_CTYPE_MASK, "C", NULL); }
- locale_t loc;
-} loc_init;
+ loc = newlocale(LC_CTYPE_MASK, "C", NULL);
#endif
+}
/**
* Wrapper around strtod which uses the "C" locale so the decimal
@@ -51,7 +53,7 @@ double
_mesa_strtod(const char *s, char **end)
{
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
- return strtod_l(s, end, loc_init.loc);
+ return strtod_l(s, end, loc);
#else
return strtod(s, end);
#endif
@@ -66,7 +68,7 @@ float
_mesa_strtof(const char *s, char **end)
{
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
- return strtof_l(s, end, loc_init.loc);
+ return strtof_l(s, end, loc);
#elif defined(HAVE_STRTOF)
return strtof(s, end);
#else
diff --git a/src/util/strtod.h b/src/util/strtod.h
index 02c25ddb78f..b7e2beb5f30 100644
--- a/src/util/strtod.h
+++ b/src/util/strtod.h
@@ -31,6 +31,9 @@
extern "C" {
#endif
+extern void
+_mesa_locale_init(void);
+
extern double
_mesa_strtod(const char *s, char **end);
From e566e5203aaba98109a67766cf28991de3358490 Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Sun, 28 Jun 2015 14:51:09 +0200
Subject: [PATCH 0073/1208] mesa/main: free locale at exit
In order to avoid a small leak if mesa is continuously loaded and
unloaded, let's free the locale when the shared object is unloaded.
Signed-off-by: Erik Faye-Lund
Reviewed-by: Matt Turner
Reviewed-by: Brian Paul
---
src/mesa/main/context.c | 12 +++++++++++-
src/util/strtod.c | 8 ++++++++
src/util/strtod.h | 3 +++
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index e68de68d645..fdef41287f7 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -346,6 +346,16 @@ _mesa_destroy_visual( struct gl_config *vis )
mtx_t OneTimeLock = _MTX_INITIALIZER_NP;
+/**
+ * Calls all the various one-time-fini functions in Mesa
+ */
+
+static void
+one_time_fini(void)
+{
+ _mesa_destroy_shader_compiler();
+ _mesa_locale_fini();
+}
/**
* Calls all the various one-time-init functions in Mesa.
@@ -385,7 +395,7 @@ one_time_init( struct gl_context *ctx )
_mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;
}
- atexit(_mesa_destroy_shader_compiler);
+ atexit(one_time_fini);
#if defined(DEBUG) && defined(__DATE__) && defined(__TIME__)
if (MESA_VERBOSE != 0) {
diff --git a/src/util/strtod.c b/src/util/strtod.c
index a4a60e0404a..ea7d395e2da 100644
--- a/src/util/strtod.c
+++ b/src/util/strtod.c
@@ -45,6 +45,14 @@ _mesa_locale_init(void)
#endif
}
+void
+_mesa_locale_fini(void)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+ freelocale(loc);
+#endif
+}
+
/**
* Wrapper around strtod which uses the "C" locale so the decimal
* point is always '.'
diff --git a/src/util/strtod.h b/src/util/strtod.h
index b7e2beb5f30..60e15cfa0eb 100644
--- a/src/util/strtod.h
+++ b/src/util/strtod.h
@@ -34,6 +34,9 @@ extern "C" {
extern void
_mesa_locale_init(void);
+extern void
+_mesa_locale_fini(void);
+
extern double
_mesa_strtod(const char *s, char **end);
From 06f76b7fa68db1ac74ecca015412f71b3a5e9f9c Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Thu, 4 Jun 2015 16:57:02 -0700
Subject: [PATCH 0074/1208] i965: Make a helper function
intel_miptree_set_alignment()
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_tex_layout.c | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 455984309fa..e35cb645c24 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -761,16 +761,13 @@ intel_miptree_set_total_width_height(struct brw_context *brw,
mt->total_width, mt->total_height, mt->cpp);
}
-void
-brw_miptree_layout(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- enum intel_miptree_tiling_mode requested,
- uint32_t layout_flags)
+static void
+intel_miptree_set_alignment(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t layout_flags)
{
bool gen6_hiz_or_stencil = false;
- mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
-
if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
const GLenum base_format = _mesa_get_format_base_format(mt->format);
gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
@@ -805,7 +802,17 @@ brw_miptree_layout(struct brw_context *brw,
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
}
+}
+void
+brw_miptree_layout(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ enum intel_miptree_tiling_mode requested,
+ uint32_t layout_flags)
+{
+ mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
+
+ intel_miptree_set_alignment(brw, mt, layout_flags);
intel_miptree_set_total_width_height(brw, mt);
if (!mt->total_width || !mt->total_height) {
From c9dbdc08b9de016ab3b076feac3df4c81009996e Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Tue, 14 Apr 2015 22:06:49 -0700
Subject: [PATCH 0075/1208] i965/gen9: Plug in the code for selecting YF/YS
tiling on skl+
Buffers with Yf/Ys tiling end up using the meta upload / download
paths or the blitter in cases where Y-tiled buffers would use the
tiled_memcpy paths. This has exposed some bugs in the meta path. To
avoid any piglit regressions on SKL, this patch keeps Yf/Ys
tiling disabled for now.
V3: Make brw_miptree_choose_tr_mode() actually choose TRMODE. (Ben)
Few cosmetic changes.
V4: Get rid of brw_miptree_choose_tr_mode().
Take care of all tile resource modes {Yf, Ys, none} for all
generations at one place.
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_tex_layout.c | 100 +++++++++++++++++----
1 file changed, 81 insertions(+), 19 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index e35cb645c24..45bbf7e7d0f 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -810,27 +810,89 @@ brw_miptree_layout(struct brw_context *brw,
enum intel_miptree_tiling_mode requested,
uint32_t layout_flags)
{
- mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
-
- intel_miptree_set_alignment(brw, mt, layout_flags);
- intel_miptree_set_total_width_height(brw, mt);
-
- if (!mt->total_width || !mt->total_height) {
- intel_miptree_release(&mt);
- return;
- }
-
- /* On Gen9+ the alignment values are expressed in multiples of the block
- * size
+ const unsigned bpp = mt->cpp * 8;
+ /* Enable YF/YS tiling only for color surfaces because depth and
+ * stencil surfaces are not supported in blitter using fast copy
+ * blit and meta PBO upload, download paths. No other paths
+ * currently support Yf/Ys tiled surfaces.
+ * FINISHME: Remove this restriction once we have a tiled_memcpy()
+ * path to do depth/stencil data upload/download to Yf/Ys tiled
+ * surfaces.
*/
- if (brw->gen >= 9) {
- unsigned int i, j;
- _mesa_get_format_block_size(mt->format, &i, &j);
- mt->align_w /= i;
- mt->align_h /= j;
- }
+ const bool is_tr_mode_yf_ys_allowed =
+ brw->gen >= 9 &&
+ !(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
+ !mt->compressed &&
+ _mesa_is_format_color_format(mt->format) &&
+ (requested == INTEL_MIPTREE_TILING_Y ||
+ requested == INTEL_MIPTREE_TILING_ANY) &&
+ (bpp && is_power_of_two(bpp)) &&
+ /* FIXME: To avoid piglit regressions keep the Yf/Ys tiling
+ * disabled at the moment.
+ */
+ false;
+
+ /* Lower index (Yf) is the higher priority mode */
+ const uint32_t tr_mode[3] = {INTEL_MIPTREE_TRMODE_YF,
+ INTEL_MIPTREE_TRMODE_YS,
+ INTEL_MIPTREE_TRMODE_NONE};
+ int i = is_tr_mode_yf_ys_allowed ? 0 : ARRAY_SIZE(tr_mode) - 1;
+
+ while (i < ARRAY_SIZE(tr_mode)) {
+ if (brw->gen < 9)
+ assert(tr_mode[i] == INTEL_MIPTREE_TRMODE_NONE);
+ else
+ assert(tr_mode[i] == INTEL_MIPTREE_TRMODE_YF ||
+ tr_mode[i] == INTEL_MIPTREE_TRMODE_YS ||
+ tr_mode[i] == INTEL_MIPTREE_TRMODE_NONE);
+
+ mt->tr_mode = tr_mode[i];
+ intel_miptree_set_alignment(brw, mt, layout_flags);
+ intel_miptree_set_total_width_height(brw, mt);
+
+ if (!mt->total_width || !mt->total_height) {
+ intel_miptree_release(&mt);
+ break;
+ }
+
+ /* On Gen9+ the alignment values are expressed in multiples of the
+ * block size.
+ */
+ if (brw->gen >= 9) {
+ unsigned int i, j;
+ _mesa_get_format_block_size(mt->format, &i, &j);
+ mt->align_w /= i;
+ mt->align_h /= j;
+ }
+
+ /* If there is already a BO, we cannot effect tiling modes */
+ if (layout_flags & MIPTREE_LAYOUT_FOR_BO)
+ break;
- if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
mt->tiling = brw_miptree_choose_tiling(brw, requested, mt);
+ if (is_tr_mode_yf_ys_allowed) {
+ unsigned int level = 0;
+
+ if (mt->tiling == I915_TILING_Y ||
+ mt->tiling == (I915_TILING_Y | I915_TILING_X) ||
+ mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
+ /* FIXME: Don't allow YS tiling at the moment. Using 64KB tiling
+ * for small textures might result in to memory wastage. Revisit
+ * this condition when we have more information about the specific
+ * cases where using YS over YF will be useful.
+ */
+ if (mt->tr_mode != INTEL_MIPTREE_TRMODE_YS)
+ break;
+ }
+ /* Failed to use selected tr_mode. Free up the memory allocated
+ * for miptree levels in intel_miptree_total_width_height().
+ */
+ for (level = mt->first_level; level <= mt->last_level; level++) {
+ free(mt->level[level].slice);
+ mt->level[level].slice = NULL;
+ }
+ }
+ i++;
+ }
}
From 385cd3e0bed8113659f2db8976b677b090acc9d8 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Fri, 5 Jun 2015 10:41:24 -0700
Subject: [PATCH 0076/1208] i965: Make a helper function
intel_miptree_release_levels()
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_tex_layout.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 45bbf7e7d0f..fc7454b7ef7 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -804,6 +804,17 @@ intel_miptree_set_alignment(struct brw_context *brw,
}
}
+static void
+intel_miptree_release_levels(struct intel_mipmap_tree *mt)
+{
+ unsigned int level = 0;
+
+ for (level = mt->first_level; level <= mt->last_level; level++) {
+ free(mt->level[level].slice);
+ mt->level[level].slice = NULL;
+ }
+}
+
void
brw_miptree_layout(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -871,8 +882,6 @@ brw_miptree_layout(struct brw_context *brw,
mt->tiling = brw_miptree_choose_tiling(brw, requested, mt);
if (is_tr_mode_yf_ys_allowed) {
- unsigned int level = 0;
-
if (mt->tiling == I915_TILING_Y ||
mt->tiling == (I915_TILING_Y | I915_TILING_X) ||
mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
@@ -887,10 +896,7 @@ brw_miptree_layout(struct brw_context *brw,
/* Failed to use selected tr_mode. Free up the memory allocated
* for miptree levels in intel_miptree_total_width_height().
*/
- for (level = mt->first_level; level <= mt->last_level; level++) {
- free(mt->level[level].slice);
- mt->level[level].slice = NULL;
- }
+ intel_miptree_release_levels(mt);
}
i++;
}
From a1afd59662449803fa4a40a79bdf0db16ffcbcf5 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Fri, 5 Jun 2015 10:56:40 -0700
Subject: [PATCH 0077/1208] i965: Make a helper function
intel_miptree_can_use_tr_mode()
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_tex_layout.c | 30 ++++++++++++++--------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index fc7454b7ef7..389834f012a 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -815,6 +815,23 @@ intel_miptree_release_levels(struct intel_mipmap_tree *mt)
}
}
+static bool
+intel_miptree_can_use_tr_mode(const struct intel_mipmap_tree *mt)
+{
+ if (mt->tiling == I915_TILING_Y ||
+ mt->tiling == (I915_TILING_Y | I915_TILING_X) ||
+ mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
+ /* FIXME: Don't allow YS tiling at the moment. Using 64KB tiling
+ * for small textures might result in to memory wastage. Revisit
+ * this condition when we have more information about the specific
+ * cases where using YS over YF will be useful.
+ */
+ if (mt->tr_mode != INTEL_MIPTREE_TRMODE_YS)
+ return true;
+ }
+ return false;
+}
+
void
brw_miptree_layout(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -882,17 +899,8 @@ brw_miptree_layout(struct brw_context *brw,
mt->tiling = brw_miptree_choose_tiling(brw, requested, mt);
if (is_tr_mode_yf_ys_allowed) {
- if (mt->tiling == I915_TILING_Y ||
- mt->tiling == (I915_TILING_Y | I915_TILING_X) ||
- mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
- /* FIXME: Don't allow YS tiling at the moment. Using 64KB tiling
- * for small textures might result in to memory wastage. Revisit
- * this condition when we have more information about the specific
- * cases where using YS over YF will be useful.
- */
- if (mt->tr_mode != INTEL_MIPTREE_TRMODE_YS)
- break;
- }
+ if (intel_miptree_can_use_tr_mode(mt))
+ break;
/* Failed to use selected tr_mode. Free up the memory allocated
* for miptree levels in intel_miptree_total_width_height().
*/
From 69ee316c1daf93b4a53b1c02301ffe9df9598d28 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Tue, 14 Apr 2015 22:06:48 -0700
Subject: [PATCH 0078/1208] i965/gen9: Allocate YF/YS tiled buffer objects
In the case of I915_TILING_{X,Y} we need to pass the tiling format to
libdrm using drm_intel_bo_alloc_tiled(). In the case of YF/YS tiled
buffers, however, libdrm need not know about the tiling format because
these buffers don't have hardware support to be tiled or detiled through
a fenced region. libdrm still needs to know the buffer alignment value
for use in the kernel when resolving the relocation.
Using drm_intel_bo_alloc_for_render() for YF/YS tiled buffers
satisfies both of the above conditions.
V2: Delete min/max buffer size restrictions not valid for i965+.
Remove redundant align to tile size statements.
Remove some code that is redundant now that there are no min/max buffer
size restrictions.
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 65 ++++++++++++++++++-
1 file changed, 62 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 31386b99656..fb896a92263 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -558,6 +558,53 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
}
}
+/* This function computes Yf/Ys tiled bo size, alignment and pitch. */
+static uint64_t
+intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
+ uint64_t *pitch)
+{
+ const uint32_t bpp = mt->cpp * 8;
+ const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
+ uint32_t tile_width, tile_height;
+ uint64_t stride, size, aligned_y;
+
+ assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
+
+ switch (bpp) {
+ case 8:
+ tile_height = 64;
+ break;
+ case 16:
+ case 32:
+ tile_height = 32;
+ break;
+ case 64:
+ case 128:
+ tile_height = 16;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
+ tile_height *= 4;
+
+ aligned_y = ALIGN(mt->total_height, tile_height);
+ stride = mt->total_width * mt->cpp;
+ tile_width = tile_height * mt->cpp * aspect_ratio;
+ stride = ALIGN(stride, tile_width);
+ size = stride * aligned_y;
+
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) {
+ assert(size % 4096 == 0);
+ *alignment = 4096;
+ } else {
+ assert(size % (64 * 1024) == 0);
+ *alignment = 64 * 1024;
+ }
+ *pitch = stride;
+ return size;
+}
struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
@@ -616,10 +663,22 @@ intel_miptree_create(struct brw_context *brw,
alloc_flags |= BO_ALLOC_FOR_RENDER;
unsigned long pitch;
- mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width,
- total_height, mt->cpp, &mt->tiling,
- &pitch, alloc_flags);
mt->etc_format = etc_format;
+
+ if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+ unsigned alignment = 0;
+ unsigned long size;
+ size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch);
+ assert(size);
+ mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
+ size, alignment);
+ } else {
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ total_width, total_height, mt->cpp,
+ &mt->tiling, &pitch,
+ alloc_flags);
+ }
+
mt->pitch = pitch;
/* If the BO is too large to fit in the aperture, we need to use the
From 7f282d05a11e0c29bddc1fac8c7028c7e823234f Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Fri, 5 Jun 2015 19:18:19 -0700
Subject: [PATCH 0079/1208] mesa: Add a new helper function
_mesa_regions_overlap()
Signed-off-by: Anuj Phogat
Reviewed-by: Brian Paul
---
src/mesa/main/blit.c | 26 ++++++++++++++++++++++++++
src/mesa/main/blit.h | 6 ++++++
2 files changed, 32 insertions(+)
diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c
index db8fee5a414..4765198d63a 100644
--- a/src/mesa/main/blit.c
+++ b/src/mesa/main/blit.c
@@ -37,6 +37,7 @@
#include "framebuffer.h"
#include "glformats.h"
#include "mtypes.h"
+#include "macros.h"
#include "state.h"
@@ -58,6 +59,31 @@ find_attachment(const struct gl_framebuffer *fb,
}
+/**
+ * \return true if two regions overlap, false otherwise
+ */
+bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1)
+{
+ if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
+ return false; /* dst completely right of src */
+
+ if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
+ return false; /* dst completely left of src */
+
+ if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
+ return false; /* dst completely above src */
+
+ if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
+ return false; /* dst completely below src */
+
+ return true; /* some overlap */
+}
+
+
/**
* Helper function for checking if the datatypes of color buffers are
* compatible for glBlitFramebuffer. From the 3.1 spec, page 198:
diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h
index 54b946e3192..88dd4a9ec8d 100644
--- a/src/mesa/main/blit.h
+++ b/src/mesa/main/blit.h
@@ -28,6 +28,12 @@
#include "glheader.h"
+extern bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1);
+
extern void
_mesa_blit_framebuffer(struct gl_context *ctx,
struct gl_framebuffer *readFb,
From 2a397c7958089f766aa0d3c66016742fdf7494dd Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Fri, 5 Jun 2015 19:23:46 -0700
Subject: [PATCH 0080/1208] mesa/st: Use global function
_mesa_regions_overlap()
Signed-off-by: Anuj Phogat
Reviewed-by: Brian Paul
---
src/mesa/state_tracker/st_cb_drawpixels.c | 30 +++--------------------
1 file changed, 3 insertions(+), 27 deletions(-)
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index a6a98c83aa6..e736d4b5083 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/blit.h"
#include "main/format_pack.h"
#include "main/macros.h"
#include "main/mtypes.h"
@@ -1312,31 +1313,6 @@ st_get_color_read_renderbuffer(struct gl_context *ctx)
}
-/**
- * \return TRUE if two regions overlap, FALSE otherwise
- */
-static boolean
-regions_overlap(int srcX0, int srcY0,
- int srcX1, int srcY1,
- int dstX0, int dstY0,
- int dstX1, int dstY1)
-{
- if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
- return FALSE; /* src completely left of dst */
-
- if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
- return FALSE; /* dst completely left of src */
-
- if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
- return FALSE; /* src completely above dst */
-
- if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
- return FALSE; /* dst completely above src */
-
- return TRUE; /* some overlap */
-}
-
-
/**
* Try to do a glCopyPixels for simple cases with a blit by calling
* pipe->blit().
@@ -1420,8 +1396,8 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
}
if (rbRead != rbDraw ||
- !regions_overlap(readX, readY, readX + readW, readY + readH,
- drawX, drawY, drawX + drawW, drawY + drawH)) {
+ !_mesa_regions_overlap(readX, readY, readX + readW, readY + readH,
+ drawX, drawY, drawX + drawW, drawY + drawH)) {
struct pipe_blit_info blit;
memset(&blit, 0, sizeof(blit));
From ca21c9ab28df24ef015ead28df1dcccd90387df6 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Tue, 9 Jun 2015 15:18:13 -0700
Subject: [PATCH 0081/1208] mesa/swrast: Use global function
_mesa_regions_overlap()
Signed-off-by: Anuj Phogat
Reviewed-by: Brian Paul
---
src/mesa/swrast/s_copypix.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c
index 68c83e44e12..8fde0c29540 100644
--- a/src/mesa/swrast/s_copypix.c
+++ b/src/mesa/swrast/s_copypix.c
@@ -27,6 +27,7 @@
#include "main/context.h"
#include "main/condrender.h"
#include "main/macros.h"
+#include "main/blit.h"
#include "main/pixeltransfer.h"
#include "main/imports.h"
@@ -52,19 +53,8 @@ regions_overlap(GLint srcx, GLint srcy,
GLfloat zoomX, GLfloat zoomY)
{
if (zoomX == 1.0 && zoomY == 1.0) {
- /* no zoom */
- if (srcx >= dstx + width || (srcx + width <= dstx)) {
- return GL_FALSE;
- }
- else if (srcy < dsty) { /* this is OK */
- return GL_FALSE;
- }
- else if (srcy > dsty + height) {
- return GL_FALSE;
- }
- else {
- return GL_TRUE;
- }
+ return _mesa_regions_overlap(srcx, srcy, srcx + width, srcy + height,
+ dstx, dsty, dstx + width, dsty + height);
}
else {
/* add one pixel of slop when zooming, just to be safe */
From 412c8c8e7eaeec2763bb21a30626544b5a711cb2 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Tue, 14 Apr 2015 22:06:49 -0700
Subject: [PATCH 0082/1208] i965/gen9: Add XY_FAST_COPY_BLT support to
intelEmitCopyBlit()
This patch enables using XY_FAST_COPY_BLT only for Yf/Ys tiled buffers.
It can be later turned on for other tiling patterns (X,Y) too.
V3: Flush in between sequential fast copy blits.
Fix src/dst alignment requirements.
Make can_fast_copy_blit() helper.
Use ffs(), is_power_of_two()
Move overlap computation inside intel_miptree_blit().
V4: Use _mesa_regions_overlap() function.
Add check for src_buffer == dst_buffer.
Simplify horizontal and vertical alignment computations.
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/intel_blit.c | 302 ++++++++++++++++---
src/mesa/drivers/dri/i965/intel_blit.h | 28 +-
src/mesa/drivers/dri/i965/intel_copy_image.c | 2 +
src/mesa/drivers/dri/i965/intel_reg.h | 16 +
4 files changed, 286 insertions(+), 62 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 9fac63d56a1..c773cbca974 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -27,6 +27,7 @@
#include "main/mtypes.h"
+#include "main/blit.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/colormac.h"
@@ -43,6 +44,23 @@
#define FILE_DEBUG_FLAG DEBUG_BLIT
+#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \
+({ \
+ switch (tiling) { \
+ case I915_TILING_X: \
+ CMD |= type ## _TILED_X; \
+ break; \
+ case I915_TILING_Y: \
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \
+ CMD |= type ## _TILED_64K; \
+ else \
+ CMD |= type ## _TILED_Y; \
+ break; \
+ default: \
+ unreachable("not reached"); \
+ } \
+})
+
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -75,6 +93,10 @@ static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
+ case 16:
+ return BR13_32323232;
+ case 8:
+ return BR13_16161616;
case 4:
return BR13_8888;
case 2:
@@ -86,6 +108,64 @@ br13_for_cpp(int cpp)
}
}
+static uint32_t
+get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /* Alignment tables for YF/YS tiled surfaces. */
+ const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16};
+ const uint32_t bpp = cpp * 8;
+ const uint32_t shift = is_src ? 17 : 10;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp));
+ i = ffs(bpp / 8) - 1;
+
+ align = tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_2d_yf[i] :
+ 4 * align_2d_yf[i];
+
+ assert(is_power_of_two(align));
+
+ /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */
+ if (align == 16)
+ align = 32;
+
+ return (ffs(align) - 6) << shift;
+}
+
+static uint32_t
+get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /* Vertical alignment tables for YF/YS tiled surfaces. */
+ const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
+ const uint32_t bpp = cpp * 8;
+ const uint32_t shift = is_src ? 15 : 8;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp));
+ i = ffs(bpp / 8) - 1;
+
+ align = tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_2d_yf[i] :
+ 4 * align_2d_yf[i];
+
+ assert(is_power_of_two(align));
+
+ /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */
+ if (align == 16 || align == 32)
+ align = 64;
+
+ return (ffs(align) - 7) << shift;
+}
+
/**
* Emits the packet for switching the blitter from X to Y tiled or back.
*
@@ -278,9 +358,11 @@ intel_miptree_blit(struct brw_context *brw,
src_pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
width, height,
@@ -313,6 +395,67 @@ alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling)
return true;
}
+static bool
+can_fast_copy_blit(struct brw_context *brw,
+ drm_intel_bo *src_buffer,
+ int16_t src_x, int16_t src_y,
+ uintptr_t src_offset, uint32_t src_pitch,
+ uint32_t src_tiling, uint32_t src_tr_mode,
+ drm_intel_bo *dst_buffer,
+ int16_t dst_x, int16_t dst_y,
+ uintptr_t dst_offset, uint32_t dst_pitch,
+ uint32_t dst_tiling, uint32_t dst_tr_mode,
+ int16_t w, int16_t h, uint32_t cpp)
+{
+ const bool dst_tiling_none = dst_tiling == I915_TILING_NONE;
+ const bool src_tiling_none = src_tiling == I915_TILING_NONE;
+
+ if (brw->gen < 9)
+ return false;
+
+ if (src_buffer->handle == dst_buffer->handle &&
+ _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h,
+ dst_x, dst_y, dst_x + w, dst_y + h))
+ return false;
+
+ /* Enable fast copy blit only if the surfaces are Yf/Ys tiled.
+ * FIXME: Based on performance data, remove this condition later to
+ * enable for all types of surfaces.
+ */
+ if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+ dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return false;
+
+ /* For all surface types buffers must be cacheline-aligned. */
+ if ((dst_offset | src_offset) & 63)
+ return false;
+
+ /* Color depth greater than 128 bits not supported. */
+ if (cpp > 16)
+ return false;
+
+ /* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15
+ * of both the source and destination pitch must be zero.
+ */
+ if ((src_pitch >> 15 & 1) != 0 || (dst_pitch >> 15 & 1) != 0)
+ return false;
+
+ /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. */
+ if ((src_tiling_none && src_pitch % 16 != 0) ||
+ (dst_tiling_none && dst_pitch % 16 != 0))
+ return false;
+
+ /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+ * (X direction width of the Tile). This means the pitch value will
+ * always be Cache Line aligned (64byte multiple).
+ */
+ if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
+ (!src_tiling_none && src_pitch % 64 != 0))
+ return false;
+
+ return true;
+}
+
/* Copy BitBlt
*/
bool
@@ -322,10 +465,12 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
+ uint32_t src_tr_mode,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
@@ -337,18 +482,11 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
-
- if (!alignment_valid(brw, dst_offset, dst_tiling))
- return false;
- if (!alignment_valid(brw, src_offset, src_tiling))
- return false;
+ bool use_fast_copy_blit = false;
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
- assert(!dst_y_tiled || (dst_pitch % 128) == 0);
- assert(!src_y_tiled || (src_pitch % 128) == 0);
-
/* do space check before going any further */
do {
aper_array[0] = brw->batch.bo;
@@ -373,52 +511,114 @@ intelEmitCopyBlit(struct brw_context *brw,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
- /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
- * the low bits. Offsets must be naturally aligned.
- */
- if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
- dst_pitch % 4 != 0 || dst_offset % cpp != 0)
- return false;
+ use_fast_copy_blit = can_fast_copy_blit(brw,
+ src_buffer,
+ src_x, src_y,
+ src_offset, src_pitch,
+ src_tiling, src_tr_mode,
+ dst_buffer,
+ dst_x, dst_y,
+ dst_offset, dst_pitch,
+ dst_tiling, dst_tr_mode,
+ w, h, cpp);
+ assert(use_fast_copy_blit ||
+ (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+ dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE));
- /* For big formats (such as floating point), do the copy using 16 or 32bpp
- * and multiply the coordinates.
- */
- if (cpp > 4) {
- if (cpp % 4 == 2) {
- dst_x *= cpp / 2;
- dst_x2 *= cpp / 2;
- src_x *= cpp / 2;
- cpp = 2;
- } else {
- assert(cpp % 4 == 0);
- dst_x *= cpp / 4;
- dst_x2 *= cpp / 4;
- src_x *= cpp / 4;
- cpp = 4;
+ if (use_fast_copy_blit) {
+ /* When two sequential fast copy blits have different source surfaces,
+ * but their destinations refer to the same destination surfaces and
+ * therefore destinations overlap it is imperative that a flush be
+ * inserted between the two blits.
+ *
+ * FIXME: Figure out a way to avoid flushing when not required.
+ */
+ brw_emit_mi_flush(brw);
+
+ assert(cpp <= 16);
+ BR13 = br13_for_cpp(cpp);
+
+ if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_FAST_SRC_TRMODE_YF;
+
+ if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_FAST_DST_TRMODE_YF;
+
+ CMD = XY_FAST_COPY_BLT_CMD;
+
+ if (dst_tiling != I915_TILING_NONE) {
+ SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST);
+ /* Pitch value should be specified as a number of Dwords. */
+ dst_pitch /= 4;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC);
+ /* Pitch value should be specified as a number of Dwords. */
+ src_pitch /= 4;
}
- }
- BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+ CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
+ CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
- switch (cpp) {
- case 1:
- case 2:
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- return false;
- }
+ CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
+ CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
- if (dst_tiling != I915_TILING_NONE) {
- CMD |= XY_DST_TILED;
- dst_pitch /= 4;
- }
- if (src_tiling != I915_TILING_NONE) {
- CMD |= XY_SRC_TILED;
- src_pitch /= 4;
+ } else {
+ assert(!dst_y_tiled || (dst_pitch % 128) == 0);
+ assert(!src_y_tiled || (src_pitch % 128) == 0);
+
+ /* For big formats (such as floating point), do the copy using 16 or
+ * 32bpp and multiply the coordinates.
+ */
+ if (cpp > 4) {
+ if (cpp % 4 == 2) {
+ dst_x *= cpp / 2;
+ dst_x2 *= cpp / 2;
+ src_x *= cpp / 2;
+ cpp = 2;
+ } else {
+ assert(cpp % 4 == 0);
+ dst_x *= cpp / 4;
+ dst_x2 *= cpp / 4;
+ src_x *= cpp / 4;
+ cpp = 4;
+ }
+ }
+
+ if (!alignment_valid(brw, dst_offset, dst_tiling))
+ return false;
+ if (!alignment_valid(brw, src_offset, src_tiling))
+ return false;
+
+ /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
+ * the low bits. Offsets must be naturally aligned.
+ */
+ if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
+ dst_pitch % 4 != 0 || dst_offset % cpp != 0)
+ return false;
+
+ assert(cpp <= 4);
+ BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+ switch (cpp) {
+ case 1:
+ case 2:
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return false;
+ }
+
+ if (dst_tiling != I915_TILING_NONE) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ CMD |= XY_SRC_TILED;
+ src_pitch /= 4;
+ }
}
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
@@ -576,7 +776,9 @@ intel_emit_linear_blit(struct brw_context *brw,
dst_x = dst_offset % 64;
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
src_x, 0, /* src x/y */
dst_x, 0, /* dst x/y */
pitch, height, /* w, h */
@@ -595,7 +797,9 @@ intel_emit_linear_blit(struct brw_context *brw,
if (size != 0) {
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
src_x, 0, /* src x/y */
dst_x, 0, /* dst x/y */
size, 1, /* w, h */
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index 2287c379c4e..c3d19a5a20e 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -32,19 +32,21 @@
bool
intelEmitCopyBlit(struct brw_context *brw,
- GLuint cpp,
- GLshort src_pitch,
- drm_intel_bo *src_buffer,
- GLuint src_offset,
- uint32_t src_tiling,
- GLshort dst_pitch,
- drm_intel_bo *dst_buffer,
- GLuint dst_offset,
- uint32_t dst_tiling,
- GLshort srcx, GLshort srcy,
- GLshort dstx, GLshort dsty,
- GLshort w, GLshort h,
- GLenum logicop );
+ GLuint cpp,
+ GLshort src_pitch,
+ drm_intel_bo *src_buffer,
+ GLuint src_offset,
+ uint32_t src_tiling,
+ uint32_t src_tr_mode,
+ GLshort dst_pitch,
+ drm_intel_bo *dst_buffer,
+ GLuint dst_offset,
+ uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
+ GLshort srcx, GLshort srcy,
+ GLshort dstx, GLshort dsty,
+ GLshort w, GLshort h,
+ GLenum logicop);
bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index f4c7eff2904..3706704bf1a 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -126,9 +126,11 @@ copy_image_with_blitter(struct brw_context *brw,
src_mt->pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
src_width, src_height,
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index bd14e189da3..4223e11e78c 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -102,6 +102,8 @@
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
+#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22))
+
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
@@ -111,10 +113,24 @@
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
+/* BR00 */
+#define XY_FAST_SRC_TILED_64K (3 << 20)
+#define XY_FAST_SRC_TILED_Y (2 << 20)
+#define XY_FAST_SRC_TILED_X (1 << 20)
+
+#define XY_FAST_DST_TILED_64K (3 << 13)
+#define XY_FAST_DST_TILED_Y (2 << 13)
+#define XY_FAST_DST_TILED_X (1 << 13)
+
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
+#define BR13_16161616 (0x4 << 24)
+#define BR13_32323232 (0x5 << 24)
+
+#define XY_FAST_SRC_TRMODE_YF (1 << 31)
+#define XY_FAST_DST_TRMODE_YF (1 << 30)
/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT 0x2310
From 3df5aaaa158bfb878e9e5ce467dd654466942880 Mon Sep 17 00:00:00 2001
From: Anuj Phogat
Date: Wed, 27 May 2015 19:28:34 -0700
Subject: [PATCH 0083/1208] i965/skl: Extract the blit command setup into a
helper
Signed-off-by: Anuj Phogat
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/intel_blit.c | 93 +++++++++++++++++---------
1 file changed, 61 insertions(+), 32 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index c773cbca974..bc390535c86 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -456,6 +456,51 @@ can_fast_copy_blit(struct brw_context *brw,
return true;
}
+static uint32_t
+xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode,
+ uint32_t dst_tiling, uint32_t dst_tr_mode,
+ uint32_t cpp, bool use_fast_copy_blit)
+{
+ uint32_t CMD = 0;
+
+ if (use_fast_copy_blit) {
+ CMD = XY_FAST_COPY_BLT_CMD;
+
+ if (dst_tiling != I915_TILING_NONE)
+ SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST);
+
+ if (src_tiling != I915_TILING_NONE)
+ SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC);
+
+ CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
+ CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
+
+ CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
+ CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
+
+ } else {
+ assert(cpp <= 4);
+ switch (cpp) {
+ case 1:
+ case 2:
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (dst_tiling != I915_TILING_NONE)
+ CMD |= XY_DST_TILED;
+
+ if (src_tiling != I915_TILING_NONE)
+ CMD |= XY_SRC_TILED;
+ }
+ return CMD;
+}
+
/* Copy BitBlt
*/
bool
@@ -544,24 +589,18 @@ intelEmitCopyBlit(struct brw_context *brw,
if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF)
BR13 |= XY_FAST_DST_TRMODE_YF;
- CMD = XY_FAST_COPY_BLT_CMD;
+ CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+ dst_tiling, dst_tr_mode,
+ cpp, use_fast_copy_blit);
- if (dst_tiling != I915_TILING_NONE) {
- SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST);
- /* Pitch value should be specified as a number of Dwords. */
+ /* For tiled source and destination, pitch value should be specified
+ * as a number of Dwords.
+ */
+ if (dst_tiling != I915_TILING_NONE)
dst_pitch /= 4;
- }
- if (src_tiling != I915_TILING_NONE) {
- SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC);
- /* Pitch value should be specified as a number of Dwords. */
+
+ if (src_tiling != I915_TILING_NONE)
src_pitch /= 4;
- }
-
- CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
- CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
-
- CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
- CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
} else {
assert(!dst_y_tiled || (dst_pitch % 128) == 0);
@@ -599,26 +638,16 @@ intelEmitCopyBlit(struct brw_context *brw,
assert(cpp <= 4);
BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
- switch (cpp) {
- case 1:
- case 2:
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- return false;
- }
- if (dst_tiling != I915_TILING_NONE) {
- CMD |= XY_DST_TILED;
+ CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+ dst_tiling, dst_tr_mode,
+ cpp, use_fast_copy_blit);
+
+ if (dst_tiling != I915_TILING_NONE)
dst_pitch /= 4;
- }
- if (src_tiling != I915_TILING_NONE) {
- CMD |= XY_SRC_TILED;
+
+ if (src_tiling != I915_TILING_NONE)
src_pitch /= 4;
- }
}
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
From 54afb10f0e0a3b72a977c239c0aee04ea5dec967 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 22:04:50 -0400
Subject: [PATCH 0084/1208] nv30: provide a minimum map buffer alignment
Otherwise we return 0, which is out of spec. Return 64 like all the
other nouveau drivers.
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30_screen.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 2e38a1978ae..4b3ed7dc157 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -69,6 +69,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return PIPE_ENDIAN_LITTLE;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 16;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
@@ -135,7 +137,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
From b875198f1f0b7c90bcb22511c0050b06d8a33ac4 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 02:16:23 -0400
Subject: [PATCH 0085/1208] nv30: modernize fp upload logic
Signed-off-by: Ilia Mirkin
---
.../drivers/nouveau/nv30/nv30_fragprog.c | 24 +++++++++++--------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index 7f227868f73..dbf36fd53b5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -37,22 +37,26 @@ nv30_fragprog_upload(struct nv30_context *nv30)
struct nouveau_context *nv = &nv30->base;
struct nv30_fragprog *fp = nv30->fragprog.program;
struct pipe_context *pipe = &nv30->base.pipe;
- struct pipe_transfer *transfer;
- uint32_t *map;
- int i; (void)i;
- if (unlikely(!fp->buffer)) {
+ if (unlikely(!fp->buffer))
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
- }
- map = pipe_buffer_map(pipe, fp->buffer, PIPE_TRANSFER_WRITE, &transfer);
#ifndef PIPE_ARCH_BIG_ENDIAN
- memcpy(map, fp->insn, fp->insn_len * 4);
+ pipe_buffer_write(pipe, fp->buffer, 0, fp->insn_len * 4, fp->insn);
#else
- for (i = 0; i < fp->insn_len; i++)
- *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ {
+ struct pipe_transfer *transfer;
+ uint32_t *map;
+ int i;
+
+ map = pipe_buffer_map(pipe, fp->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+ &transfer);
+ for (i = 0; i < fp->insn_len; i++)
+ *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ pipe_buffer_unmap(pipe, transfer);
+ }
#endif
- pipe_buffer_unmap(pipe, transfer);
if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
From bad107f2ec24b16118f4d99c54b853277b1a966d Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 02:38:38 -0400
Subject: [PATCH 0086/1208] nv30: reset fragprog bufctx at bind time
A clear will do a partial validate, which will in turn reference all the
buffers in the bufctx again. However the fragprog last validated might
have already been deleted. So reset the bufctx when updating state.
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30_fragprog.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index dbf36fd53b5..54f91bbd48b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -165,8 +165,15 @@ static void
nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_fragprog *fp = hwcso;
- nv30->fragprog.program = hwcso;
+ /* reset the bufctx so that we don't keep a dangling reference to the fp
+ * code
+ */
+ if (fp != nv30->state.fragprog)
+ PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG);
+
+ nv30->fragprog.program = fp;
nv30->dirty |= NV30_NEW_FRAGPROG;
}
From dacf9efd6326bed1166750680bfaa4e173315eba Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 21:58:11 -0400
Subject: [PATCH 0087/1208] nv30: allow vertex state creation with 0 elements
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30_vbo.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index faa8812528a..adea1dcb77c 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -202,6 +202,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
return;
redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);
+ if (redefine == 0)
+ return;
+
BEGIN_NV04(push, NV30_3D(VTXFMT(0)), redefine);
for (i = 0; i < vertex->num_elements; i++) {
@@ -254,8 +257,6 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,
struct translate_key transkey;
unsigned i;
- assert(num_elements);
-
so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements);
if (!so)
return NULL;
From 089e7c378838e7972d2c0588bb84a316fb929a59 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Mon, 29 Jun 2015 21:58:54 -0400
Subject: [PATCH 0088/1208] nv30: align transfer stride to 64, required by
blit, sifm transfer impls
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 1a4b8929c0f..846dcebc3a5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -242,8 +242,8 @@ nv30_miptree_transfer_map(struct pipe_context *pipe, struct pipe_resource *pt,
tx->base.level = level;
tx->base.usage = usage;
tx->base.box = *box;
- tx->base.stride = util_format_get_nblocksx(pt->format, box->width) *
- util_format_get_blocksize(pt->format);
+ tx->base.stride = align(util_format_get_nblocksx(pt->format, box->width) *
+ util_format_get_blocksize(pt->format), 64);
tx->base.layer_stride = util_format_get_nblocksy(pt->format, box->height) *
tx->base.stride;
From e22e0de0d7c3a412bdd53c6d53825b7646624e3d Mon Sep 17 00:00:00 2001
From: Alexander von Gluck IV
Date: Mon, 29 Jun 2015 23:29:44 -0500
Subject: [PATCH 0089/1208] egl/haiku: fix Mesa build under Haiku
Performing a goto crosses the initialization of 'BWindow* win'
breaking the build. We also fix a missing semicolon.
---
src/egl/drivers/haiku/egl_haiku.cpp | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp
index 3d00e47c8e6..ef74f657b14 100644
--- a/src/egl/drivers/haiku/egl_haiku.cpp
+++ b/src/egl/drivers/haiku/egl_haiku.cpp
@@ -92,8 +92,11 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
return NULL;
}
- if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list))
- goto cleanup_surface;
+ if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT,
+ conf, attrib_list)) {
+ free(surface);
+ return NULL;
+ }
(&surface->surf)->SwapInterval = 1;
@@ -110,10 +113,6 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
TRACE("Showing window\n");
win->Show();
return &surface->surf;
-
-cleanup_surface:
- free(surface);
- return NULL;
}
@@ -139,7 +138,7 @@ haiku_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
if (_eglPutSurface(surf)) {
// XXX: detach haiku_egl_surface::gl from the native window and destroy it
free(surf);
- }
+ }
return EGL_TRUE;
}
@@ -153,7 +152,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
conf = (struct haiku_egl_config*) calloc(1, sizeof (*conf));
if (!conf) {
_eglError(EGL_BAD_ALLOC, "haiku_add_configs_for_visuals");
- return NULL;
+ return EGL_FALSE;
}
_eglInitConfig(&conf->base, dpy, 1);
@@ -165,7 +164,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
_eglSetConfigKey(&conf->base, EGL_LUMINANCE_SIZE, 0);
_eglSetConfigKey(&conf->base, EGL_ALPHA_SIZE, 8);
_eglSetConfigKey(&conf->base, EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER);
- EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE)
+ EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_GREEN_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_BLUE_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_ALPHA_SIZE));
@@ -195,7 +194,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
goto cleanup;
}
TRACE("Validated config\n");
-
+
_eglLinkConfig(&conf->base);
if (!_eglGetArraySize(dpy->Configs)) {
_eglLog(_EGL_WARNING, "Haiku: failed to create any config");
@@ -210,6 +209,7 @@ cleanup:
return EGL_FALSE;
}
+
extern "C"
EGLBoolean
init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
@@ -221,7 +221,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
return EGL_FALSE;
dpy->Version = 14;
-
+
TRACE("Initialization finished\n");
return EGL_TRUE;
@@ -271,7 +271,7 @@ haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx)
if (_eglPutContext(ctx)) {
// XXX: teardown the context ?
free(context);
- ctx = NULL
+ ctx = NULL;
}
return EGL_TRUE;
}
@@ -280,7 +280,7 @@ haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx)
extern "C"
EGLBoolean
haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf,
- _EGLSurface *rsurf, _EGLContext *ctx)
+ _EGLSurface *rsurf, _EGLContext *ctx)
{
CALLED();
@@ -314,7 +314,7 @@ extern "C"
void
haiku_unload(_EGLDriver* drv)
{
-
+
}
From 21b7c58b8a0cbf18c9ed90c260f01d00fefe0db2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Tue, 23 Jun 2015 23:57:31 -0700
Subject: [PATCH 0090/1208] i965: Don't use GCC extension for ?: with only two
operands.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From the "apparently I don't know C" files...GCC apparently supports:
x ?: y
which is equivalent to
x ? x : y
except that it doesn't cause side-effects to occur twice. See:
https://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals
This was confusing and looked like a typo. It doesn't really buy us
anything, so just write the obvious code in normal C.
Signed-off-by: Kenneth Graunke
Reviewed-by: Samuel Iglesias Gonsálvez
---
src/mesa/drivers/dri/i965/intel_fbo.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 9e6a7116630..05e3f8b7ae2 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -551,10 +551,12 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw,
irb->mt_layer = layer_multiplier * layer;
- if (layered) {
- irb->layer_count = image->TexObject->NumLayers ?: mt->level[level].depth / layer_multiplier;
- } else {
+ if (!layered) {
irb->layer_count = 1;
+ } else if (image->TexObject->NumLayers > 0) {
+ irb->layer_count = image->TexObject->NumLayers;
+ } else {
+ irb->layer_count = mt->level[level].depth / layer_multiplier;
}
intel_miptree_reference(&irb->mt, mt);
From d5f1253b0c4637ad996fd0da45095165006d61d3 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Tue, 30 Jun 2015 02:46:26 -0400
Subject: [PATCH 0091/1208] nv50/ir: fix emission of address reg in 3rd source
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91056
Signed-off-by: Ilia Mirkin
Cc: "10.5 10.6"
---
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 1bfc8e32e84..6de8f45047a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -499,10 +499,14 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
setSrc(i, 2, 2);
if (i->getIndirect(0, 0)) {
- assert(!i->getIndirect(1, 0));
+ assert(!i->srcExists(1) || !i->getIndirect(1, 0));
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 0);
- } else {
+ } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 1);
+ } else {
+ setAReg16(i, 2);
}
}
From edb8383c98ee23385731d0fc23a6b6673528a8ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 9 Jun 2015 13:28:44 +0300
Subject: [PATCH 0092/1208] glsl: Allow dynamic sampler array indexing with
GLSL ES < 3.00
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Dynamic indexing of sampler arrays is prohibited by GLSL ES 3.00.
Earlier versions allow 'constant-index-expression' indexing, where
index can contain a loop induction variable.
Patch allows dynamic indexing for sampler arrays when GLSL ES < 3.00.
This change makes 'sampler-array-index.frag' parser test in Piglit
pass + fishgl.com works when running Chrome on OpenGL ES 2.0 backend
v2: small change and some more commit message (Tapani)
v3: refactor checks to make it more readable (Ian Romanick)
v4: change warning comment in GLSL ES case (Curro)
Signed-off-by: Tapani Pälli
Signed-off-by: Kalyan Kondapally
Reviewed-by: Francisco Jerez
Cc: "10.5" and "10.6"
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84225
---
src/glsl/ast_array_index.cpp | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 752d86f72fd..2c79002ebcb 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -226,24 +226,24 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* dynamically uniform expression is undefined.
*/
if (array->type->without_array()->is_sampler()) {
- if (!state->is_version(130, 100)) {
- if (state->es_shader) {
- _mesa_glsl_warning(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions is optional in %s",
- state->get_version_string());
- } else {
- _mesa_glsl_warning(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions will be forbidden in GLSL 1.30 "
- "and later");
- }
- } else if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
- _mesa_glsl_error(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions is forbidden in GLSL 1.30 and "
- "later");
- }
+ if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
+ if (state->is_version(130, 300))
+ _mesa_glsl_error(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions are forbidden in GLSL %s "
+ "and later",
+ state->es_shader ? "ES 3.00" : "1.30");
+ else if (state->es_shader)
+ _mesa_glsl_warning(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions will be forbidden in GLSL "
+ "3.00 and later");
+ else
+ _mesa_glsl_warning(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions will be forbidden in GLSL "
+ "1.30 and later");
+ }
}
}
From e4512e1581cf90f56d13cfa6a809832ef3517283 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 9 Jun 2015 13:33:39 +0300
Subject: [PATCH 0093/1208] mesa/glsl: new compiler option
EmitNoIndirectSampler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Patch provides new compiler option for backend to force unroll loops
that have non-constant expression indexing on sampler arrays.
This makes sure that we can never end up with a shader that uses loop
induction variable as sampler array index but does not unroll because
of having too many instructions. This would not work without dynamic
indexing support.
v2: change option name as EmitNoIndirectSampler
Signed-off-by: Tapani Pälli
Reviewed-by: Francisco Jerez
Cc: "10.5" and "10.6"
---
src/glsl/loop_unroll.cpp | 12 ++++++++++++
src/mesa/main/mtypes.h | 1 +
2 files changed, 13 insertions(+)
diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp
index 635e1dd99cd..7a00fb8fea8 100644
--- a/src/glsl/loop_unroll.cpp
+++ b/src/glsl/loop_unroll.cpp
@@ -100,6 +100,18 @@ public:
virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
{
+ /* Force unroll in case of dynamic indexing with sampler arrays
+ * when EmitNoIndirectSampler is set.
+ */
+ if (options->EmitNoIndirectSampler) {
+ if ((ir->array->type->is_array() &&
+ ir->array->type->contains_sampler()) &&
+ !ir->array_index->constant_expression_value()) {
+ unsupported_variable_indexing = true;
+ return visit_continue;
+ }
+ }
+
/* Check for arrays variably-indexed by a loop induction variable.
* Unrolling the loop may convert that access into constant-indexing.
*
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 983b9dc307b..7b55677de30 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2881,6 +2881,7 @@ struct gl_shader_compiler_options
GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */
GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */
GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
+ GLboolean EmitNoIndirectSampler; /**< No indirect addressing of samplers */
/*@}*/
GLuint MaxIfDepth; /**< Maximum nested IF blocks */
From 8852e26e93af1fc4b72bf9d57e847f53e1a1371b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Wed, 24 Jun 2015 13:22:43 +0300
Subject: [PATCH 0094/1208] i965: use EmitNoIndirectSampler for gen < 7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Tapani Pälli
Reviewed-by: Francisco Jerez
Cc: "10.5" and "10.6"
---
src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 32c40131434..3e3d78b9ad7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -113,6 +113,10 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
(i == MESA_SHADER_FRAGMENT);
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+ /* !ARB_gpu_shader5 */
+ if (devinfo->gen < 7)
+ compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
}
compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
From 2dc2b12ed15abb84c7e2b3c2726dcc1b735abcda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Mon, 29 Jun 2015 09:53:45 +0300
Subject: [PATCH 0095/1208] i915: use EmitNoIndirectSampler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Tapani Pälli
Reviewed-by: Francisco Jerez
Cc: "10.5" and "10.6"
---
src/mesa/drivers/dri/i915/i915_context.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 42ea54e087d..57b033c07ea 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -255,6 +255,8 @@ i915CreateContext(int api,
* FINISHME: vertex shaders?
*/
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler =
+ true;
struct gl_shader_compiler_options *const fs_options =
& ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
@@ -266,6 +268,7 @@ i915CreateContext(int api,
fs_options->EmitNoIndirectOutput = true;
fs_options->EmitNoIndirectUniform = true;
fs_options->EmitNoIndirectTemp = true;
+ fs_options->EmitNoIndirectSampler = true;
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.QueryCounterBits.SamplesPassed = 0;
From f17c8c287f3581fccb52714fbd4b2ea09a58e3d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Mon, 29 Jun 2015 09:48:52 +0300
Subject: [PATCH 0096/1208] mesa/st: use EmitNoIndirectSampler if
!ARB_gpu_shader5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Tapani Pälli
Reviewed-by: Francisco Jerez
Reviewed-by: Marek Olšák
Cc: "10.5" and "10.6"
---
src/mesa/state_tracker/st_context.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ed9ed0f1b6c..62a0fbee3bb 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -287,6 +287,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3;
+ if (!ctx->Extensions.ARB_gpu_shader5) {
+ for (i = 0; i < MESA_SHADER_STAGES; i++)
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
+ }
+
_mesa_compute_version(ctx);
if (ctx->Version == 0) {
From 9350ea6979c48772e1fb55d4f1c7c5a3cfa987b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 19 May 2015 15:01:49 +0300
Subject: [PATCH 0097/1208] glsl: validate sampler array indexing for
'constant-index-expression'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Desktop GLSL < 130 and GLSL ES < 300 allow sampler array indexing where
index can contain a loop induction variable. This extra check will warn
during linking if some of the indexes could not be turned into constant
expressions.
v2: warning instead of error for backends that did not enable
EmitNoIndirectSampler option (have dynamic indexing)
Signed-off-by: Tapani Pälli
Reviewed-by: Francisco Jerez
Cc: "10.5" and "10.6"
---
src/glsl/linker.cpp | 77 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 4a726d4e2e7..74c2f2d4c92 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -346,6 +346,39 @@ private:
bool uses_non_zero_stream;
};
+/* Class that finds array derefs and check if indexes are dynamic. */
+class dynamic_sampler_array_indexing_visitor : public ir_hierarchical_visitor
+{
+public:
+ dynamic_sampler_array_indexing_visitor() :
+ dynamic_sampler_array_indexing(false)
+ {
+ }
+
+ ir_visitor_status visit_enter(ir_dereference_array *ir)
+ {
+ if (!ir->variable_referenced())
+ return visit_continue;
+
+ if (!ir->variable_referenced()->type->contains_sampler())
+ return visit_continue;
+
+ if (!ir->array_index->constant_expression_value()) {
+ dynamic_sampler_array_indexing = true;
+ return visit_stop;
+ }
+ return visit_continue;
+ }
+
+ bool uses_dynamic_sampler_array_indexing()
+ {
+ return dynamic_sampler_array_indexing;
+ }
+
+private:
+ bool dynamic_sampler_array_indexing;
+};
+
} /* anonymous namespace */
void
@@ -2743,6 +2776,40 @@ build_program_resource_list(struct gl_context *ctx,
*/
}
+/**
+ * This check is done to make sure we allow only constant expression
+ * indexing and "constant-index-expression" (indexing with an expression
+ * that includes loop induction variable).
+ */
+static bool
+validate_sampler_array_indexing(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ dynamic_sampler_array_indexing_visitor v;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ bool no_dynamic_indexing =
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler;
+
+ /* Search for array derefs in shader. */
+ v.run(prog->_LinkedShaders[i]->ir);
+ if (v.uses_dynamic_sampler_array_indexing()) {
+ const char *msg = "sampler arrays indexed with non-constant "
+ "expressions is forbidden in GLSL %s %u";
+ /* Backend has indicated that it has no dynamic indexing support. */
+ if (no_dynamic_indexing) {
+ linker_error(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+ return false;
+ } else {
+ linker_warning(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+ }
+ }
+ }
+ return true;
+}
+
void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
@@ -2961,6 +3028,16 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir);
}
+ /* Validation for special cases where we allow sampler array indexing
+ * with loop induction variable. This check emits a warning or error
+ * depending if backend can handle dynamic indexing.
+ */
+ if ((!prog->IsES && prog->Version < 130) ||
+ (prog->IsES && prog->Version < 300)) {
+ if (!validate_sampler_array_indexing(ctx, prog))
+ goto done;
+ }
+
/* Check and validate stream emissions in geometry shaders */
validate_geometry_shader_emissions(ctx, prog);
From 01b5f1336330f1c0f937fb08a444efc593b43435 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 24 Jun 2015 18:57:22 -0400
Subject: [PATCH 0098/1208] freedreno/ir3: fix constlen in case of
load_uniform_indirect
We can't rely on what we get from the assembler if we have indirect
addressing of constant file, since the assembler doesn't know the array
index. This got lost in the transition to NIR.
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 48b1d8f3606..53b8a6fb101 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1372,6 +1372,11 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_uniform_indirect(ctx, n,
get_addr(ctx, src[0]));
}
+ /* NOTE: if relative addressing is used, we set constlen in
+ * the compiler (to worst-case value) since we don't know in
+ * the assembler what the max addr reg value can be:
+ */
+ ctx->so->constlen = ctx->s->num_uniforms;
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_indirect:
From bb2c4b68f78f0105088c11408f8902fb22802125 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 10:52:34 -0400
Subject: [PATCH 0099/1208] freedreno/ir3: fixes for indirect writes
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/ir3/ir3.c | 1 -
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 9 ++++++++-
src/gallium/drivers/freedreno/ir3/ir3_ra.c | 6 ++++--
3 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a166b67d7cf..6f6dad59793 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -669,7 +669,6 @@ struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
return ir3_instr_create2(block, category, opc, 4);
}
-/* only used by old compiler: */
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr = instr_create(instr->block,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 53b8a6fb101..3b36114a5ba 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1307,7 +1307,7 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
* store_output_indirect? or move this into
* create_indirect_store()?
*/
- for (int j = i; j < arr->length; j += 4) {
+ for (int j = i; j < arr->length; j += intr->num_components) {
struct ir3_instruction *split;
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
@@ -1318,6 +1318,13 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
arr->arr[j] = split;
}
}
+ /* fixup fanout/split neighbors: */
+ for (int i = 0; i < arr->length; i++) {
+ arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
+ arr->arr[i+1] : NULL;
+ arr->arr[i]->cp.left = (i > 0) ?
+ arr->arr[i-1] : NULL;
+ }
break;
}
default:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index e5aba859fab..0436e01ab2c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -291,8 +291,6 @@ is_temp(struct ir3_register *reg)
{
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
- if (reg->flags & IR3_REG_RELATIV) // TODO
- return false;
if ((reg->num == regid(REG_A0, 0)) ||
(reg->num == regid(REG_P0, 0)))
return false;
@@ -312,6 +310,10 @@ static struct ir3_instruction *
get_definer(struct ir3_instruction *instr, int *sz, int *off)
{
struct ir3_instruction *d = NULL;
+
+ if (instr->fanin)
+ return get_definer(instr->fanin, sz, off);
+
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where collect is subset of array, we
* need to find the distance between where actual array starts
From 1370fde8af1b0b5c5e6204c0dea6ebffb85dce0a Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 14:32:08 -0400
Subject: [PATCH 0100/1208] freedreno/ir3: fix crash in RA
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/ir3/ir3_ra.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 0436e01ab2c..ee610c7d01e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -403,6 +403,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
* the phi, so we don't need to chase definers
*/
struct ir3_register *src;
+ struct ir3_instruction *dd = d;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
@@ -410,9 +411,11 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
foreach_src(src, d) {
if (!src->instr)
continue;
- if (src->instr->ip < d->ip)
- d = src->instr;
+ if (src->instr->ip < dd->ip)
+ dd = src->instr;
}
+
+ d = dd;
}
if (is_meta(d) && (d->opc == OPC_META_FO)) {
From 0a8c8fa770db4cc4ef3db89a5dae1d136361495d Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 13:38:03 -0400
Subject: [PATCH 0101/1208] freedreno/ir3: fix crash in fail path
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 3 +++
src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 3 +++
src/gallium/drivers/freedreno/ir3/ir3_shader.c | 9 ++++++---
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index b5838b58eb2..29f4bae93fa 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -60,6 +60,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+ if (!(fd3_emit_get_vp(emit) && fd3_emit_get_fp(emit)))
+ return;
+
fd3_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index de5a306af60..d070f5fd6b7 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -48,6 +48,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
const struct pipe_draw_info *info = emit->info;
+ if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
+ return;
+
fd4_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index b5b038100cc..c22b7f8d169 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -46,7 +46,8 @@ delete_variant(struct ir3_shader_variant *v)
{
if (v->ir)
ir3_destroy(v->ir);
- fd_bo_del(v->bo);
+ if (v->bo)
+ fd_bo_del(v->bo);
free(v);
}
@@ -228,8 +229,10 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
/* compile new variant if it doesn't exist already: */
v = create_variant(shader, key);
- v->next = shader->variants;
- shader->variants = v;
+ if (v) {
+ v->next = shader->variants;
+ shader->variants = v;
+ }
return v;
}
From 3244195f48affec1d3c2eb5d0e267c75b046db9f Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 13:55:49 -0400
Subject: [PATCH 0102/1208] freedreno/a4xx: fix for sparse-samplers
Some piglit tests, like arb_fragment_program-sparse-samplers, result in
having a null samp#0 but valid samp#1.
TODO: a3xx probably needs similar fix
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4b6eb646aa7..6cd2cd730f5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -223,15 +223,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- struct fd_resource *rsc = fd_resource(view->base.texture);
unsigned start = view->base.u.tex.first_level;
- uint32_t offset = fd_resource_offset(rsc, start, 0);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
OUT_RING(ring, view->texconst3);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ uint32_t offset = fd_resource_offset(rsc, start, 0);
+ OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
From db5105b4b35e064f3934154b45de15422a1bdb0a Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Mon, 29 Jun 2015 10:21:08 -0400
Subject: [PATCH 0103/1208] freedreno/ir3: add ir3_shader_disasm()
Split out most of dump_info() from ir3_cmdline compiler into a function
that can be used both by cmdline compiler and also for the disasm debug
option. This way, for FD_MESA_DEBUG=disasm we also get to see input/
output registers, etc.
Signed-off-by: Rob Clark
---
.../drivers/freedreno/ir3/ir3_cmdline.c | 116 +---------------
.../drivers/freedreno/ir3/ir3_shader.c | 127 +++++++++++++++++-
.../drivers/freedreno/ir3/ir3_shader.h | 1 +
3 files changed, 124 insertions(+), 120 deletions(-)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index ad9d2719d59..2b89fb442b4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -43,127 +43,15 @@
#include "instr-a3xx.h"
#include "ir3.h"
-static void dump_reg(const char *name, uint32_t r)
-{
- if (r != regid(63,0))
- debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
-}
-
-static void dump_semantic(struct ir3_shader_variant *so,
- unsigned sem, const char *name)
-{
- uint32_t regid;
- regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
- dump_reg(name, regid);
-}
-
static void dump_info(struct ir3_shader_variant *so, const char *str)
{
uint32_t *bin;
const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
-
- // for debug, dump some before/after info:
// TODO make gpu_id configurable on cmdline
bin = ir3_shader_assemble(so, 320);
- if (fd_mesa_debug & FD_DBG_DISASM) {
- struct ir3 *ir = so->ir;
- struct ir3_register *reg;
- uint8_t regid;
- unsigned i;
-
- debug_printf("; %s: %s\n", type, str);
-
- for (i = 0; i < ir->ninputs; i++) {
- if (!ir->inputs[i]) {
- debug_printf("; in%d unused\n", i);
- continue;
- }
- reg = ir->inputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@in(%sr%d.%c)\tin%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < ir->noutputs; i++) {
- if (!ir->outputs[i]) {
- debug_printf("; out%d unused\n", i);
- continue;
- }
- /* kill shows up as a virtual output.. skip it! */
- if (is_kill(ir->outputs[i]))
- continue;
- reg = ir->outputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@out(%sr%d.%c)\tout%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < so->immediates_count; i++) {
- debug_printf("@const(c%d.x)\t", so->first_immediate + i);
- debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
- so->immediates[i].val[0],
- so->immediates[i].val[1],
- so->immediates[i].val[2],
- so->immediates[i].val[3]);
- }
-
- disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
-
- debug_printf("; %s: outputs:", type);
- for (i = 0; i < so->outputs_count; i++) {
- uint8_t regid = so->outputs[i].regid;
- ir3_semantic sem = so->outputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem));
- }
- debug_printf("\n");
- debug_printf("; %s: inputs:", type);
- for (i = 0; i < so->inputs_count; i++) {
- uint8_t regid = so->inputs[i].regid;
- ir3_semantic sem = so->inputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem),
- so->inputs[i].compmask,
- so->inputs[i].inloc,
- so->inputs[i].bary);
- }
- debug_printf("\n");
- }
-
- /* print generic shader info: */
- debug_printf("; %s: %u instructions, %d half, %d full\n", type,
- so->info.instrs_count,
- so->info.max_half_reg + 1,
- so->info.max_reg + 1);
-
- /* print shader type specific info: */
- switch (so->type) {
- case SHADER_VERTEX:
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
- dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
- break;
- case SHADER_FRAGMENT:
- dump_reg("pos (bary)", so->pos_regid);
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
- dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
- /* these two are hard-coded since we don't know how to
- * program them to anything but all 0's...
- */
- if (so->frag_coord)
- debug_printf("; fragcoord: r0.x\n");
- if (so->frag_face)
- debug_printf("; fragface: hr0.x\n");
- break;
- case SHADER_COMPUTE:
- break;
- }
+ debug_printf("; %s: %s\n", type, str);
+ ir3_shader_disasm(so, bin);
free(bin);
-
- debug_printf("\n");
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index c22b7f8d169..bfcc2ca8a53 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -140,6 +140,13 @@ assemble_variant(struct ir3_shader_variant *v)
memcpy(fd_bo_map(v->bo), bin, sz);
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ struct ir3_shader_key key = v->key;
+ DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
+ key.binning_pass, key.color_two_side, key.half_precision);
+ ir3_shader_disasm(v, bin);
+ }
+
free(bin);
/* no need to keep the ir around beyond this point: */
@@ -179,12 +186,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
goto fail;
}
- if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
- key.binning_pass, key.color_two_side, key.half_precision);
- disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
- }
-
return v;
fail:
@@ -262,3 +263,117 @@ ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
shader->tokens = tgsi_dup_tokens(tokens);
return shader;
}
+
+static void dump_reg(const char *name, uint32_t r)
+{
+ if (r != regid(63,0))
+ debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
+}
+
+static void dump_semantic(struct ir3_shader_variant *so,
+ unsigned sem, const char *name)
+{
+ uint32_t regid;
+ regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+ dump_reg(name, regid);
+}
+
+void
+ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
+{
+ struct ir3 *ir = so->ir;
+ struct ir3_register *reg;
+ const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
+ uint8_t regid;
+ unsigned i;
+
+ for (i = 0; i < ir->ninputs; i++) {
+ if (!ir->inputs[i]) {
+ debug_printf("; in%d unused\n", i);
+ continue;
+ }
+ reg = ir->inputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@in(%sr%d.%c)\tin%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < ir->noutputs; i++) {
+ if (!ir->outputs[i]) {
+ debug_printf("; out%d unused\n", i);
+ continue;
+ }
+ /* kill shows up as a virtual output.. skip it! */
+ if (is_kill(ir->outputs[i]))
+ continue;
+ reg = ir->outputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@out(%sr%d.%c)\tout%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < so->immediates_count; i++) {
+ debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+ debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
+ so->immediates[i].val[0],
+ so->immediates[i].val[1],
+ so->immediates[i].val[2],
+ so->immediates[i].val[3]);
+ }
+
+ disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
+
+ debug_printf("; %s: outputs:", type);
+ for (i = 0; i < so->outputs_count; i++) {
+ uint8_t regid = so->outputs[i].regid;
+ ir3_semantic sem = so->outputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem));
+ }
+ debug_printf("\n");
+ debug_printf("; %s: inputs:", type);
+ for (i = 0; i < so->inputs_count; i++) {
+ uint8_t regid = so->inputs[i].regid;
+ ir3_semantic sem = so->inputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem),
+ so->inputs[i].compmask,
+ so->inputs[i].inloc,
+ so->inputs[i].bary);
+ }
+ debug_printf("\n");
+
+ /* print generic shader info: */
+ debug_printf("; %s: %u instructions, %d half, %d full\n", type,
+ so->info.instrs_count,
+ so->info.max_half_reg + 1,
+ so->info.max_reg + 1);
+
+ /* print shader type specific info: */
+ switch (so->type) {
+ case SHADER_VERTEX:
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
+ dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+ break;
+ case SHADER_FRAGMENT:
+ dump_reg("pos (bary)", so->pos_regid);
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
+ dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+ /* these two are hard-coded since we don't know how to
+ * program them to anything but all 0's...
+ */
+ if (so->frag_coord)
+ debug_printf("; fragcoord: r0.x\n");
+ if (so->frag_face)
+ debug_printf("; fragface: hr0.x\n");
+ break;
+ case SHADER_COMPUTE:
+ break;
+ }
+
+ debug_printf("\n");
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 9f1b0769180..3cf3167e1cc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -212,6 +212,7 @@ struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
void ir3_shader_destroy(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
+void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
/*
* Helper/util:
From 906da495272b1be4c278f5f7402594e3c52521c1 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sun, 28 Jun 2015 11:13:58 -0400
Subject: [PATCH 0104/1208] freedreno/ir3: fix RA issue with fanin
The fanin source could be grouped, for example with shaders like:
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[9]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0], LOCAL
0: MOV TEMP[0].xy, IN[1].xyyy
1: MOV TEMP[0].w, IN[1].wwww
2: TXF TEMP[0], TEMP[0], SAMP[0], 2D
3: MOV OUT[1], TEMP[0]
4: MOV OUT[0], IN[0]
5: END
The second arg to the isaml is IN[1].w, so we need to look at the fanin
source to get the correct offset.
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/ir3/ir3_ra.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index ee610c7d01e..9f6ff12a119 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -320,19 +320,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
* and fanin.. that probably doesn't happen currently.
*/
struct ir3_register *src;
+ int dsz, doff;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
*/
- foreach_src(src, instr) {
+ foreach_src_n(src, n, instr) {
+ struct ir3_instruction *dd;
if (!src->instr)
continue;
- if ((!d) || (src->instr->ip < d->ip))
- d = src->instr;
+
+ dd = get_definer(src->instr, &dsz, &doff);
+
+ if ((!d) || (dd->ip < d->ip)) {
+ d = dd;
+ *sz = dsz;
+ *off = doff - n;
+ }
}
- *sz = instr->regs_count - 1;
- *off = 0;
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
@@ -447,6 +453,10 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_instruction *defn;
int cls, sz, off;
+#ifdef DEBUG
+ instr->name = ~0;
+#endif
+
ctx->instr_cnt++;
if (instr->regs_count == 0)
From 00b6b41482985ba4a81fbb479a47c06ec83f3797 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Mon, 29 Jun 2015 14:49:08 -0400
Subject: [PATCH 0105/1208] freedreno/ir3: cache defining instruction
It is silly to traverse back to find first instruction that writes part
of a larger "virtual" register many times per instruction (plus per use
as a src to later instructions). Cache this information so we only
figure it out once.
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/ir3/ir3.c | 7 +-
src/gallium/drivers/freedreno/ir3/ir3.h | 2 +-
src/gallium/drivers/freedreno/ir3/ir3_ra.c | 151 ++++++++++++---------
3 files changed, 91 insertions(+), 69 deletions(-)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 6f6dad59793..1da6cf0477e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir)
}
/* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
ir3_count_instructions(struct ir3 *ir)
{
- unsigned ip = 0;
+ unsigned cnt = 0;
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- instr->ip = ip++;
+ instr->ip = cnt++;
}
block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
}
+ return cnt;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 9c35a763d58..bc0144568a5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 9f6ff12a119..de48ecfe280 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
return set;
}
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+ BITSET_WORD *def; /* variables defined before used in block */
+ BITSET_WORD *use; /* variables used before defined in block */
+ BITSET_WORD *livein; /* which defs reach entry point of block */
+ BITSET_WORD *liveout; /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+ /* cached instruction 'definer' info: */
+ struct ir3_instruction *defn;
+ int off, sz, cls;
+};
+
/* register-assign context, per-shader */
struct ir3_ra_ctx {
struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
unsigned class_base[total_class_count];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
- BITSET_WORD *def; /* variables defined before used in block */
- BITSET_WORD *use; /* variables used before defined in block */
- BITSET_WORD *livein; /* which defs reach entry point of block */
- BITSET_WORD *liveout; /* which defs reach exit point of block */
+ struct ir3_ra_instr_data *instrd;
};
static bool
@@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr)
}
static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+ int *sz, int *off)
{
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
if (instr->fanin)
- return get_definer(instr->fanin, sz, off);
+ return get_definer(ctx, instr->fanin, sz, off);
+
+ if (id->defn) {
+ *sz = id->sz;
+ *off = id->off;
+ return id->defn;
+ }
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where collect is subset of array, we
@@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
if (!src->instr)
continue;
- dd = get_definer(src->instr, &dsz, &doff);
+ dd = get_definer(ctx, src->instr, &dsz, &doff);
if ((!d) || (dd->ip < d->ip)) {
d = dd;
@@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
}
}
-
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
* scalar instructions for each component:
@@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(phi, &dsz, &doff);
+ dd = get_definer(ctx, phi, &dsz, &doff);
*sz = MAX2(*sz, dsz);
*off = doff;
@@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+ dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
/* by definition, should come before: */
debug_assert(dd->ip < d->ip);
@@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
d = dd;
}
+ id->defn = d;
+ id->sz = *sz;
+ id->off = *off;
+
return d;
}
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ if (instr->regs_count == 0)
+ continue;
+ id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+ id->cls = size_to_class(id->sz, is_half(id->defn));
+ }
+}
+
/* give each instruction a name (and ip), and count up the # of names
* of each class
*/
@@ -450,8 +481,7 @@ static void
ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
#ifdef DEBUG
instr->name = ~0;
@@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (!writes_gpr(instr))
continue;
- defn = get_definer(instr, &sz, &off);
-
- if (defn != instr)
+ if (id->defn != instr)
continue;
/* arrays which don't fit in one of the pre-defined class
@@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*
* TODO but we still need to allocate names for them, don't we??
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- instr->name = ctx->class_alloc_count[cls]++;
+ if (id->cls >= 0) {
+ instr->name = ctx->class_alloc_count[id->cls]++;
ctx->alloc_count++;
}
}
@@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
+ unsigned n;
+
ir3_clear_mark(ctx->ir);
- ir3_count_instructions(ctx->ir);
+ n = ir3_count_instructions(ctx->ir);
+
+ ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_find_definers(ctx, block);
+ }
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_name_instructions(ctx, block);
@@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+ ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
}
@@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (writes_gpr(instr)) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
- ctx->def[name] = defn->ip;
- ctx->use[name] = defn->ip;
+ ctx->def[name] = id->defn->ip;
+ ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
- if (is_half(defn)) {
+ if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
- ctx->set->half_classes[cls - class_count]);
+ ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
- ctx->set->classes[cls]);
+ ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
- if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
- struct ir3_instruction *phi = defn->regs[0]->instr;
+ if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
@@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
- struct ir3_instruction *srcdefn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
- srcdefn = get_definer(src, &sz, &off);
- cls = size_to_class(sz, is_half(srcdefn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, srcdefn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
@@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
}
@@ -795,15 +822,12 @@ static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
unsigned r = ra_get_node_reg(ctx->g, name);
- unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+ unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
@@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
- if (is_half(defn))
+ if (is_half(id->defn))
reg->flags |= IR3_REG_HALF;
}
}
@@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
for (j = 0; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
unsigned name, reg;
- cls = size_to_class(sz, is_half(defn));
- name = ra_name(ctx, cls, defn);
- reg = ctx->set->gpr_to_ra_reg[cls][j];
+ name = ra_name(ctx, id->cls, id->defn);
+ reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
- j += sz;
+ j += id->sz;
}
}
}
From dc7e6463d3ec6980f1517ff10048e0dbf5bb38ad Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sat, 27 Jun 2015 10:07:18 -0400
Subject: [PATCH 0106/1208] nir: cleanup open-coded instruction casts
Signed-off-by: Rob Clark
Reviewed-by: Jason Ekstrand
---
src/glsl/nir/nir_lower_alu_to_scalar.c | 2 +-
src/glsl/nir/nir_lower_vec_to_movs.c | 2 +-
src/glsl/nir/nir_search.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index 25bba4ef0b6..5d15fb2bc32 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -164,7 +164,7 @@ lower_alu_to_scalar_block(nir_block *block, void *data)
{
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_alu)
- lower_alu_instr_scalar((nir_alu_instr *)instr, data);
+ lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
}
return true;
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 602853ea665..e6d522f88ce 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -90,7 +90,7 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
if (instr->type != nir_instr_type_alu)
continue;
- nir_alu_instr *vec = (nir_alu_instr *)instr;
+ nir_alu_instr *vec = nir_instr_as_alu(instr);
switch (vec->op) {
case nir_op_vec2:
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index 0c4e48ce965..c33d6c3eb84 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -48,7 +48,7 @@ src_is_bool(nir_src src)
return false;
if (src.ssa->parent_instr->type != nir_instr_type_alu)
return false;
- return alu_instr_is_bool((nir_alu_instr *)src.ssa->parent_instr);
+ return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr));
}
static bool
From 6082515de7c7b4885bd685d88aee32fc9e5103a1 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sat, 27 Jun 2015 09:58:28 -0400
Subject: [PATCH 0107/1208] gallium/ttn: partial fix for output arrays
It isn't practical to enable the TGSI_ANY_INOUT_DECL_RANGE shader
cap yet, at least not in drivers that need the lower_to_scalar pass (which
right now is all of the ttn users), since the register arrays do not get
converted to SSA, which angers nir_lower_alu_to_scalar.
Signed-off-by: Rob Clark
Reviewed-by: Eric Anholt
---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 065bbf050c2..c5b65eeae0c 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1716,9 +1716,11 @@ ttn_add_output_stores(struct ttn_compile *c)
for (i = 0; i < array_len; i++) {
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ unsigned loc = var->data.driver_location + i;
store->num_components = 4;
- store->const_index[0] = var->data.driver_location + i;
- store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
+ store->const_index[0] = loc;
+ store->src[0].reg.reg = c->output_regs[loc].reg;
+ store->src[0].reg.base_offset = c->output_regs[loc].offset;
nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
}
}
From d1f0e019797863b23388bfef53a77f659f749d3c Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 13:48:29 -0400
Subject: [PATCH 0108/1208] gallium/ttn: add TXB2
Signed-off-by: Rob Clark
Reviewed-by: Eric Anholt
---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index c5b65eeae0c..bd4cdcbbfcf 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1068,6 +1068,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
op = nir_texop_txb;
num_srcs = 2;
break;
+ case TGSI_OPCODE_TXB2:
+ op = nir_texop_txb;
+ num_srcs = 2;
+ samp = 2;
+ break;
case TGSI_OPCODE_TXL:
op = nir_texop_txl;
num_srcs = 2;
@@ -1169,6 +1174,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
src_number++;
}
+ if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+ instr->src[src_number].src_type = nir_tex_src_bias;
+ src_number++;
+ }
+
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
instr->src[src_number].src_type = nir_tex_src_lod;
From 879dcf07f6a3ab56f23d540b0df94c57e0706094 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 26 Jun 2015 14:24:08 -0400
Subject: [PATCH 0109/1208] gallium/ttn: don't upset nir_validate w/ BRK's
Previously we were unconditionally calling ttn_get_src() even for
instructions with no srcs, which created a lot of unnecessary
load_const instructions. These were mostly harmless since NIR opt
passes would strip them back out. But for an ENDIF following a
BRK, it would result in load_const instructions created after the
NIR break instruction, which nir_validate dislikes.
But we can actually just do the right thing by using NumSrcRegs instead.
Signed-off-by: Rob Clark
Reviewed-by: Eric Anholt
---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index bd4cdcbbfcf..e1647d53a40 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1491,7 +1491,7 @@ ttn_emit_instruction(struct ttn_compile *c)
return;
nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
- for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
+ for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
}
nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
From 6098ef824467f685fb34914eb3fef73b3ba18c6f Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Thu, 18 Jun 2015 20:16:46 +0100
Subject: [PATCH 0110/1208] egl/drm: plug memory leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Free the memory for dri2_surf in the unlikely case that one provides
NULL for native_window. Also set the relevant EGL_ERROR to provide
feedback to the user.
Signed-off-by: Emil Velikov
Reviewed-by: Marek Olšák
---
src/egl/drivers/dri2/platform_drm.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index a62da4121fe..0d1f4c6e0a7 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -115,8 +115,11 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
switch (type) {
case EGL_WINDOW_BIT:
- if (!window)
- return NULL;
+ if (!window) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ goto cleanup_surf;
+ }
+
surf = gbm_dri_surface(window);
dri2_surf->gbm_surf = surf;
dri2_surf->base.Width = surf->base.width;
From 0afa6335079093627b47ff08da38bed00972c217 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Thu, 18 Jun 2015 20:19:32 +0100
Subject: [PATCH 0111/1208] egl/wayland: handle NULL native_window in
create_surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Raise EGL_BAD_NATIVE_WINDOW instead of crashing.
v2: s/Rise/Raise/ (spotted by Michel)
Signed-off-by: Emil Velikov
Reviewed-by: Marek Olšák
---
src/egl/drivers/dri2/platform_wayland.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index 160fa8ce8d7..6f42d90ed96 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -149,6 +149,11 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
else
dri2_surf->format = WL_DRM_FORMAT_ARGB8888;
+ if (!window) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ goto cleanup_surf;
+ }
+
dri2_surf->wl_win = window;
dri2_surf->wl_win->private = dri2_surf;
From 4ea5223a95436b76a3f808732c565e9833f84551 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Thu, 18 Jun 2015 20:22:54 +0100
Subject: [PATCH 0112/1208] egl/wayland: cleanup dri2_wl_create_surface error
path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Emil Velikov
Reviewed-by: Marek Olšák
---
src/egl/drivers/dri2/platform_wayland.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index 6f42d90ed96..1e127607cdd 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -168,13 +168,11 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
dri2_surf);
if (dri2_surf->dri_drawable == NULL) {
_eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable");
- goto cleanup_dri_drawable;
+ goto cleanup_surf;
}
return &dri2_surf->base;
- cleanup_dri_drawable:
- dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable);
cleanup_surf:
free(dri2_surf);
From af2aea40d29dffd5e584432e0652db114113469b Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Thu, 18 Jun 2015 20:39:28 +0100
Subject: [PATCH 0113/1208] egl/x11: handle when invalid drawable is passed in
create_surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
0 is never a valid drawable id, so there is no point in
attempting to query its geometry. Just bail out early and provide the
more meaningful EGL_BAD_NATIVE_WINDOW to the user.
Signed-off-by: Emil Velikov
Reviewed-by: Marek Olšák
---
src/egl/drivers/dri2/platform_x11.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 0fbf4e40f2f..ad40bd57aa6 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -235,6 +235,10 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
dri2_surf->drawable, screen->root,
dri2_surf->base.Width, dri2_surf->base.Height);
} else {
+ if (!drawable) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ goto cleanup_surf;
+ }
dri2_surf->drawable = drawable;
}
From 2b1a1d8b1294f91b7ac563da1f395deba4384765 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Wed, 24 Jun 2015 05:28:34 -0700
Subject: [PATCH 0114/1208] nir/from_ssa: add a flag to not convert everything
from SSA
We already don't convert constants out of SSA, and in our backend we'd
like to have only one way of saying something is still in SSA.
The one tricky part about this is that we may now leave some undef
instructions around if they aren't part of a phi-web, so we have to be
more careful about deleting them.
v2: rename and flip meaning of flag (Jason)
Reviewed-by: Jason Ekstrand
---
src/gallium/drivers/vc4/vc4_program.c | 2 +-
src/glsl/nir/nir.h | 7 ++++++-
src/glsl/nir/nir_from_ssa.c | 25 ++++++++++++++++++-------
src/mesa/drivers/dri/i965/brw_nir.c | 2 +-
4 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 2061631dc9e..7b39a03f01a 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2102,7 +2102,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
nir_remove_dead_variables(c->s);
- nir_convert_from_ssa(c->s);
+ nir_convert_from_ssa(c->s, false);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 697d37e95ac..8c99845f04d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1676,7 +1676,12 @@ bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
void nir_convert_to_ssa_impl(nir_function_impl *impl);
void nir_convert_to_ssa(nir_shader *shader);
-void nir_convert_from_ssa(nir_shader *shader);
+
+/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
+ * registers. If false, convert all values (even those not involved in a phi
+ * node) to registers.
+ */
+void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 67733e6da4f..e4a153e9584 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -37,6 +37,7 @@
struct from_ssa_state {
void *mem_ctx;
void *dead_ctx;
+ bool phi_webs_only;
struct hash_table *merge_node_table;
nir_instr *instr;
nir_function_impl *impl;
@@ -482,6 +483,9 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
reg = node->set->reg;
} else {
+ if (state->phi_webs_only)
+ return true;
+
/* We leave load_const SSA values alone. They act as immediates to
* the backend. If it got coalesced into a phi, that's ok.
*/
@@ -505,8 +509,15 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
assert(list_empty(&def->uses) && list_empty(&def->if_uses));
- if (def->parent_instr->type == nir_instr_type_ssa_undef)
+ if (def->parent_instr->type == nir_instr_type_ssa_undef) {
+ /* If it's an ssa_undef instruction, remove it since we know we just got
+ * rid of all its uses.
+ */
+ nir_instr *parent_instr = def->parent_instr;
+ nir_instr_remove(parent_instr);
+ ralloc_steal(state->dead_ctx, parent_instr);
return true;
+ }
assert(def->parent_instr->type != nir_instr_type_load_const);
@@ -523,7 +534,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
}
/* Resolves ssa definitions to registers. While we're at it, we also
- * remove phi nodes and ssa_undef instructions
+ * remove phi nodes.
*/
static bool
resolve_registers_block(nir_block *block, void *void_state)
@@ -534,8 +545,7 @@ resolve_registers_block(nir_block *block, void *void_state)
state->instr = instr;
nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
- if (instr->type == nir_instr_type_ssa_undef ||
- instr->type == nir_instr_type_phi) {
+ if (instr->type == nir_instr_type_phi) {
nir_instr_remove(instr);
ralloc_steal(state->dead_ctx, instr);
}
@@ -765,13 +775,14 @@ resolve_parallel_copies_block(nir_block *block, void *void_state)
}
static void
-nir_convert_from_ssa_impl(nir_function_impl *impl)
+nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
{
struct from_ssa_state state;
state.mem_ctx = ralloc_parent(impl);
state.dead_ctx = ralloc_context(NULL);
state.impl = impl;
+ state.phi_webs_only = phi_webs_only;
state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
@@ -801,10 +812,10 @@ nir_convert_from_ssa_impl(nir_function_impl *impl)
}
void
-nir_convert_from_ssa(nir_shader *shader)
+nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
{
nir_foreach_overload(shader, overload) {
if (overload->impl)
- nir_convert_from_ssa_impl(overload->impl);
+ nir_convert_from_ssa_impl(overload->impl, phi_webs_only);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index dffb8ab1ca7..d87e78312fd 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -156,7 +156,7 @@ brw_create_nir(struct brw_context *brw,
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir);
+ nir_convert_from_ssa(nir, false);
nir_validate_shader(nir);
/* This is the last pass we run before we start emitting stuff. It
From 864907e2f14523c130e6ff24c081789bb079bae1 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Wed, 24 Jun 2015 12:28:47 -0700
Subject: [PATCH 0115/1208] i965/fs: use SSA values directly
Before, we would use registers, but set a magical "parent_instr" field
to indicate that it was actually purely an SSA value (i.e., it wasn't
involved in any phi nodes). Instead, just use SSA values directly, which
lets us get rid of the hack and reduces memory usage since we're not
allocating a nir_register for every value. It also makes our handling of
load_const more consistent compared to the other instructions.
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_fs.h | 3 +
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 64 ++++++++++++-------
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 +
src/mesa/drivers/dri/i965/brw_nir.c | 2 +-
.../i965/brw_nir_analyze_boolean_resolves.c | 12 ++--
5 files changed, 52 insertions(+), 30 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d08d438a40e..8170f2aa109 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -249,6 +249,8 @@ public:
void nir_emit_block(nir_block *block);
void nir_emit_instr(nir_instr *instr);
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
+ void nir_emit_undef(const brw::fs_builder &bld,
+ nir_ssa_undef_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_texture(const brw::fs_builder &bld,
@@ -345,6 +347,7 @@ public:
unsigned max_grf;
fs_reg *nir_locals;
+ fs_reg *nir_ssa_values;
fs_reg *nir_globals;
fs_reg nir_inputs;
fs_reg nir_outputs;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 59081eab877..166586ff52f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -366,6 +366,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
}
+ nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg,
+ impl->ssa_alloc);
+
nir_emit_cf_list(&impl->body);
}
@@ -464,6 +467,10 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
*/
break;
+ case nir_instr_type_ssa_undef:
+ nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
+ break;
+
case nir_instr_type_jump:
nir_emit_jump(abld, nir_instr_as_jump(instr));
break;
@@ -495,17 +502,12 @@ bool
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result)
{
- if (instr->src[0].src.is_ssa ||
- !instr->src[0].src.reg.reg ||
- !instr->src[0].src.reg.reg->parent_instr)
- return false;
-
- if (instr->src[0].src.reg.reg->parent_instr->type !=
- nir_instr_type_intrinsic)
+ if (!instr->src[0].src.is_ssa ||
+ instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *src0 =
- nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);
+ nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr);
if (src0->intrinsic != nir_intrinsic_load_front_face)
return false;
@@ -1146,6 +1148,13 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
}
}
+void
+fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
+{
+ nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D,
+ instr->def.num_components);
+}
+
static fs_reg
fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
unsigned base_offset, nir_src *indirect)
@@ -1171,30 +1180,39 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
fs_reg
fs_visitor::get_nir_src(nir_src src)
{
+ fs_reg reg;
if (src.is_ssa) {
- assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
- nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
- fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
+ if (src.ssa->parent_instr->type == nir_instr_type_load_const) {
+ nir_load_const_instr *load =
+ nir_instr_as_load_const(src.ssa->parent_instr);
+ reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
- for (unsigned i = 0; i < src.ssa->num_components; ++i)
- bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
-
- return reg;
+ for (unsigned i = 0; i < src.ssa->num_components; ++i)
+ bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
+ } else {
+ reg = nir_ssa_values[src.ssa->index];
+ }
} else {
- fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
- src.reg.indirect);
-
- /* to avoid floating-point denorm flushing problems, set the type by
- * default to D - instructions that need floating point semantics will set
- * this to F if they need to
- */
- return retype(reg, BRW_REGISTER_TYPE_D);
+ reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
+ src.reg.indirect);
}
+
+ /* to avoid floating-point denorm flushing problems, set the type by
+ * default to D - instructions that need floating point semantics will set
+ * this to F if they need to
+ */
+ return retype(reg, BRW_REGISTER_TYPE_D);
}
fs_reg
fs_visitor::get_nir_dest(nir_dest dest)
{
+ if (dest.is_ssa) {
+ nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F,
+ dest.ssa.num_components);
+ return nir_ssa_values[dest.ssa.index];
+ }
+
return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
dest.reg.indirect);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 34bf32d7ab3..395af73bc2e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2026,6 +2026,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->no16_msg = NULL;
this->nir_locals = NULL;
+ this->nir_ssa_values = NULL;
this->nir_globals = NULL;
memset(&this->payload, 0, sizeof(this->payload));
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index d87e78312fd..3e154c10526 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -156,7 +156,7 @@ brw_create_nir(struct brw_context *brw,
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir, false);
+ nir_convert_from_ssa(nir, true);
nir_validate_shader(nir);
/* This is the last pass we run before we start emitting stuff. It
diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
index f0b018cf84a..9eb0ed9bd79 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
@@ -43,8 +43,8 @@
static uint8_t
get_resolve_status_for_src(nir_src *src)
{
- nir_instr *src_instr = nir_src_get_parent_instr(src);
- if (src_instr) {
+ if (src->is_ssa) {
+ nir_instr *src_instr = src->ssa->parent_instr;
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
/* If the source instruction needs resolve, then from the perspective
@@ -66,8 +66,8 @@ get_resolve_status_for_src(nir_src *src)
static bool
src_mark_needs_resolve(nir_src *src, void *void_state)
{
- nir_instr *src_instr = nir_src_get_parent_instr(src);
- if (src_instr) {
+ if (src->is_ssa) {
+ nir_instr *src_instr = src->ssa->parent_instr;
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
/* If the source instruction is unresolved, then mark it as needing
@@ -172,11 +172,11 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
resolve_status = BRW_NIR_NON_BOOLEAN;
}
- /* If the destination is SSA-like, go ahead allow unresolved booleans.
+ /* If the destination is SSA, go ahead and allow unresolved booleans.
* If the destination register doesn't have a well-defined parent_instr
* we need to resolve immediately.
*/
- if (alu->dest.dest.reg.reg->parent_instr == NULL &&
+ if (!alu->dest.dest.is_ssa &&
resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
}
From 0ecdf04060518149e99a098caf4f6025fd6482a4 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Thu, 25 Jun 2015 16:22:26 -0700
Subject: [PATCH 0116/1208] i965/fs: emit constants only once
Before, we would lazily emit a MOV whenever we encountered a use of a
constant. Now that we have a dedicated file for SSA values, we can
instead only emit the MOVs once, which is more consistent and prevents
us from relying on CSE to re-combine the constants when they aren't
absorbed into the instruction.
total instructions in shared programs: 6078991 -> 6073118 (-0.10%)
instructions in affected programs: 402221 -> 396348 (-1.46%)
helped: 1527
HURT: 0
GAINED: 8
LOST: 2
v2: split this out from the previous commit (Jason)
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 ++
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 27 ++++++++++++------------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8170f2aa109..f20b540020f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -249,6 +249,8 @@ public:
void nir_emit_block(nir_block *block);
void nir_emit_instr(nir_instr *instr);
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
+ void nir_emit_load_const(const brw::fs_builder &bld,
+ nir_load_const_instr *instr);
void nir_emit_undef(const brw::fs_builder &bld,
nir_ssa_undef_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 166586ff52f..58896d72e14 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -462,9 +462,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_load_const:
- /* We can hit these, but we do nothing now and use them as
- * immediates later.
- */
+ nir_emit_load_const(abld, nir_instr_as_load_const(instr));
break;
case nir_instr_type_ssa_undef:
@@ -1148,6 +1146,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
}
}
+void
+fs_visitor::nir_emit_load_const(const fs_builder &bld,
+ nir_load_const_instr *instr)
+{
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
+
+ for (unsigned i = 0; i < instr->def.num_components; i++)
+ bld.MOV(offset(reg, i), fs_reg(instr->value.i[i]));
+
+ nir_ssa_values[instr->def.index] = reg;
+}
+
void
fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
{
@@ -1182,16 +1192,7 @@ fs_visitor::get_nir_src(nir_src src)
{
fs_reg reg;
if (src.is_ssa) {
- if (src.ssa->parent_instr->type == nir_instr_type_load_const) {
- nir_load_const_instr *load =
- nir_instr_as_load_const(src.ssa->parent_instr);
- reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
-
- for (unsigned i = 0; i < src.ssa->num_components; ++i)
- bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
- } else {
- reg = nir_ssa_values[src.ssa->index];
- }
+ reg = nir_ssa_values[src.ssa->index];
} else {
reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
src.reg.indirect);
From f49e51ef44ac6400967731b75db871129b6c45f5 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Wed, 24 Jun 2015 12:43:15 -0700
Subject: [PATCH 0117/1208] nir: remove nir_src_get_parent_instr()
It's now unused.
Reviewed-by: Jason Ekstrand
---
src/glsl/nir/nir.h | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 8c99845f04d..e48db72c53d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -565,16 +565,6 @@ nir_src_for_reg(nir_register *reg)
return src;
}
-static inline nir_instr *
-nir_src_get_parent_instr(const nir_src *src)
-{
- if (src->is_ssa) {
- return src->ssa->parent_instr;
- } else {
- return src->reg.reg->parent_instr;
- }
-}
-
static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
From aa7d4cecec1a1236d237b83ebf035285f438ee67 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Wed, 24 Jun 2015 12:55:41 -0700
Subject: [PATCH 0118/1208] nir: remove parent_instr from nir_register
It's no longer used.
Reviewed-by: Jason Ekstrand
---
src/glsl/nir/nir.c | 1 -
src/glsl/nir/nir.h | 8 --------
src/glsl/nir/nir_from_ssa.c | 8 --------
3 files changed, 17 deletions(-)
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index f03e80a4e0e..f661249f9bb 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -57,7 +57,6 @@ reg_create(void *mem_ctx, struct exec_list *list)
{
nir_register *reg = ralloc(mem_ctx, nir_register);
- reg->parent_instr = NULL;
list_inithead(&reg->uses);
list_inithead(&reg->defs);
list_inithead(&reg->if_uses);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e48db72c53d..4cb7d2f1eac 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -389,14 +389,6 @@ typedef struct {
*/
bool is_packed;
- /**
- * If this pointer is non-NULL then this register has exactly one
- * definition and that definition dominates all of its uses. This is
- * set by the out-of-SSA pass so that backends can get SSA-like
- * information even once they have gone out of SSA.
- */
- struct nir_instr *parent_instr;
-
/** set of nir_instr's where this register is used (read from) */
struct list_head uses;
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index e4a153e9584..1fd8b24d33d 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -496,14 +496,6 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
reg->name = def->name;
reg->num_components = def->num_components;
reg->num_array_elems = 0;
-
- /* This register comes from an SSA definition that is defined and not
- * part of a phi-web. Therefore, we know it has a single unique
- * definition that dominates all of its uses; we can copy the
- * parent_instr from the SSA def safely.
- */
- if (def->parent_instr->type != nir_instr_type_ssa_undef)
- reg->parent_instr = def->parent_instr;
}
nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
From 3258e1b80d66ec26f14a24a5eae0629a2d23a444 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 11:53:08 -0700
Subject: [PATCH 0119/1208] i965/fs: Use a switch statement in
fs_inst::regs_read()
This makes things a little simpler, more efficient, and quite a bit more
readable.
Reviewed-by: Iago Toral Quiroga
Reviewed-by: Topi Pohjolainen
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 45 ++++++++++++++--------------
1 file changed, 23 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8658554e96b..79ca33e42ed 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -701,28 +701,29 @@ fs_inst::is_partial_write() const
int
fs_inst::regs_read(int arg) const
{
- if (is_tex() && arg == 0 && src[0].file == GRF) {
- return mlen;
- } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_ATOMIC && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_READ && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) {
- return mlen;
- } else if (opcode == FS_OPCODE_LINTERP && arg == 0) {
- return exec_size / 4;
+ switch (opcode) {
+ case FS_OPCODE_FB_WRITE:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ if (arg == 0)
+ return mlen;
+ break;
+
+ case FS_OPCODE_LINTERP:
+ if (arg == 0)
+ return exec_size / 4;
+ break;
+
+ default:
+ if (is_tex() && arg == 0 && src[0].file == GRF)
+ return mlen;
+ break;
}
switch (src[arg].file) {
From 241317d59ab440bdcda25bacaadacfb3b4c2dd93 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 19 Jun 2015 12:58:37 -0700
Subject: [PATCH 0120/1208] i965/fs: Actually set/use the mlen for gen7 uniform
pull constant loads
Previously, we were allocating the payload with different sizes per gen and
then figuring out the mlen in the generator based on gen. This meant,
among other things, that the higher level passes knew nothing about it.
Acked-by: Francisco Jerez
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 19 ++++++++++++-------
.../drivers/dri/i965/brw_fs_generator.cpp | 9 +++------
2 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 79ca33e42ed..94f42949ce2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2909,14 +2909,18 @@ fs_visitor::lower_uniform_pull_constant_loads()
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
- fs_reg payload = fs_reg(GRF, alloc.allocate(1));
- /* We have to use a message header on Skylake to get SIMD4x2 mode.
- * Reserve space for the register.
- */
+ fs_reg payload, offset;
if (devinfo->gen >= 9) {
- payload.reg_offset++;
- alloc.sizes[payload.reg] = 2;
+ /* We have to use a message header on Skylake to get SIMD4x2
+ * mode. Reserve space for the register.
+ */
+ offset = payload = fs_reg(GRF, alloc.allocate(2));
+ offset.reg_offset++;
+ inst->mlen = 2;
+ } else {
+ offset = payload = fs_reg(GRF, alloc.allocate(1));
+ inst->mlen = 1;
}
/* This is actually going to be a MOV, but since only the first dword
@@ -2925,7 +2929,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
* by live variable analysis, or register allocation will explode.
*/
fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
- 8, payload, const_offset_reg);
+ 8, offset, const_offset_reg);
setup->force_writemask_all = true;
setup->ir = inst->ir;
@@ -2938,6 +2942,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
*/
inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
inst->src[1] = payload;
+ inst->base_mrf = -1;
invalidate_live_intervals();
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2ed0bac6fd9..8d821abbac2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1054,7 +1054,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset)
{
- assert(inst->mlen == 0);
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
@@ -1069,12 +1068,10 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg src = offset;
bool header_present = false;
- int mlen = 1;
if (devinfo->gen >= 9) {
/* Skylake requires a message header in order to use SIMD4x2 mode. */
- src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
- mlen = 2;
+ src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD);
header_present = true;
brw_push_insn_state(p);
@@ -1105,7 +1102,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- mlen,
+ inst->mlen,
header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1135,7 +1132,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- mlen,
+ inst->mlen,
header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
From aca5228011e7b9e96f3bd3a621c88e63ba47a4f3 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Wed, 17 Jun 2015 18:02:11 -0700
Subject: [PATCH 0121/1208] i965/fs: Fix fs_inst::regs_read() for uniform pull
constant loads
Previously, fs_inst::regs_read() fell back to depending on the register
width for the second source. This isn't really correct since it isn't a
SIMD8 value at all, but a SIMD4x2 value. This commit changes it to
explicitly be always one register.
v2: Use mlen for determining the number of registers read
Reviewed-by: Iago Toral Quiroga
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 94f42949ce2..e83a0923e80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -715,6 +715,12 @@ fs_inst::regs_read(int arg) const
return mlen;
break;
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ /* The payload is actually stored in src1 */
+ if (arg == 1)
+ return mlen;
+ break;
+
case FS_OPCODE_LINTERP:
if (arg == 0)
return exec_size / 4;
From 12bc22ef58377191508af91a918efd18e2da7500 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 17:48:27 -0700
Subject: [PATCH 0122/1208] i965/fs: Report the right value in
fs_inst::regs_read() for PIXEL_X/Y
Reviewed-by: Iago Toral Quiroga
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e83a0923e80..d91ad0a0650 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -701,6 +701,7 @@ fs_inst::is_partial_write() const
int
fs_inst::regs_read(int arg) const
{
+ unsigned components = 1;
switch (opcode) {
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
@@ -726,6 +727,12 @@ fs_inst::regs_read(int arg) const
return exec_size / 4;
break;
+ case FS_OPCODE_PIXEL_X:
+ case FS_OPCODE_PIXEL_Y:
+ if (arg == 0)
+ components = 1;
+ break;
+
default:
if (is_tex() && arg == 0 && src[0].file == GRF)
return mlen;
@@ -742,8 +749,8 @@ fs_inst::regs_read(int arg) const
if (src[arg].stride == 0) {
return 1;
} else {
- int size = src[arg].width * src[arg].stride * type_sz(src[arg].type);
- return (size + 31) / 32;
+ int size = components * src[arg].width * type_sz(src[arg].type);
+ return DIV_ROUND_UP(size * src[arg].stride, 32);
}
case MRF:
unreachable("MRF registers are not allowed as sources");
From c5a8da5f24eae4479b4ebe6301d780f781e24ed2 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Tue, 30 Jun 2015 15:51:13 -0700
Subject: [PATCH 0123/1208] i965/fs: Properly handle LOAD_PAYLOAD in
fs_inst::regs_read
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d91ad0a0650..cae4e4263ea 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -733,6 +733,11 @@ fs_inst::regs_read(int arg) const
components = 1;
break;
+ case SHADER_OPCODE_LOAD_PAYLOAD:
+ if (arg < this->header_size)
+ return 1;
+ break;
+
default:
if (is_tex() && arg == 0 && src[0].file == GRF)
return mlen;
From 438e9c8b88c8faf7cbc2a20b03c077342be214e3 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Wed, 17 Jun 2015 17:32:24 -0700
Subject: [PATCH 0124/1208] i965/fs: Explicitly set the exec_size on the
add(32) in interpolation setup
Soon we will start using the builder to explicitly set all the execution
sizes. We could make a 32-wide builder, but the builder asserts that we
never grow it, which is usually a reasonable assumption. Since this one
instruction is a bit of an odd-ball, we just set the exec_size explicitly.
v2: Explicitly new the fs_inst instead of using the builder and setting
exec_size after the fact.
v3: Set force_writemask_all with the builder instead of directly. The
builder over-writes it if we set it manually. Also, if we don't have
force_writemask_all in the builder it will assert-fail on SIMD32.
Reviewed-by: Iago Toral Quiroga
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 395af73bc2e..89eb71769a3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1358,10 +1358,12 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
BRW_REGISTER_TYPE_UW, dispatch_width * 2);
- abld.exec_all()
- .ADD(int_pixel_xy,
- fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
- fs_reg(brw_imm_v(0x11001010)));
+ fs_inst *add =
+ new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dispatch_width * 2,
+ int_pixel_xy,
+ fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
+ fs_reg(brw_imm_v(0x11001010)));
+ abld.exec_all().emit(add);
this->pixel_x = vgrf(glsl_type::float_type);
this->pixel_y = vgrf(glsl_type::float_type);
From 362eff7741f9ca6e49074509120a2e6c03ef7ae6 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 15:58:59 -0700
Subject: [PATCH 0125/1208] i965/fs: Set the builder group for emitting
FB-write stencil/AA alpha
Reviewed-by: Iago Toral Quiroga
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 89eb71769a3..69d3cfa8897 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1530,7 +1530,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
if (payload.aa_dest_stencil_reg) {
sources[length] = fs_reg(GRF, alloc.allocate(1));
- bld.exec_all().annotate("FB write stencil/AA alpha")
+ bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
length++;
From b535ba55ed6023f402374aeff79f9f37dbb21df0 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:00:54 -0700
Subject: [PATCH 0126/1208] i965/blorp: Explicitly set execution sizes for
new'd instructions
This doesn't affect instructions allocated using the builder.
Reviewed-by: Iago Toral Quiroga
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 789520c7353..d458ad846bf 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -73,7 +73,7 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg &x,
emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL;
emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL;
- fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, g1, f0, g1);
+ fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1);
inst->force_writemask_all = true;
insts.push_tail(inst);
}
@@ -84,7 +84,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst,
unsigned base_mrf,
unsigned msg_length)
{
- fs_inst *inst = new (mem_ctx) fs_inst(op, dst, brw_message_reg(base_mrf),
+ fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf),
fs_reg(0u));
inst->base_mrf = base_mrf;
@@ -119,7 +119,8 @@ brw_blorp_eu_emitter::emit_combine(enum opcode combine_opcode,
{
assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == BRW_OPCODE_AVG);
- insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, dst, src_1, src_2));
+ insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, 16, dst,
+ src_1, src_2));
}
fs_inst *
@@ -127,7 +128,7 @@ brw_blorp_eu_emitter::emit_cmp(enum brw_conditional_mod op,
const struct brw_reg &x,
const struct brw_reg &y)
{
- fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP,
+ fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, 16,
vec16(brw_null_reg()), x, y);
cmp->conditional_mod = op;
insts.push_tail(cmp);
From 7fcbe141076d18bf0245de1fd589c82f7c543fdf Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 25 Jun 2015 10:55:51 -0700
Subject: [PATCH 0127/1208] i965/fs: Move offset(fs_reg, unsigned) to brw_fs.h
Shortly, offset() will depend on the builder so we need it moved to some
place where it has access to that.
Reviewed-by: Iago Toral Quiroga
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.h | 21 +++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_ir_fs.h | 21 ---------------------
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f20b540020f..06f46765dd1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -62,6 +62,27 @@ namespace brw {
class fs_live_variables;
}
+static inline fs_reg
+offset(fs_reg reg, unsigned delta)
+{
+ switch (reg.file) {
+ case BAD_FILE:
+ break;
+ case GRF:
+ case MRF:
+ case ATTR:
+ return byte_offset(reg,
+ delta * MAX2(reg.width * reg.stride, 1) *
+ type_sz(reg.type));
+ case UNIFORM:
+ reg.reg_offset += delta;
+ break;
+ default:
+ assert(delta == 0);
+ }
+ return reg;
+}
+
/**
* The fragment shader front-end.
*
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 96dc20da3cf..16b20beb788 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -128,27 +128,6 @@ horiz_offset(fs_reg reg, unsigned delta)
return reg;
}
-static inline fs_reg
-offset(fs_reg reg, unsigned delta)
-{
- switch (reg.file) {
- case BAD_FILE:
- break;
- case GRF:
- case MRF:
- case ATTR:
- return byte_offset(reg,
- delta * MAX2(reg.width * reg.stride, 1) *
- type_sz(reg.type));
- case UNIFORM:
- reg.reg_offset += delta;
- break;
- default:
- assert(delta == 0);
- }
- return reg;
-}
-
static inline fs_reg
component(fs_reg reg, unsigned idx)
{
From f7dcc1160331462a071c54ca1067f9e2f57b55be Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:07:27 -0700
Subject: [PATCH 0128/1208] i965/fs: Add a builder argument to offset()
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 42 ++---
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 58 +++----
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 143 +++++++++---------
.../dri/i965/test_fs_cmod_propagation.cpp | 4 +-
.../dri/i965/test_fs_saturate_propagation.cpp | 4 +-
7 files changed, 132 insertions(+), 123 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cae4e4263ea..ceac20cc97a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -267,7 +267,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
inst->mlen = 1 + dispatch_width / 8;
}
- bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale));
+ bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
}
/**
@@ -361,7 +361,12 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
reg.width = this->src[i].width;
if (!this->src[i].equals(reg))
return false;
- reg = ::offset(reg, 1);
+
+ if (i < this->header_size) {
+ reg.reg_offset += 1;
+ } else {
+ reg.reg_offset += this->exec_size / 8;
+ }
}
return true;
@@ -926,7 +931,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
} else {
bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.y */
if (!flip && pixel_center_integer) {
@@ -942,7 +947,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
bld.ADD(wpos, pixel_y, fs_reg(offset));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.z */
if (devinfo->gen >= 6) {
@@ -952,7 +957,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
bld.MOV(wpos, this->wpos_w);
@@ -1035,7 +1040,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
/* If there's no incoming setup data for this slot, don't
* emit interpolation for it.
*/
- attr = offset(attr, type->vector_elements);
+ attr = offset(attr, bld, type->vector_elements);
location++;
continue;
}
@@ -1050,7 +1055,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
interp = suboffset(interp, 3);
interp.type = attr.type;
bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
- attr = offset(attr, 1);
+ attr = offset(attr, bld, 1);
}
} else {
/* Smooth/noperspective interpolation case. */
@@ -1088,7 +1093,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
bld.MUL(attr, attr, this->pixel_w);
}
- attr = offset(attr, 1);
+ attr = offset(attr, bld, 1);
}
}
@@ -1196,7 +1201,7 @@ fs_visitor::emit_samplepos_setup()
}
/* Compute gl_SamplePosition.x */
compute_sample_position(pos, int_sample_x);
- pos = offset(pos, 1);
+ pos = offset(pos, abld, 1);
if (dispatch_width == 8) {
abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
} else {
@@ -2986,10 +2991,6 @@ fs_visitor::lower_load_payload()
assert(inst->dst.file == MRF || inst->dst.file == GRF);
assert(inst->saturate == false);
-
- const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf)
- .exec_all(inst->force_writemask_all)
- .at(block, inst);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
@@ -2997,17 +2998,23 @@ fs_visitor::lower_load_payload()
dst.reg = dst.reg & ~BRW_MRF_COMPR4;
dst.width = 8;
+ const fs_builder hbld = bld.group(8, 0).exec_all().at(block, inst);
+
for (uint8_t i = 0; i < inst->header_size; i++) {
if (inst->src[i].file != BAD_FILE) {
fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD);
fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD);
mov_src.width = 8;
- ibld.exec_all().MOV(mov_dst, mov_src);
+ hbld.MOV(mov_dst, mov_src);
}
- dst = offset(dst, 1);
+ dst = offset(dst, hbld, 1);
}
dst.width = inst->exec_size;
+ const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf)
+ .exec_all(inst->force_writemask_all)
+ .at(block, inst);
+
if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
@@ -3039,7 +3046,8 @@ fs_visitor::lower_load_payload()
fs_reg mov_dst = retype(dst, inst->src[i].type);
mov_dst.width = 8;
ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
- ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1));
+ mov_dst.reg += 4;
+ ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
}
}
@@ -3064,7 +3072,7 @@ fs_visitor::lower_load_payload()
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
if (inst->src[i].file != BAD_FILE)
ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
- dst = offset(dst, 1);
+ dst = offset(dst, ibld, 1);
}
inst->remove(block);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 06f46765dd1..ece7e49ea17 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -63,7 +63,7 @@ namespace brw {
}
static inline fs_reg
-offset(fs_reg reg, unsigned delta)
+offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
{
switch (reg.file) {
case BAD_FILE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 70f0217b93d..29d1f2a6a57 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -205,7 +205,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
}
for (int i = header_size; i < sources; i++) {
payload[i] = src;
- src = offset(src, 1);
+ src = offset(src, ubld, 1);
}
copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 58896d72e14..f52f344ccff 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -76,7 +76,7 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
{
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
enum brw_reg_type type = brw_type_for_base_type(var->type);
- fs_reg input = offset(nir_inputs, var->data.driver_location);
+ fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
fs_reg reg;
switch (stage) {
@@ -95,8 +95,8 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
unsigned array_length = var->type->is_array() ? var->type->length : 1;
for (unsigned i = 0; i < array_length; i++) {
for (unsigned j = 0; j < components; j++) {
- bld.MOV(retype(offset(input, components * i + j), type),
- offset(fs_reg(ATTR, var->data.location + i, type), j));
+ bld.MOV(retype(offset(input, bld, components * i + j), type),
+ offset(fs_reg(ATTR, var->data.location + i, type), bld, j));
}
}
break;
@@ -127,7 +127,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
foreach_list_typed(nir_variable, var, node, &shader->outputs) {
- fs_reg reg = offset(nir_outputs, var->data.driver_location);
+ fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
int vector_elements =
var->type->is_array() ? var->type->fields.array->vector_elements
@@ -136,7 +136,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
if (stage == MESA_SHADER_VERTEX) {
for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
int output = var->data.location + i;
- this->outputs[output] = offset(reg, 4 * i);
+ this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
}
} else if (var->data.index > 0) {
@@ -162,7 +162,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
/* General color output. */
for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
int output = var->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = offset(reg, vector_elements * i);
+ this->outputs[output] = offset(reg, bld, vector_elements * i);
this->output_components[output] = vector_elements;
}
}
@@ -618,11 +618,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
continue;
if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
- inst = bld.MOV(offset(temp, i),
- offset(op[0], instr->src[0].swizzle[i]));
+ inst = bld.MOV(offset(temp, bld, i),
+ offset(op[0], bld, instr->src[0].swizzle[i]));
} else {
- inst = bld.MOV(offset(temp, i),
- offset(op[i], instr->src[i].swizzle[0]));
+ inst = bld.MOV(offset(temp, bld, i),
+ offset(op[i], bld, instr->src[i].swizzle[0]));
}
inst->saturate = instr->dest.saturate;
}
@@ -636,7 +636,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
if (!(instr->dest.write_mask & (1 << i)))
continue;
- bld.MOV(offset(result, i), offset(temp, i));
+ bld.MOV(offset(result, bld, i), offset(temp, bld, i));
}
}
return;
@@ -657,12 +657,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
assert(_mesa_bitcount(instr->dest.write_mask) == 1);
channel = ffs(instr->dest.write_mask) - 1;
- result = offset(result, channel);
+ result = offset(result, bld, channel);
}
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
assert(nir_op_infos[instr->op].input_sizes[i] < 2);
- op[i] = offset(op[i], instr->src[i].swizzle[channel]);
+ op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]);
}
switch (instr->op) {
@@ -1153,7 +1153,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, i), fs_reg(instr->value.i[i]));
+ bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
nir_ssa_values[instr->def.index] = reg;
}
@@ -1175,7 +1175,7 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
else
reg = v->nir_locals[nir_reg->index];
- reg = offset(reg, base_offset * nir_reg->num_components);
+ reg = offset(reg, v->bld, base_offset * nir_reg->num_components);
if (indirect) {
int multiplier = nir_reg->num_components * (v->dispatch_width / 8);
@@ -1227,10 +1227,10 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
continue;
fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
- new_inst->dst = offset(new_inst->dst, i);
+ new_inst->dst = offset(new_inst->dst, bld, i);
for (unsigned j = 0; j < new_inst->sources; j++)
if (new_inst->src[j].file == GRF)
- new_inst->src[j] = offset(new_inst->src[j], i);
+ new_inst->src[j] = offset(new_inst->src[j], bld, i);
bld.emit(new_inst);
}
@@ -1341,7 +1341,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
assert(sample_pos.file != BAD_FILE);
dest.type = sample_pos.type;
bld.MOV(dest, sample_pos);
- bld.MOV(offset(dest, 1), offset(sample_pos, 1));
+ bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
break;
}
@@ -1368,13 +1368,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(uniform_reg, dest.type), index);
+ fs_reg src = offset(retype(uniform_reg, dest.type), bld, index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
bld.MOV(dest, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1416,7 +1416,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned vec4_offset = instr->const_index[0] / 4;
for (int i = 0; i < instr->num_components; i++)
- VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index,
+ VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
base_offset, vec4_offset + i);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
@@ -1435,7 +1435,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
assert(packed_consts.subreg_offset < 32);
bld.MOV(dest, packed_consts);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
}
break;
@@ -1447,14 +1447,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_input: {
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(nir_inputs, dest.type),
+ fs_reg src = offset(retype(nir_inputs, dest.type), bld,
instr->const_index[0] + index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
bld.MOV(dest, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1527,7 +1527,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f));
+ bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
bld.MOV(itemp, temp); /* float to int */
@@ -1547,7 +1547,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, i), itemp, fs_reg(7)));
+ bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
mlen = 2;
@@ -1571,7 +1571,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
src.type = dest.type;
bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1583,13 +1583,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg src = get_nir_src(instr->src[0]);
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg new_dest = offset(retype(nir_outputs, src.type),
+ fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
instr->const_index[0] + index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
index++;
bld.MOV(new_dest, src);
- src = offset(src, 1);
+ src = offset(src, bld, 1);
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 69d3cfa8897..9b91f47d264 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -95,7 +95,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
if (shadow_c.file != BAD_FILE) {
for (int i = 0; i < coord_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present.
@@ -124,7 +124,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
} else if (op == ir_tex) {
for (int i = 0; i < coord_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
/* zero the others. */
for (int i = coord_components; i<3; i++) {
@@ -137,7 +137,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
for (int i = 0; i < coord_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
/* the slots for u and v are always present, but r is optional */
mlen += MAX2(coord_components, 2);
@@ -158,13 +158,13 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
*/
for (int i = 0; i < grad_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx);
- dPdx = offset(dPdx, 1);
+ dPdx = offset(dPdx, bld, 1);
}
mlen += MAX2(grad_components, 2);
for (int i = 0; i < grad_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy);
- dPdy = offset(dPdy, 1);
+ dPdy = offset(dPdy, bld, 1);
}
mlen += MAX2(grad_components, 2);
} else if (op == ir_txs) {
@@ -182,7 +182,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
for (int i = 0; i < coord_components; i++) {
bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
/* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
@@ -232,8 +232,8 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
if (simd16) {
for (int i = 0; i < 4; i++) {
bld.MOV(orig_dst, dst);
- orig_dst = offset(orig_dst, 1);
- dst = offset(dst, 2);
+ orig_dst = offset(orig_dst, bld, 1);
+ dst = offset(dst, bld, 2);
}
}
@@ -257,31 +257,31 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
/* Copy the coordinates. */
for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(message, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, 1);
+ bld.MOV(retype(offset(message, bld, i), coordinate.type), coordinate);
+ coordinate = offset(coordinate, bld, 1);
}
- fs_reg msg_end = offset(message, vector_elements);
+ fs_reg msg_end = offset(message, bld, vector_elements);
/* Messages other than sample and ld require all three components */
if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) {
for (int i = vector_elements; i < 3; i++) {
- bld.MOV(offset(message, i), fs_reg(0.0f));
+ bld.MOV(offset(message, bld, i), fs_reg(0.0f));
}
- msg_end = offset(message, 3);
+ msg_end = offset(message, bld, 3);
}
if (has_lod) {
fs_reg msg_lod = retype(msg_end, op == ir_txf ?
BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
}
if (shadow_c.file != BAD_FILE) {
- fs_reg msg_ref = offset(message, 3 + has_lod);
+ fs_reg msg_ref = offset(message, bld, 3 + has_lod);
bld.MOV(msg_ref, shadow_c);
- msg_end = offset(msg_ref, 1);
+ msg_end = offset(msg_ref, bld, 1);
}
enum opcode opcode;
@@ -335,16 +335,16 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
}
for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, 1);
+ bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate);
+ coordinate = offset(coordinate, bld, 1);
}
- fs_reg msg_end = offset(msg_coords, vector_elements);
- fs_reg msg_lod = offset(msg_coords, 4);
+ fs_reg msg_end = offset(msg_coords, bld, vector_elements);
+ fs_reg msg_lod = offset(msg_coords, bld, 4);
if (shadow_c.file != BAD_FILE) {
fs_reg msg_shadow = msg_lod;
bld.MOV(msg_shadow, shadow_c);
- msg_lod = offset(msg_shadow, 1);
+ msg_lod = offset(msg_shadow, bld, 1);
msg_end = msg_lod;
}
@@ -355,13 +355,13 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
break;
case ir_txb:
bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
opcode = FS_OPCODE_TXB;
break;
case ir_txl:
bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
opcode = SHADER_OPCODE_TXL;
break;
@@ -378,12 +378,12 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
msg_end = msg_lod;
for (int i = 0; i < grad_components; i++) {
bld.MOV(msg_end, lod);
- lod = offset(lod, 1);
- msg_end = offset(msg_end, 1);
+ lod = offset(lod, bld, 1);
+ msg_end = offset(msg_end, bld, 1);
bld.MOV(msg_end, lod2);
- lod2 = offset(lod2, 1);
- msg_end = offset(msg_end, 1);
+ lod2 = offset(lod2, bld, 1);
+ msg_end = offset(msg_end, bld, 1);
}
opcode = SHADER_OPCODE_TXD;
@@ -392,31 +392,31 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
case ir_txs:
msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_query_levels:
msg_lod = msg_end;
bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_txf:
- msg_lod = offset(msg_coords, 3);
+ msg_lod = offset(msg_coords, bld, 3);
bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
- msg_end = offset(msg_lod, 1);
+ msg_end = offset(msg_lod, bld, 1);
opcode = SHADER_OPCODE_TXF;
break;
case ir_txf_ms:
- msg_lod = offset(msg_coords, 3);
+ msg_lod = offset(msg_coords, bld, 3);
/* lod */
bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
/* sample index */
- bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index);
- msg_end = offset(msg_lod, 2);
+ bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
+ msg_end = offset(msg_lod, bld, 2);
opcode = SHADER_OPCODE_TXF_CMS;
break;
@@ -526,7 +526,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
*/
for (int i = 0; i < coord_components; i++) {
bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
@@ -534,11 +534,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
*/
if (i < grad_components) {
bld.MOV(sources[length], lod);
- lod = offset(lod, 1);
+ lod = offset(lod, bld, 1);
length++;
bld.MOV(sources[length], lod2);
- lod2 = offset(lod2, 1);
+ lod2 = offset(lod2, bld, 1);
length++;
}
}
@@ -560,13 +560,13 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
*/
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
if (devinfo->gen >= 9) {
if (coord_components >= 2) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
length++;
}
@@ -576,7 +576,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
}
@@ -595,7 +595,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
*/
for (int i = 0; i < coord_components; i++) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
}
@@ -609,19 +609,19 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
/* More crazy intermixing */
for (int i = 0; i < 2; i++) { /* u, v */
bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
}
for (int i = 0; i < 2; i++) { /* offu, offv */
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
- offset_value = offset(offset_value, 1);
+ offset_value = offset(offset_value, bld, 1);
length++;
}
if (coord_components == 3) { /* r if present */
bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
}
@@ -634,7 +634,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
if (!coordinate_done) {
for (int i = 0; i < coord_components; i++) {
bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
length++;
}
}
@@ -747,8 +747,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
coordinate = dst;
bld.MUL(dst, src, scale_x);
- dst = offset(dst, 1);
- src = offset(src, 1);
+ dst = offset(dst, bld, 1);
+ src = offset(src, bld, 1);
bld.MUL(dst, src, scale_y);
} else if (is_rect) {
/* On gen6+, the sampler handles the rectangle coordinates
@@ -761,7 +761,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
for (int i = 0; i < 2; i++) {
if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
- chan = offset(chan, i);
+ chan = offset(chan, bld, i);
set_condmod(BRW_CONDITIONAL_GE,
bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
@@ -786,7 +786,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
for (int i = 0; i < MIN2(coord_components, 3); i++) {
if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
- chan = offset(chan, i);
+ chan = offset(chan, bld, i);
set_saturate(true, bld.MOV(chan, chan));
}
}
@@ -808,7 +808,7 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
for (int i = 0; i < components; i++) {
sources[i] = vgrf(glsl_type::float_type);
bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
+ coordinate = offset(coordinate, bld, 1);
}
bld.LOAD_PAYLOAD(payload, sources, components, 0);
@@ -854,7 +854,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
for (int i=0; i<4; i++) {
bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
- res = offset(res, 1);
+ res = offset(res, bld, 1);
}
return;
}
@@ -908,7 +908,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
/* fixup #layers for cube map arrays */
if (op == ir_txs && is_cube_array) {
- fs_reg depth = offset(dst, 2);
+ fs_reg depth = offset(dst, bld, 2);
fs_reg fixed_depth = vgrf(glsl_type::int_type);
bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
@@ -918,7 +918,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
if (i == 2) {
fixed_payload[i] = fixed_depth;
} else {
- fixed_payload[i] = offset(dst, i);
+ fixed_payload[i] = offset(dst, bld, i);
}
}
bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
@@ -953,7 +953,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
bld.ASR(dst, dst, fs_reg(32 - width));
}
- dst = offset(dst, 1);
+ dst = offset(dst, bld, 1);
}
}
@@ -990,7 +990,7 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
{
if (op == ir_query_levels) {
/* # levels is in .w */
- this->result = offset(orig_val, 3);
+ this->result = offset(orig_val, bld, 3);
return;
}
@@ -1011,15 +1011,15 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
for (int i = 0; i < 4; i++) {
int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
fs_reg l = swizzled_result;
- l = offset(l, i);
+ l = offset(l, bld, i);
if (swiz == SWIZZLE_ZERO) {
bld.MOV(l, fs_reg(0.0f));
} else if (swiz == SWIZZLE_ONE) {
bld.MOV(l, fs_reg(1.0f));
} else {
- bld.MOV(l, offset(orig_val,
- GET_SWZ(key_tex->swizzles[sampler], i)));
+ bld.MOV(l, offset(orig_val, bld,
+ GET_SWZ(key_tex->swizzles[sampler], i)));
}
}
this->result = swizzled_result;
@@ -1316,14 +1316,14 @@ fs_visitor::emit_interpolation_setup_gen4()
if (devinfo->has_pln && dispatch_width == 16) {
for (unsigned i = 0; i < 2; i++) {
- abld.half(i).ADD(half(offset(delta_xy, i), 0),
+ abld.half(i).ADD(half(offset(delta_xy, abld, i), 0),
half(this->pixel_x, i), xstart);
- abld.half(i).ADD(half(offset(delta_xy, i), 1),
+ abld.half(i).ADD(half(offset(delta_xy, abld, i), 1),
half(this->pixel_y, i), ystart);
}
} else {
- abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart);
- abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart);
+ abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart);
+ abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
}
abld = bld.annotate("compute pos.w and 1/pos.w");
@@ -1421,7 +1421,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
fs_reg tmp = vgrf(glsl_type::vec4_type);
assert(color.type == BRW_REGISTER_TYPE_F);
for (unsigned i = 0; i < components; i++) {
- inst = bld.MOV(offset(tmp, i), offset(color, i));
+ inst = bld.MOV(offset(tmp, bld, i), offset(color, bld, i));
inst->saturate = true;
}
color = tmp;
@@ -1430,10 +1430,10 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
if (exec_size < dispatch_width) {
unsigned half_idx = use_2nd_half ? 1 : 0;
for (unsigned i = 0; i < components; i++)
- dst[i] = half(offset(color, i), half_idx);
+ dst[i] = half(offset(color, bld, i), half_idx);
} else {
for (unsigned i = 0; i < components; i++)
- dst[i] = offset(color, i);
+ dst[i] = offset(color, bld, i);
}
}
@@ -1481,7 +1481,7 @@ fs_visitor::emit_alpha_test()
BRW_CONDITIONAL_NEQ);
} else {
/* RT0 alpha */
- fs_reg color = offset(outputs[0], 3);
+ fs_reg color = offset(outputs[0], bld, 3);
/* f0.1 &= func(color, ref) */
cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
@@ -1558,7 +1558,8 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
* alpha-testing, alpha-to-coverage, and so on.
*/
if (this->outputs[0].file != BAD_FILE)
- setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3),
+ setup_color_payload(&sources[length + 3],
+ offset(this->outputs[0], bld, 3),
1, exec_size, false);
length += 4;
} else if (color1.file == BAD_FILE) {
@@ -1694,7 +1695,7 @@ fs_visitor::emit_fb_writes()
fs_reg src0_alpha;
if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
- src0_alpha = offset(outputs[0], 3);
+ src0_alpha = offset(outputs[0], bld, 3);
inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
src0_alpha,
@@ -1787,7 +1788,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
u.reg = userplane[i].reg + j;
- abld.MAD(output, output, offset(outputs[clip_vertex], j), u);
+ abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
}
}
}
@@ -1904,13 +1905,13 @@ fs_visitor::emit_urb_writes()
*/
for (int i = 0; i < 4; i++) {
reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
- src = offset(this->outputs[varying], i);
+ src = offset(this->outputs[varying], bld, i);
set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
}
} else {
for (int i = 0; i < 4; i++)
- sources[length++] = offset(this->outputs[varying], i);
+ sources[length++] = offset(this->outputs[varying], bld, i);
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 8010fb4f610..ba67bc59e19 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -283,10 +283,10 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
fs_reg zero(0.0f);
- bld.ADD(offset(dest, 2), src0, src1);
+ bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
->regs_written = 4;
- bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
/* = Before =
*
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index 3ef0cb319eb..1caa0b50ec6 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -367,10 +367,10 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
- bld.ADD(offset(dst0, 2), src0, src1);
+ bld.ADD(offset(dst0, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dst0, src2)
->regs_written = 4;
- set_saturate(true, bld.MOV(dst1, offset(dst0, 2)));
+ set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
/* = Before =
*
From 67c4c9e1a709508b88d6d31eb1f7cb61d187189e Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:24:27 -0700
Subject: [PATCH 0129/1208] i965/fs: Make better use of the builder in
shader_time
Previously, we were just depending on register widths to ensure that
various instructions had an exec_size of 1, etc. Now, we do so explicitly
using the builder.
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ceac20cc97a..464c1f673cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -557,7 +557,7 @@ fs_visitor::get_timestamp(const fs_builder &bld)
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
*/
- bld.exec_all().MOV(dst, ts);
+ bld.group(4, 0).exec_all().MOV(dst, ts);
/* The caller wants the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
@@ -604,17 +604,19 @@ fs_visitor::emit_shader_time_end()
start.negate = true;
fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
diff.set_smear(0);
- ibld.ADD(diff, start, shader_end_time);
+
+ const fs_builder cbld = ibld.group(1, 0);
+ cbld.group(1, 0).ADD(diff, start, shader_end_time);
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- ibld.ADD(diff, diff, fs_reg(-2u));
- SHADER_TIME_ADD(ibld, 0, diff);
- SHADER_TIME_ADD(ibld, 1, fs_reg(1u));
+ cbld.ADD(diff, diff, fs_reg(-2u));
+ SHADER_TIME_ADD(cbld, 0, diff);
+ SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
ibld.emit(BRW_OPCODE_ELSE);
- SHADER_TIME_ADD(ibld, 2, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
From 89bc4c78c394e50ddb16cc089bd3ec90681342d7 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:30:43 -0700
Subject: [PATCH 0130/1208] i965/fs: Remove fs_inst constructors that don't
take an explicit exec_size
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 30 ++--------------------
src/mesa/drivers/dri/i965/brw_fs_builder.h | 2 +-
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 +++--
src/mesa/drivers/dri/i965/brw_ir_fs.h | 9 +------
4 files changed, 8 insertions(+), 39 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 464c1f673cd..8d99abc6dbb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -126,9 +126,9 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
init(opcode, exec_size, reg_undef, NULL, 0);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst)
+fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst)
{
- init(opcode, 0, dst, NULL, 0);
+ init(opcode, exec_size, dst, NULL, 0);
}
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
@@ -138,12 +138,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 1);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
- const fs_reg src[1] = { src0 };
- init(opcode, 0, dst, src, 1);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1)
{
@@ -151,13 +145,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 2);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1)
-{
- const fs_reg src[2] = { src0, src1 };
- init(opcode, 0, dst, src, 2);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
{
@@ -165,19 +152,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 3);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2)
-{
- const fs_reg src[3] = { src0, src1, src2 };
- init(opcode, 0, dst, src, 3);
-}
-
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst,
- const fs_reg src[], unsigned sources)
-{
- init(opcode, 0, dst, src, sources);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
const fs_reg src[], unsigned sources)
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 58ac5980da5..c823190efbd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -235,7 +235,7 @@ namespace brw {
instruction *
emit(enum opcode opcode, const dst_reg &dst) const
{
- return emit(instruction(opcode, dst));
+ return emit(instruction(opcode, dst.width, dst));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index f52f344ccff..caf1300d71b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -109,7 +109,8 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
if (var->data.location == VARYING_SLOT_POS) {
reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
var->data.origin_upper_left);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ input, reg), 0xF);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
@@ -1762,7 +1763,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
unsigned num_components = nir_tex_instr_dest_size(instr);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result),
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ dest, this->result),
(1 << num_components) - 1);
}
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 16b20beb788..d6b617ab2bd 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -189,20 +189,13 @@ public:
fs_inst();
fs_inst(enum opcode opcode, uint8_t exec_size);
- fs_inst(enum opcode opcode, const fs_reg &dst);
+ fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg src[],
- unsigned sources);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg src[], unsigned sources);
fs_inst(const fs_inst &that);
From 500525e96019aff551afa8fee841d00ca9ec4c4f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:50:09 -0700
Subject: [PATCH 0131/1208] i965/fs: Use exec_size for determining regs
read/written and partial writes
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8d99abc6dbb..c11e3f3e17c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -101,7 +101,7 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case MRF:
case ATTR:
this->regs_written =
- DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32);
+ DIV_ROUND_UP(MAX2(exec_size * dst.stride, 1) * type_sz(dst.type), 32);
break;
case BAD_FILE:
this->regs_written = 0;
@@ -675,7 +675,7 @@ bool
fs_inst::is_partial_write() const
{
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
- (this->dst.width * type_sz(this->dst.type)) < 32 ||
+ (this->exec_size * type_sz(this->dst.type)) < 32 ||
!this->dst.is_contiguous());
}
@@ -735,7 +735,7 @@ fs_inst::regs_read(int arg) const
if (src[arg].stride == 0) {
return 1;
} else {
- int size = components * src[arg].width * type_sz(src[arg].type);
+ int size = components * this->exec_size * type_sz(src[arg].type);
return DIV_ROUND_UP(size * src[arg].stride, 32);
}
case MRF:
From b624ccc206cbf19989c6562416d7c21b66270577 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:51:51 -0700
Subject: [PATCH 0132/1208] i965/fs_builder: Use the dispatch width for setting
exec sizes
Previously we used dst.width but the two *should* be the same.
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs_builder.h | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index c823190efbd..8af16a0fd73 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -235,7 +235,7 @@ namespace brw {
instruction *
emit(enum opcode opcode, const dst_reg &dst) const
{
- return emit(instruction(opcode, dst.width, dst));
+ return emit(instruction(opcode, dispatch_width(), dst));
}
/**
@@ -253,11 +253,11 @@ namespace brw {
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return fix_math_instruction(
- emit(instruction(opcode, dst.width, dst,
+ emit(instruction(opcode, dispatch_width(), dst,
fix_math_operand(src0))));
default:
- return emit(instruction(opcode, dst.width, dst, src0));
+ return emit(instruction(opcode, dispatch_width(), dst, src0));
}
}
@@ -273,12 +273,12 @@ namespace brw {
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return fix_math_instruction(
- emit(instruction(opcode, dst.width, dst,
+ emit(instruction(opcode, dispatch_width(), dst,
fix_math_operand(src0),
fix_math_operand(src1))));
default:
- return emit(instruction(opcode, dst.width, dst, src0, src1));
+ return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
}
}
@@ -295,13 +295,14 @@ namespace brw {
case BRW_OPCODE_BFI2:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
- return emit(instruction(opcode, dst.width, dst,
+ return emit(instruction(opcode, dispatch_width(), dst,
fix_3src_operand(src0),
fix_3src_operand(src1),
fix_3src_operand(src2)));
default:
- return emit(instruction(opcode, dst.width, dst, src0, src1, src2));
+ return emit(instruction(opcode, dispatch_width(), dst,
+ src0, src1, src2));
}
}
@@ -517,7 +518,8 @@ namespace brw {
{
assert(dst.width % 8 == 0);
instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD,
- dst.width, dst, src, sources));
+ dispatch_width(), dst,
+ src, sources));
inst->header_size = header_size;
for (unsigned i = 0; i < header_size; i++)
@@ -528,7 +530,7 @@ namespace brw {
for (unsigned i = header_size; i < sources; ++i)
assert(src[i].file != GRF ||
src[i].width == dst.width);
- inst->regs_written += (sources - header_size) * (dst.width / 8);
+ inst->regs_written += (sources - header_size) * (dispatch_width() / 8);
return inst;
}
From c9676329dd6c69b2e0b12405c3b4078f7d216f2f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:34:52 -0700
Subject: [PATCH 0133/1208] i965/fs: Remove exec_size guessing from
fs_inst::init()
Now that all of the non-explicit constructors are gone, we don't need to
guess anymore.
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c11e3f3e17c..d08af84f157 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -68,28 +68,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
assert(dst.file != IMM && dst.file != UNIFORM);
- /* If exec_size == 0, try to guess it from the registers. Since all
- * manner of things may use hardware registers, we first try to guess
- * based on GRF registers. If this fails, we will go ahead and take the
- * width from the destination register.
- */
- if (this->exec_size == 0) {
- if (dst.file == GRF) {
- this->exec_size = dst.width;
- } else {
- for (unsigned i = 0; i < sources; ++i) {
- if (src[i].file != GRF && src[i].file != ATTR)
- continue;
-
- if (this->exec_size <= 1)
- this->exec_size = src[i].width;
- assert(src[i].width == 1 || src[i].width == this->exec_size);
- }
- }
-
- if (this->exec_size == 0 && dst.file != BAD_FILE)
- this->exec_size = dst.width;
- }
assert(this->exec_size != 0);
this->conditional_mod = BRW_CONDITIONAL_NONE;
From 21803b7b3304f053a48e313951ffddf1d2cd0bd9 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 13:41:38 -0700
Subject: [PATCH 0134/1208] i965/fs: Use the builder dispatch width instead of
dst.width for pull constants
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d08af84f157..3589bb92009 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -188,7 +188,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
int scale = 1;
- if (devinfo->gen == 4 && dst.width == 8) {
+ if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
/* Pre-gen5, we can either use a SIMD8 message that requires (header,
* u, v, r) as parameters, or we can just use the SIMD16 message
* consisting of (header, u). We choose the second, at the cost of a
@@ -204,9 +204,9 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
assert(dst.width % 8 == 0);
- int regs_written = 4 * (dst.width / 8) * scale;
+ int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
- dst.type, dst.width);
+ dst.type, bld.dispatch_width());
fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -216,7 +216,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
if (devinfo->gen == 4)
inst->mlen = 3;
else
- inst->mlen = 1 + dispatch_width / 8;
+ inst->mlen = 1 + bld.dispatch_width() / 8;
}
bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
From 9a0c883292cf48910a32634f7cc8b855e08c09d5 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 25 Jun 2015 11:00:01 -0700
Subject: [PATCH 0135/1208] i965/fs: Use the builder dispatch_width for
computing register offsets
Reviewed-by: Topi Pohjolainen
Reviewed-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index ece7e49ea17..88a50ae0913 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -72,7 +72,7 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
case MRF:
case ATTR:
return byte_offset(reg,
- delta * MAX2(reg.width * reg.stride, 1) *
+ delta * MAX2(bld.dispatch_width() * reg.stride, 1) *
type_sz(reg.type));
case UNIFORM:
reg.reg_offset += delta;
From 83458e7c53cfc1f344280da6eb9a3b4e2dfdbc00 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 13:49:22 -0700
Subject: [PATCH 0136/1208] i965/fs: Use exec_size instead of dst.width for
computing component size
There are a variety of places where we use dst.width / 8 to compute the
size of a single logical channel. Instead, we should be using exec_size.
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++---
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 4 ++--
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3589bb92009..be772ae547b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2280,12 +2280,12 @@ fs_visitor::opt_register_renaming()
if (depth == 0 &&
inst->dst.file == GRF &&
- alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
+ alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
- remap[dst] = alloc.allocate(inst->dst.width / 8);
+ remap[dst] = alloc.allocate(inst->exec_size / 8);
inst->dst.reg = remap[dst];
progress = true;
}
@@ -2416,7 +2416,7 @@ fs_visitor::compute_to_mrf()
/* Things returning more than one register would need us to
* understand coalescing out more than one MOV at a time.
*/
- if (scan_inst->regs_written > scan_inst->dst.width / 8)
+ if (scan_inst->regs_written > scan_inst->exec_size / 8)
break;
/* SEND instructions can't have MRF as a destination. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 29d1f2a6a57..29b46b96b8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -179,7 +179,7 @@ static void
create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
{
int written = inst->regs_written;
- int dst_width = inst->dst.width / 8;
+ int dst_width = inst->exec_size / 8;
const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf)
.exec_all(inst->force_writemask_all);
fs_inst *copy;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 2ad7079bdf8..149c0f0e217 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -196,7 +196,7 @@ fs_visitor::register_coalesce()
continue;
}
reg_to_offset[offset] = inst->dst.reg_offset;
- if (inst->src[0].width == 16)
+ if (inst->exec_size == 16)
reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
mov[offset] = inst;
channels_remaining -= inst->regs_written;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9b91f47d264..d5ff1be1414 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -913,7 +913,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
- int components = inst->regs_written / (dst.width / 8);
+ int components = inst->regs_written / (inst->exec_size / 8);
for (int i = 0; i < components; i++) {
if (i == 2) {
fixed_payload[i] = fixed_depth;
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index ee0add5d765..b49961fff68 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1314,8 +1314,8 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
* single-result send is probably actually reducing register
* pressure.
*/
- if (inst->regs_written <= inst->dst.width / 8 &&
- chosen_inst->regs_written > chosen_inst->dst.width / 8) {
+ if (inst->regs_written <= inst->exec_size / 8 &&
+ chosen_inst->regs_written > chosen_inst->exec_size / 8) {
chosen = n;
continue;
} else if (inst->regs_written > chosen_inst->regs_written) {
From 7f77abc9edf1348b8c6b82dfff102896cd4a2a58 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 13:57:37 -0700
Subject: [PATCH 0137/1208] i965/fs_generator: Use inst->exec_size for
determining hardware reg widths
Reviewed-by: Topi Pohjolainen
Acked-by: Francisco Jerez
---
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8d821abbac2..0a70bdc3c76 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -48,7 +48,7 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
}
static struct brw_reg
-brw_reg_from_fs_reg(fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
{
struct brw_reg brw_reg;
@@ -57,10 +57,10 @@ brw_reg_from_fs_reg(fs_reg *reg)
case MRF:
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
- } else if (reg->width < 8) {
+ } else if (inst->exec_size < 8) {
brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
- brw_reg = stride(brw_reg, reg->width * reg->stride,
- reg->width, reg->stride);
+ brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
+ inst->exec_size, reg->stride);
} else {
/* From the Haswell PRM:
*
@@ -414,7 +414,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
brw_fb_WRITE(p,
16 /* dispatch_width */,
brw_message_reg(inst->base_mrf),
- brw_reg_from_fs_reg(&inst->src[0]),
+ brw_reg_from_fs_reg(inst, &inst->src[0]),
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
inst->target,
inst->mlen,
@@ -1560,7 +1560,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < inst->sources; i++) {
- src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+ src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
@@ -1572,7 +1572,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
!inst->src[i].negate);
}
- dst = brw_reg_from_fs_reg(&inst->dst);
+ dst = brw_reg_from_fs_reg(inst, &inst->dst);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
From 830f67046ace3c0b95a7f093fe373eeb417a1aad Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 18 Jun 2015 12:44:35 -0700
Subject: [PATCH 0138/1208] i965/fs: Remove the width field from fs_reg
As of now, the width field is no longer used for anything. The width field
"seemed like a good idea at the time" but is actually entirely redundant
with the instruction's execution size. Initially, it gave us the ability
to easily set the instruction's execution size based entirely on register
widths. With the builder, we can easily set the sizes explicitly and the
width field doesn't have as much purpose. At this point, it's just
redundant information that can get out of sync so it really needs to go.
Reviewed-by: Topi Pohjolainen