From b9855dcdf7c94605305d6372d1f9883f76d10b6a Mon Sep 17 00:00:00 2001
From: Thomas Hindoe Paaboel Andersen
Date: Sat, 5 Mar 2016 13:07:07 +0100
Subject: [PATCH 001/224] st/va: avoid dereference after free in
vlVaDestroyImage
Cc: "11.1 11.2"
Reviewed-by: Emil Velikov
Tested-by: Julien Isorce
---
src/gallium/state_trackers/va/image.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index 2c42a985823..92d014c3d44 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -280,6 +280,7 @@ vlVaDestroyImage(VADriverContextP ctx, VAImageID image)
{
vlVaDriver *drv;
VAImage *vaimage;
+ VAStatus status;
if (!ctx)
return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -294,8 +295,9 @@ vlVaDestroyImage(VADriverContextP ctx, VAImageID image)
handle_table_remove(VL_VA_DRIVER(ctx)->htab, image);
pipe_mutex_unlock(drv->mutex);
+ status = vlVaDestroyBuffer(ctx, vaimage->buf);
FREE(vaimage);
- return vlVaDestroyBuffer(ctx, vaimage->buf);
+ return status;
}
VAStatus
From 4420f189b6d6dd68b517cd73dfdf3775d7d0f580 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Thu, 7 Apr 2016 17:38:41 -0600
Subject: [PATCH 002/224] st/mesa: fix glReadBuffer() assertion failure
If the first call in a GL app is glReadPixels(GL_FRONT) we'd fail the
assert(st->ctx->FragmentProgram._Current) at st_atom_shader.c:114 in
update_fp().
This is because we were calling st_validate_state() without first
updating Mesa state with _mesa_update_state().
The regression came from commit 83b589301f4a150f4 "st/mesa: fix
frontbuffer glReadPixels regressions".
The new piglit gl-1.0-simple-readbuffer test exercises this.
Cc: "11.1 11.2"
Reviewed-by: Roland Scheidegger
Reviewed-by: Jose Fonseca
---
src/mesa/state_tracker/st_cb_fbo.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index ff570e0e444..456ad83818b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -40,6 +40,7 @@
#include "main/glformats.h"
#include "main/macros.h"
#include "main/renderbuffer.h"
+#include "main/state.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
@@ -729,6 +730,7 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer)
fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) {
/* add the buffer */
st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex);
+ _mesa_update_state(ctx);
st_validate_state(st, ST_PIPELINE_RENDER);
}
}
From 4213b00e30d4d70823dca25e299e7b034c91d94c Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Thu, 7 Apr 2016 10:53:12 -0700
Subject: [PATCH 003/224] i965: Extract SSEU configuration info
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/intel_screen.c | 35 ++++++++++++++----------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index c6eb50aaba8..8c687b3ae1c 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1078,6 +1078,26 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv)
_mesa_reference_framebuffer(&fb, NULL);
}
+static void
+intel_detect_sseu(struct intel_screen *intelScreen)
+{
+ intelScreen->subslice_total = -1;
+ intelScreen->eu_total = -1;
+
+ intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
+ &intelScreen->subslice_total);
+ intel_get_param(intelScreen->driScrnPriv,
+ I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
+
+ /* Without this information, we cannot get the right Braswell brandstrings,
+ * and we have to use conservative numbers for GPGPU on many platforms, but
+ * otherwise, things will just work.
+ */
+ if (intelScreen->subslice_total == -1 || intelScreen->eu_total == -1)
+ _mesa_warning(NULL,
+ "Kernel 4.1 required to properly query GPU properties.\n");
+}
+
static bool
intel_init_bufmgr(struct intel_screen *intelScreen)
{
@@ -1100,24 +1120,11 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
return false;
}
- intelScreen->subslice_total = -1;
- intelScreen->eu_total = -1;
-
/* Everything below this is for real hardware only */
if (intelScreen->no_hw || devid_override)
return true;
- intel_get_param(spriv, I915_PARAM_SUBSLICE_TOTAL,
- &intelScreen->subslice_total);
- intel_get_param(spriv, I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
-
- /* Without this information, we cannot get the right Braswell brandstrings,
- * and we have to use conservative numbers for GPGPU on many platforms, but
- * otherwise, things will just work.
- */
- if (intelScreen->subslice_total == -1 || intelScreen->eu_total == -1)
- _mesa_warning(NULL,
- "Kernel 4.1 required to properly query GPU properties.\n");
+ intel_detect_sseu(intelScreen);
return true;
}
From cc01b63d730d151097dd6c3d2030a4731e09a393 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Thu, 7 Apr 2016 10:53:13 -0700
Subject: [PATCH 004/224] i965: Fix eu/subslice warning
Older gen platforms do not actually return a value for subslice and eu total;
(IMO, confusingly) they return -ENODEV. This patch defers the SSEU setup until
we have the actual GPU generation to avoid useless warnings when running on
older platforms with older kernels.
Reported-by: Mark Janes
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/intel_screen.c | 34 ++++++++++++++++--------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 8c687b3ae1c..b596017c654 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1081,13 +1081,21 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv)
static void
intel_detect_sseu(struct intel_screen *intelScreen)
{
+ assert(intelScreen->devinfo->gen >= 8);
+ int ret;
+
intelScreen->subslice_total = -1;
intelScreen->eu_total = -1;
- intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
- &intelScreen->subslice_total);
- intel_get_param(intelScreen->driScrnPriv,
- I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
+ ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
+ &intelScreen->subslice_total);
+ if (ret != -EINVAL)
+ goto err_out;
+
+ ret = intel_get_param(intelScreen->driScrnPriv,
+ I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
+ if (ret != -EINVAL)
+ goto err_out;
/* Without this information, we cannot get the right Braswell brandstrings,
* and we have to use conservative numbers for GPGPU on many platforms, but
@@ -1096,13 +1104,19 @@ intel_detect_sseu(struct intel_screen *intelScreen)
if (intelScreen->subslice_total == -1 || intelScreen->eu_total == -1)
_mesa_warning(NULL,
"Kernel 4.1 required to properly query GPU properties.\n");
+
+ return;
+
+err_out:
+ intelScreen->subslice_total = -1;
+ intelScreen->eu_total = -1;
+ _mesa_warning(NULL, "Failed to query GPU properties.\n");
}
static bool
intel_init_bufmgr(struct intel_screen *intelScreen)
{
__DRIscreen *spriv = intelScreen->driScrnPriv;
- bool devid_override = getenv("INTEL_DEVID_OVERRIDE") != NULL;
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
@@ -1120,12 +1134,6 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
return false;
}
- /* Everything below this is for real hardware only */
- if (intelScreen->no_hw || devid_override)
- return true;
-
- intel_detect_sseu(intelScreen);
-
return true;
}
@@ -1480,6 +1488,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
+ /* GENs prior to 8 do not support EU/Subslice info */
+ if (intelScreen->devinfo->gen >= 8)
+ intel_detect_sseu(intelScreen);
+
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
intelScreen->winsys_msaa_samples_override =
From e5295b5fb442d433b22e3b6f4b5c28a0e48677cd Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Thu, 7 Apr 2016 10:53:14 -0700
Subject: [PATCH 005/224] i965: Check eu/subslices are > 0
Now that the check is restricted to gen8+, we should always get back a non-zero
positive value for the EU and subslice counts.
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index b596017c654..03e6852a7f1 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1101,7 +1101,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
* and we have to use conservative numbers for GPGPU on many platforms, but
* otherwise, things will just work.
*/
- if (intelScreen->subslice_total == -1 || intelScreen->eu_total == -1)
+ if (intelScreen->subslice_total < 1 || intelScreen->eu_total < 1)
_mesa_warning(NULL,
"Kernel 4.1 required to properly query GPU properties.\n");
From f6f54a29ca9bc8c1a1a994ff4e3ee09772de78e4 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 7 Apr 2016 10:52:28 -0700
Subject: [PATCH 006/224] i965/tiled_memcopy: Add aligned mem_copy parameters
to the [de]tiling functions
Each of the [de]tiling functions has three mem_copy calls:
1) Left edge to tile boundary
2) Tile boundary to tile boundary in a loop
3) Tile boundary to right edge
Copies 2 and 3 start at a tile edge so the pointer to tiled memory is
guaranteed to be at least 16-byte aligned. Copy 1, on the other hand,
starts at some arbitrary place in the tile so it doesn't have any such
alignment guarantees.
Cc: "11.1 11.2"
Reviewed-by: Matt Turner
Reviewed-by: Roland Scheidegger
Reviewed-by: Chad Versace
---
.../drivers/dri/i965/intel_tiled_memcpy.c | 75 +++++++++++--------
1 file changed, 43 insertions(+), 32 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 31354582964..0ffd3ffcab5 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -172,6 +172,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* Copy texture data from linear to X tile layout.
*
* \copydoc tile_copy_fn
+ *
+ * The mem_copy parameters allow the user to specify an alternative mem_copy
+ * function that, for instance, may do RGBA -> BGRA swizzling. The first
+ * function must handle any memory alignment while the second function must
+ * only handle 16-byte alignment in whichever side (source or destination) is
+ * tiled.
*/
static inline void
linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
@@ -179,7 +185,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t src_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* The copy destination offset for each range copied is the sum of
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -200,10 +207,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0);
for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
}
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
src += src_pitch;
}
@@ -220,7 +227,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t src_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
* as the tile). Thus the destination offset for (x,y) is the sum of:
@@ -259,12 +267,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
xo += bytes_per_column;
swizzle ^= swizzle_bit;
}
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
src += src_pitch;
}
@@ -281,7 +289,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t dst_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* The copy destination offset for each range copied is the sum of
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -302,10 +311,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0);
for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
+ mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
}
- mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+ mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
dst += dst_pitch;
}
@@ -322,7 +331,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t dst_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
* as the tile). Thus the destination offset for (x,y) is the sum of:
@@ -361,12 +371,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
+ mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
xo += bytes_per_column;
swizzle ^= swizzle_bit;
}
- mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+ mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
dst += dst_pitch;
}
@@ -393,26 +403,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
if (mem_copy == memcpy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit,
+ memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy);
+ dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -435,26 +446,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
if (mem_copy == memcpy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy);
+ dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -477,26 +488,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
if (mem_copy == memcpy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy);
+ dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -519,26 +530,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
if (mem_copy == memcpy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
ytiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy);
+ dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
From d2b32656e18607f5807b3f4d4dde02568370b9bf Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 7 Apr 2016 11:21:19 -0700
Subject: [PATCH 007/224] i965/tiled_memcpy: Rework the RGBA -> BGRA mem_copy
functions
This splits the two copy functions into three: One for unaligned copies,
one for aligned sources, and one for aligned destinations. Thanks to the
previous commit, we are now guaranteed that the aligned ones will *only*
operate on aligned memory so they should be safe.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93962
Cc: "11.1 11.2"
Reviewed-by: Matt Turner
Reviewed-by: Roland Scheidegger
Reviewed-by: Chad Versace
---
.../drivers/dri/i965/intel_tiled_memcpy.c | 141 ++++++++----------
1 file changed, 64 insertions(+), 77 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 0ffd3ffcab5..66c1f9b82f6 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,48 +56,16 @@ static const uint32_t ytile_width = 128;
static const uint32_t ytile_height = 32;
static const uint32_t ytile_span = 16;
-#ifdef __SSSE3__
-static const uint8_t rgba8_permutation[16] =
- { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
-
-/* NOTE: dst must be 16-byte aligned. src may be unaligned. */
-#define rgba8_copy_16_aligned_dst(dst, src) \
- _mm_store_si128((__m128i *)(dst), \
- _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \
- *(__m128i *) rgba8_permutation))
-
-/* NOTE: src must be 16-byte aligned. dst may be unaligned. */
-#define rgba8_copy_16_aligned_src(dst, src) \
- _mm_storeu_si128((__m128i *)(dst), \
- _mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \
- *(__m128i *) rgba8_permutation))
-#endif
-
/**
- * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned.
+ * Copy RGBA to BGRA - swap R and B.
*/
static inline void *
-rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
+rgba8_copy(void *dst, const void *src, size_t bytes)
{
uint8_t *d = dst;
uint8_t const *s = src;
-#ifdef __SSSE3__
- if (bytes == 16) {
- assert(!(((uintptr_t)dst) & 0xf));
- rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
- return dst;
- }
-
- if (bytes == 64) {
- assert(!(((uintptr_t)dst) & 0xf));
- rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
- rgba8_copy_16_aligned_dst(d+16, s+16);
- rgba8_copy_16_aligned_dst(d+32, s+32);
- rgba8_copy_16_aligned_dst(d+48, s+48);
- return dst;
- }
-#endif
+ assert(bytes % 4 == 0);
while (bytes >= 4) {
d[0] = s[2];
@@ -111,6 +79,38 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
return dst;
}
+#ifdef __SSSE3__
+static const uint8_t rgba8_permutation[16] =
+ { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
+#endif
+
+/**
+ * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned.
+ */
+static inline void *
+rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
+{
+ uint8_t *d = dst;
+ uint8_t const *s = src;
+
+ assert(bytes == 0 || !(((uintptr_t)dst) & 0xf));
+
+#ifdef __SSSE3__
+ while (bytes >= 16) {
+ _mm_store_si128((__m128i *)d,
+ _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s),
+ *(__m128i *) rgba8_permutation));
+ s += 16;
+ d += 16;
+ bytes -= 16;
+ }
+#endif
+
+ rgba8_copy(d, s, bytes);
+
+ return dst;
+}
+
/**
* Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned.
*/
@@ -120,32 +120,21 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
uint8_t *d = dst;
uint8_t const *s = src;
-#ifdef __SSSE3__
- if (bytes == 16) {
- assert(!(((uintptr_t)src) & 0xf));
- rgba8_copy_16_aligned_src(d+ 0, s+ 0);
- return dst;
- }
+ assert(bytes == 0 || !(((uintptr_t)src) & 0xf));
- if (bytes == 64) {
- assert(!(((uintptr_t)src) & 0xf));
- rgba8_copy_16_aligned_src(d+ 0, s+ 0);
- rgba8_copy_16_aligned_src(d+16, s+16);
- rgba8_copy_16_aligned_src(d+32, s+32);
- rgba8_copy_16_aligned_src(d+48, s+48);
- return dst;
+#ifdef __SSSE3__
+ while (bytes >= 16) {
+ _mm_storeu_si128((__m128i *)d,
+ _mm_shuffle_epi8(_mm_load_si128((__m128i *)s),
+ *(__m128i *) rgba8_permutation));
+ s += 16;
+ d += 16;
+ bytes -= 16;
}
#endif
- while (bytes >= 4) {
- d[0] = s[2];
- d[1] = s[1];
- d[2] = s[0];
- d[3] = s[3];
- d += 4;
- s += 4;
- bytes -= 4;
- }
+ rgba8_copy(d, s, bytes);
+
return dst;
}
@@ -404,10 +393,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
@@ -415,10 +404,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
@@ -447,20 +436,20 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
@@ -489,20 +478,20 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
@@ -531,20 +520,20 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
@@ -775,8 +764,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
if (format == GL_BGRA) {
*mem_copy = memcpy;
} else if (format == GL_RGBA) {
- *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
- : rgba8_copy_aligned_src;
+ *mem_copy = rgba8_copy;
}
} else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
(tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) ||
@@ -787,8 +775,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
/* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
* use the same function.
*/
- *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
- : rgba8_copy_aligned_src;
+ *mem_copy = rgba8_copy;
} else if (format == GL_RGBA) {
*mem_copy = memcpy;
}
From 6cc7aec5b05ff19d9bdf2b33c993257625d7c9fc Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 7 Apr 2016 11:36:47 -0700
Subject: [PATCH 008/224] i965/tiled_memcopy: Get rid of the direction
parameter to get_memcpy
Now that we can use the much simpler rgba8_copy function, we don't need to
hand different functions out based on direction.
Reviewed-by: Matt Turner
Reviewed-by: Roland Scheidegger
Reviewed-by: Chad Versace
---
src/mesa/drivers/dri/i965/intel_pixel_read.c | 3 +--
src/mesa/drivers/dri/i965/intel_tex_image.c | 3 +--
src/mesa/drivers/dri/i965/intel_tex_subimage.c | 3 +--
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 3 +--
src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 15 +--------------
5 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 31030b1b4ea..a486d6e1ab9 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -141,8 +141,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
if (rb->_BaseFormat == GL_RGB)
return false;
- if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp,
- INTEL_DOWNLOAD))
+ if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
return false;
if (!irb->mt ||
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 1601edddef6..bee8be1fd27 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -404,8 +404,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
if (texImage->_BaseFormat == GL_RGB)
return false;
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
- INTEL_DOWNLOAD))
+ if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 4849a4151e2..9561968d2d6 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -119,8 +119,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
if (ctx->_ImageTransferState)
return false;
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
- INTEL_UPLOAD))
+ if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 66c1f9b82f6..0a68751d5d0 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -745,8 +745,7 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
* \return true if the format and type combination are valid
*/
bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp,
- enum intel_memcpy_direction direction)
+ GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp)
{
if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
!(format == GL_RGBA || format == GL_BGRA))
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
index 01543bf298d..d9148bb6239 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -55,20 +55,7 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
uint32_t tiling,
mem_copy_fn mem_copy);
-/* Tells intel_get_memcpy() whether the memcpy() is
- *
- * - an upload to the GPU with an aligned destination and a potentially
- * unaligned source; or
- * - a download from the GPU with an aligned source and a potentially
- * unaligned destination.
- */
-enum intel_memcpy_direction {
- INTEL_UPLOAD,
- INTEL_DOWNLOAD
-};
-
bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp,
- enum intel_memcpy_direction direction);
+ GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp);
#endif /* INTEL_TILED_MEMCPY */
From e25c24c6389e5dbbb7ebf59c302659e5d6417ed4 Mon Sep 17 00:00:00 2001
From: Lars Hamre
Date: Fri, 8 Apr 2016 10:06:23 -0400
Subject: [PATCH 009/224] glsl: handle unsigned int wraparound in
link_shaders()
v2: change check_explicit_uniform_locations() to return an
unsigned 0 (Timothy Arceri)
We were storing the int result of check_explicit_uniform_locations()
in num_explicit_uniform_locs as an unsigned int which caused it to
be 4294967295 when a -1 was returned.
This in turn would cause the following error during linking:
error: count of uniform locations > MAX_UNIFORM_LOCATIONS(4294967295 > 98304)
Results from running piglit tests/all with this patch
and when ARB_explicit_uniform_location disabled:
changes: 178
fixes: 176
regressions: 2
The two regressions are for the following tests:
glean@glsl1-matrix column check (1)
glean@glsl1-matrix column check (2)
which regress from FAIL to CRASH.
The regressions are acceptable because the tests are currently failing due to
the aforementioned linker error.
Signed-off-by: Lars Hamre
Reviewed-by: Timothy Arceri
---
src/compiler/glsl/linker.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 957efe5b55d..10b5a8f721e 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3249,12 +3249,12 @@ reserve_subroutine_explicit_locations(struct gl_shader_program *prog,
* any optimizations happen to handle also inactive uniforms and
* inactive array elements that may get trimmed away.
*/
-static int
+static unsigned
check_explicit_uniform_locations(struct gl_context *ctx,
struct gl_shader_program *prog)
{
if (!ctx->Extensions.ARB_explicit_uniform_location)
- return -1;
+ return 0;
/* This map is used to detect if overlapping explicit locations
* occur with the same uniform (from different stage) or a different one.
@@ -3263,7 +3263,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (!uniform_map) {
linker_error(prog, "Out of memory during linking.\n");
- return -1;
+ return 0;
}
unsigned entries_total = 0;
@@ -3292,7 +3292,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
}
if (!ret) {
delete uniform_map;
- return -1;
+ return 0;
}
}
}
From e529dd179f3f4bcc069239e9c2710ea23616a513 Mon Sep 17 00:00:00 2001
From: Rhys Kidd
Date: Fri, 1 Apr 2016 19:46:30 -0400
Subject: [PATCH 010/224] vc4: Remove unused include from vc4_program.c
Found with grep and inspection. Test compiled on RPi hw.
Assists any future effort to remove TGSI as an intermediate stage.
Signed-off-by: Rhys Kidd
Reviewed-by: Eric Anholt
---
src/gallium/drivers/vc4/vc4_program.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 71a1ebbb313..e698118d8e0 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -30,7 +30,6 @@
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
From e5997778bcafedd0d32b2399cb621a736051c4c2 Mon Sep 17 00:00:00 2001
From: Rhys Kidd
Date: Sat, 12 Mar 2016 18:34:01 -0500
Subject: [PATCH 011/224] vc4: Add better debug of NIR->QIR control flow graph
failure
Ensure NIR control flow graph nodes that are unhandled in QIR
are reported with sufficient verbosity to aid debugging.
This improves piglit outputs, amongst other tools.
There are no other remaining uses of assert(0) as a blunt tool
within vc4.
Signed-off-by: Rhys Kidd
Reviewed-by: Eric Anholt
---
src/gallium/drivers/vc4/vc4_program.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index e698118d8e0..921092bdf1b 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1708,7 +1708,8 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
break;
default:
- assert(0);
+ fprintf(stderr, "Unknown NIR node type\n");
+ abort();
}
}
}
From 2450b219e5706c86d0539b38f5f579bff148e9ef Mon Sep 17 00:00:00 2001
From: Rhys Kidd
Date: Sat, 12 Mar 2016 18:34:02 -0500
Subject: [PATCH 012/224] vc4: Add a stub for NIR->QIR of control flow function
nodes
We shouldn't have any NIR functions present since all GLSL functions get
inlined, but this would be a more informative error if it does happen.
Signed-off-by: Rhys Kidd
Reviewed-by: Eric Anholt
---
src/gallium/drivers/vc4/vc4_program.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 921092bdf1b..1439e1f36d9 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1693,6 +1693,13 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
}
}
+static void
+ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
+{
+ fprintf(stderr, "FUNCTIONS not handled.\n");
+ abort();
+}
+
static void
ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
{
@@ -1707,6 +1714,10 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
ntq_emit_if(c, nir_cf_node_as_if(node));
break;
+ case nir_cf_node_function:
+ ntq_emit_function(c, nir_cf_node_as_function(node));
+ break;
+
default:
fprintf(stderr, "Unknown NIR node type\n");
abort();
From 40e77741cf1e9a74b867c7d132ca2346fe1584e4 Mon Sep 17 00:00:00 2001
From: Rhys Kidd
Date: Tue, 15 Mar 2016 23:00:28 -0400
Subject: [PATCH 013/224] vc4: Emit a warning and proceed for handling loops in
NIR.
We don't really support control flow yet, but it's a lot nicer to render
something and warn on stderr than to crash.
Fixes the following piglit tests:
- shaders/complex-loop-analysis-bug
- shaders/glsl-fs-discard-04
Converts the following piglit tests from crash to fail:
- shaders/glsl-fs-continue-inside-do-while
- shaders/glsl-fs-loop
- shaders/glsl-fs-loop-continue
- shaders/glsl-fs-loop-nested
- shaders/glsl-texcoord-array
- shaders/glsl-vs-continue-inside-do-while
- shaders/glsl-vs-loop
- shaders/glsl-vs-loop-continue
- shaders/glsl-vs-loop-nested
No piglit regressions.
v2 (Eric): Add stronger stderr warning.
Signed-off-by: Rhys Kidd
Reviewed-by: Eric Anholt
---
src/gallium/drivers/vc4/vc4_program.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 1439e1f36d9..6a8fad939ca 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1693,6 +1693,15 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
}
}
+static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
+
+static void
+ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+{
+ fprintf(stderr, "LOOPS not fully handled. Rendering errors likely.\n");
+ ntq_emit_cf_list(c, &nloop->body);
+}
+
static void
ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
{
@@ -1705,7 +1714,6 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
- /* case nir_cf_node_loop: */
case nir_cf_node_block:
ntq_emit_block(c, nir_cf_node_as_block(node));
break;
@@ -1714,6 +1722,10 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
ntq_emit_if(c, nir_cf_node_as_if(node));
break;
+ case nir_cf_node_loop:
+ ntq_emit_loop(c, nir_cf_node_as_loop(node));
+ break;
+
case nir_cf_node_function:
ntq_emit_function(c, nir_cf_node_as_function(node));
break;
From 7030eadbed7cbcdb00ce41bbbc4ca93854314d71 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 8 Apr 2016 14:53:55 -0700
Subject: [PATCH 014/224] vc4: Handle nir_intrinsic_load_user_clip_plane as a
vec4.
I liked having all my NIR be scalar, but nir_validate() complains that the
intrinsic writes 4 components but the destination we set up was only 1
component. I could generate a new scalar variant, but it's a lot easier
to just leave it as a vec4. This doesn't hurt codegen since we GC unused
uniforms, and UCP dot products use all the components anyway.
---
src/gallium/drivers/vc4/vc4_nir_lower_io.c | 24 +++++++---------------
src/gallium/drivers/vc4/vc4_program.c | 6 ++++--
2 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index d08ad588e5b..d0391c0262e 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -380,24 +380,14 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
intr_comp->num_components = 1;
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
- /* Convert the uniform (not user_clip_plane) offset to bytes.
- * If it happens to be a constant, constant-folding will clean
- * up the shift for us.
+ /* Convert the uniform offset to bytes. If it happens to be a
+ * constant, constant-folding will clean up the shift for us.
*/
- if (intr->intrinsic == nir_intrinsic_load_uniform) {
- /* Convert the base offset to bytes and add the
- * component
- */
- intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4);
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4);
- intr_comp->src[0] =
- nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
- nir_imm_int(b, 4)));
- } else {
- assert(intr->intrinsic ==
- nir_intrinsic_load_user_clip_plane);
- intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
- }
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
+ nir_imm_int(b, 4)));
dests[i] = &intr_comp->dest.ssa;
@@ -428,10 +418,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
break;
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_user_clip_plane:
vc4_nir_lower_uniform(c, b, intr);
break;
+ case nir_intrinsic_load_user_clip_plane:
default:
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6a8fad939ca..fbefd217ce0 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1572,8 +1572,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_user_clip_plane:
- *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
- instr->const_index[0]);
+ for (int i = 0; i < instr->num_components; i++) {
+ dest[i] = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+ instr->const_index[0] * 4 + i);
+ }
break;
case nir_intrinsic_load_sample_mask_in:
From 99a759a4a3c29c283ae93612017d2f31c0ddbe73 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 8 Apr 2016 14:05:22 -0700
Subject: [PATCH 015/224] vc4: Switch to using NIR_PASS macros.
This gets us better validation of our NIR transformations.
---
src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 4 +-
src/gallium/drivers/vc4/vc4_nir_lower_io.c | 4 +-
.../drivers/vc4/vc4_nir_lower_txf_ms.c | 4 +-
src/gallium/drivers/vc4/vc4_program.c | 45 +++++++++----------
src/gallium/drivers/vc4/vc4_qir.h | 6 +--
5 files changed, 31 insertions(+), 32 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 49a314cdb25..cf6d2896f7d 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -710,9 +710,9 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
}
void
-vc4_nir_lower_blend(struct vc4_compile *c)
+vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl) {
nir_foreach_block(function->impl,
vc4_nir_lower_blend_block, c);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index d0391c0262e..22c602adb54 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -455,9 +455,9 @@ vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
}
void
-vc4_nir_lower_io(struct vc4_compile *c)
+vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl)
vc4_nir_lower_io_impl(c, function->impl);
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
index 8b65cac5084..6b8830743eb 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
@@ -162,9 +162,9 @@ vc4_nir_lower_txf_ms_impl(struct vc4_compile *c, nir_function_impl *impl)
}
void
-vc4_nir_lower_txf_ms(struct vc4_compile *c)
+vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl)
vc4_nir_lower_txf_ms_impl(c, function->impl);
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index fbefd217ce0..d99862ad5ba 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1370,16 +1370,16 @@ vc4_optimize_nir(struct nir_shader *s)
do {
progress = false;
- nir_lower_vars_to_ssa(s);
- nir_lower_alu_to_scalar(s);
+ NIR_PASS_V(s, nir_lower_vars_to_ssa);
+ NIR_PASS_V(s, nir_lower_alu_to_scalar);
- progress = nir_copy_prop(s) || progress;
- progress = nir_opt_dce(s) || progress;
- progress = nir_opt_cse(s) || progress;
- progress = nir_opt_peephole_select(s) || progress;
- progress = nir_opt_algebraic(s) || progress;
- progress = nir_opt_constant_folding(s) || progress;
- progress = nir_opt_undef(s) || progress;
+ NIR_PASS(progress, s, nir_copy_prop);
+ NIR_PASS(progress, s, nir_opt_dce);
+ NIR_PASS(progress, s, nir_opt_cse);
+ NIR_PASS(progress, s, nir_opt_peephole_select);
+ NIR_PASS(progress, s, nir_opt_algebraic);
+ NIR_PASS(progress, s, nir_opt_constant_folding);
+ NIR_PASS(progress, s, nir_opt_undef);
} while (progress);
}
@@ -1835,11 +1835,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
}
c->s = tgsi_to_nir(tokens, &nir_options);
- nir_opt_global_to_local(c->s);
- nir_convert_to_ssa(c->s);
+ NIR_PASS_V(c->s, nir_opt_global_to_local);
+ NIR_PASS_V(c->s, nir_convert_to_ssa);
if (stage == QSTAGE_FRAG)
- vc4_nir_lower_blend(c);
+ NIR_PASS_V(c->s, vc4_nir_lower_blend, c);
struct nir_lower_tex_options tex_options = {
/* We would need to implement txs, but we don't want the
@@ -1889,26 +1889,25 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
}
}
- nir_lower_tex(c->s, &tex_options);
+ NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
if (c->fs_key && c->fs_key->light_twoside)
- nir_lower_two_sided_color(c->s);
+ NIR_PASS_V(c->s, nir_lower_two_sided_color);
if (stage == QSTAGE_FRAG)
- nir_lower_clip_fs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
else
- nir_lower_clip_vs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
- vc4_nir_lower_io(c);
- vc4_nir_lower_txf_ms(c);
- nir_lower_idiv(c->s);
- nir_lower_load_const_to_scalar(c->s);
+ NIR_PASS_V(c->s, vc4_nir_lower_io, c);
+ NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
+ NIR_PASS_V(c->s, nir_lower_idiv);
+ NIR_PASS_V(c->s, nir_lower_load_const_to_scalar);
vc4_optimize_nir(c->s);
- nir_remove_dead_variables(c->s);
-
- nir_convert_from_ssa(c->s, true);
+ NIR_PASS_V(c->s, nir_remove_dead_variables);
+ NIR_PASS_V(c->s, nir_convert_from_ssa, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 3fbf5d749e7..dae1a0ba51d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -484,13 +484,13 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm(struct vc4_compile *c);
-void vc4_nir_lower_blend(struct vc4_compile *c);
-void vc4_nir_lower_io(struct vc4_compile *c);
+void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);
+void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);
nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
enum quniform_contents contents);
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
nir_ssa_def **srcs, int swiz);
-void vc4_nir_lower_txf_ms(struct vc4_compile *c);
+void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);
uint32_t qpu_schedule_instructions(struct vc4_compile *c);
From e46b48963a6f336049af3d10649bab48cd12fdee Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 12:18:07 -0700
Subject: [PATCH 016/224] vc4: Allow multi-instruction QIR nodes to get VPM
optimization.
There used to be multi-instruction operations that would use src[] twice,
which is why we couldn't do some optimizations on them. This is no longer
the case.
total instructions in shared programs: 77973 -> 77969 (-0.01%)
instructions in affected programs: 84 -> 80 (-4.76%)
total estimated cycles in shared programs: 234165 -> 234157 (-0.00%)
estimated cycles in affected programs: 92 -> 84 (-8.70%)
---
src/gallium/drivers/vc4/vc4_opt_vpm.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index d15b0c1a39f..d31b673bd63 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -65,7 +65,7 @@ qir_opt_vpm(struct vc4_compile *c)
* result, try to move the instruction up in place of the VPM read.
*/
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
- if (!inst || qir_is_multi_instruction(inst))
+ if (!inst)
continue;
if (qir_depends_on_flags(inst) || inst->sf)
@@ -132,7 +132,7 @@ qir_opt_vpm(struct vc4_compile *c)
continue;
struct qinst *inst = c->defs[temp];
- if (!inst || qir_is_multi_instruction(inst))
+ if (!inst)
continue;
if (qir_depends_on_flags(inst) || inst->sf)
From a8b525f8c4a71da42529453b3d0a7604fbd3195d Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 12:44:31 -0700
Subject: [PATCH 017/224] vc4: Handle SF on instructions that write r4.
Normal SFU writes couldn't have SF because they were marked as
multi_instruction, but tex_result and tlb_color_read weren't. This ended
up not being a problem according to anything in shader-db, but it seems
possible.
---
src/gallium/drivers/vc4/vc4_qpu_emit.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index b507e370683..63e1ee54305 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -167,6 +167,16 @@ set_last_dst_pack(struct vc4_compile *c, struct qinst *inst)
}
}
+static void
+handle_r4_qpu_write(struct vc4_compile *c, struct qinst *qinst,
+ struct qpu_reg dst)
+{
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
+ else if (qinst->sf)
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_r4()));
+}
+
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
@@ -339,8 +349,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
abort();
}
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ handle_r4_qpu_write(c, qinst, dst);
break;
@@ -391,9 +400,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_COLOR_LOAD);
-
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ handle_r4_qpu_write(c, qinst, dst);
break;
case QOP_TLB_COLOR_WRITE:
@@ -432,8 +439,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_LOAD_TMU0);
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ handle_r4_qpu_write(c, qinst, dst);
break;
default:
@@ -476,10 +482,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
assert(qinst->cond == QPU_COND_ALWAYS ||
handled_qinst_cond);
- if (qinst->sf) {
- assert(!qir_is_multi_instruction(qinst));
+ if (qinst->sf)
*last_inst(c) |= QPU_SF;
- }
}
uint32_t cycles = qpu_schedule_instructions(c);
From 483c172989be74a992befce3c0a9058a82b35c80 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 12:49:08 -0700
Subject: [PATCH 018/224] vc4: Drop the multi_instruction distinction for QIR
instructions.
It wasn't correctly flagged everywhere, and QPU generation now handles the
only remaining case that was paying attention to it.
No change on shader-db.
---
src/gallium/drivers/vc4/vc4_qir.c | 18 +++++-------------
src/gallium/drivers/vc4/vc4_qir.h | 1 -
2 files changed, 5 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e73e3899410..1eb09d7c3bc 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -31,7 +31,6 @@ struct qir_op_info {
const char *name;
uint8_t ndst, nsrc;
bool has_side_effects;
- bool multi_instruction;
};
static const struct qir_op_info qir_op_info[] = {
@@ -65,10 +64,10 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_XOR] = { "xor", 1, 2 },
[QOP_NOT] = { "not", 1, 1 },
- [QOP_RCP] = { "rcp", 1, 1, false, true },
- [QOP_RSQ] = { "rsq", 1, 1, false, true },
- [QOP_EXP2] = { "exp2", 1, 2, false, true },
- [QOP_LOG2] = { "log2", 1, 2, false, true },
+ [QOP_RCP] = { "rcp", 1, 1 },
+ [QOP_RSQ] = { "rsq", 1, 1 },
+ [QOP_EXP2] = { "exp2", 1, 2 },
+ [QOP_LOG2] = { "log2", 1, 2 },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
@@ -143,12 +142,6 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
return false;
}
-bool
-qir_is_multi_instruction(struct qinst *inst)
-{
- return qir_op_info[inst->op].multi_instruction;
-}
-
bool
qir_is_mul(struct qinst *inst)
{
@@ -492,8 +485,7 @@ qir_SF(struct vc4_compile *c, struct qreg src)
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
- last_inst != c->defs[src.index] ||
- qir_is_multi_instruction(last_inst)) {
+ last_inst != c->defs[src.index]) {
struct qreg null = { QFILE_NULL, 0 };
last_inst = qir_MOV_dest(c, null, src);
last_inst = (struct qinst *)c->instructions.prev;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index dae1a0ba51d..d973b8d58bd 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -463,7 +463,6 @@ int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
-bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
From 114c8b38d317629769ad0fcf3f84e24951ffcd7c Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 12:58:48 -0700
Subject: [PATCH 019/224] vc4: Add missing scheduling dependency for MS color
writes.
---
src/gallium/drivers/vc4/vc4_qir_schedule.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 186e81be750..4585918bc7d 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -229,6 +229,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
break;
case QOP_TLB_COLOR_WRITE:
+ case QOP_TLB_COLOR_WRITE_MS:
case QOP_TLB_COLOR_READ:
case QOP_TLB_Z_WRITE:
case QOP_TLB_STENCIL_SETUP:
From 44d7b8ad12df504058615901c7233c45e4f24a9f Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 14:17:45 -0700
Subject: [PATCH 020/224] vc4: Add a helper function for the construction of
qregs.
The separate declaration of the struct is not helping clarity, and I was
going to be writing a whole lot more of these in the upcoming patches.
---
src/gallium/drivers/vc4/vc4_program.c | 7 +++----
src/gallium/drivers/vc4/vc4_qir.c | 8 +++-----
src/gallium/drivers/vc4/vc4_qir.h | 8 ++++++--
src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 2 +-
4 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index d99862ad5ba..d1e893a76a9 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -637,8 +637,8 @@ emit_vertex_input(struct vc4_compile *c, int attr)
c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
- struct qreg vpm = { QFILE_VPM, attr * 4 + i };
- c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
+ c->inputs[attr * 4 + i] =
+ qir_MOV(c, qir_reg(QFILE_VPM, attr * 4 + i));
c->num_inputs++;
}
}
@@ -1303,8 +1303,7 @@ emit_stub_vpm_read(struct vc4_compile *c)
return;
c->vattr_sizes[0] = 4;
- struct qreg vpm = { QFILE_VPM, 0 };
- (void)qir_MOV(c, vpm);
+ (void)qir_MOV(c, qir_reg(QFILE_VPM, 0));
c->num_inputs++;
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 1eb09d7c3bc..c6d5a79eae1 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -448,12 +448,11 @@ qir_uniform(struct vc4_compile *c,
for (int i = 0; i < c->num_uniforms; i++) {
if (c->uniform_contents[i] == contents &&
c->uniform_data[i] == data) {
- return (struct qreg) { QFILE_UNIF, i };
+ return qir_reg(QFILE_UNIF, i);
}
}
uint32_t uniform = c->num_uniforms++;
- struct qreg u = { QFILE_UNIF, uniform };
if (uniform >= c->uniform_array_size) {
c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
@@ -470,7 +469,7 @@ qir_uniform(struct vc4_compile *c,
c->uniform_contents[uniform] = contents;
c->uniform_data[uniform] = data;
- return u;
+ return qir_reg(QFILE_UNIF, uniform);
}
void
@@ -486,8 +485,7 @@ qir_SF(struct vc4_compile *c, struct qreg src)
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
last_inst != c->defs[src.index]) {
- struct qreg null = { QFILE_NULL, 0 };
- last_inst = qir_MOV_dest(c, null, src);
+ last_inst = qir_MOV_dest(c, qir_reg(QFILE_NULL, 0), src);
last_inst = (struct qinst *)c->instructions.prev;
}
last_inst->sf = true;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index d973b8d58bd..4aec313831f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -63,6 +63,11 @@ struct qreg {
int pack;
};
+static inline struct qreg qir_reg(enum qfile file, uint32_t index)
+{
+ return (struct qreg){file, index};
+}
+
enum qop {
QOP_UNDEF,
QOP_MOV,
@@ -702,8 +707,7 @@ qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
static inline void
qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
{
- static const struct qreg vpm = { QFILE_VPM, 0 };
- qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
+ qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);
}
#endif /* VC4_QIR_H */
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index a57e100593c..927268d71ef 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -150,7 +150,7 @@ qir_lower_uniforms(struct vc4_compile *c)
* reference a temp instead.
*/
struct qreg temp = qir_get_temp(c);
- struct qreg unif = { QFILE_UNIF, max_index };
+ struct qreg unif = qir_reg(QFILE_UNIF, max_index);
struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
list_add(&mov->link, &c->instructions);
c->defs[temp.index] = mov;
From f029932cac36859df5a6d04d1dd7343672ced83a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 13:12:41 -0700
Subject: [PATCH 021/224] vc4: Allow TLB Z/color/stencil writes from any ALU
operation in QIR.
This lets us write the Z directly from the FTOI for computed Z, and may
let us coalesce color writes in the future.
No change in my shader-db, but clearly drops an instruction in piglit's
early-z test.
---
src/gallium/drivers/vc4/vc4_program.c | 29 +++++++------
src/gallium/drivers/vc4/vc4_qir.c | 42 +++++++++++++++----
src/gallium/drivers/vc4/vc4_qir.h | 12 ++----
src/gallium/drivers/vc4/vc4_qir_schedule.c | 35 +++++++++++-----
src/gallium/drivers/vc4/vc4_qpu_emit.c | 47 ++++++++++------------
5 files changed, 100 insertions(+), 65 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index d1e893a76a9..35bad7e9296 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1192,12 +1192,15 @@ emit_frag_end(struct vc4_compile *c)
}
if (c->fs_key->stencil_enabled) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 0));
if (c->fs_key->stencil_twoside) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 1));
}
if (c->fs_key->stencil_full_writemasks) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 2));
}
}
@@ -1206,24 +1209,24 @@ emit_frag_end(struct vc4_compile *c)
}
if (c->fs_key->depth_enabled) {
- struct qreg z;
if (c->output_position_index != -1) {
- z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
- qir_uniform_f(c, 0xffffff)));
+ qir_FTOI_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+ qir_FMUL(c,
+ c->outputs[c->output_position_index + 2],
+ qir_uniform_f(c, 0xffffff)))->cond = discard_cond;
} else {
- z = qir_FRAG_Z(c);
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+ qir_FRAG_Z(c))->cond = discard_cond;
}
- struct qinst *inst = qir_TLB_Z_WRITE(c, z);
- inst->cond = discard_cond;
}
if (!c->msaa_per_sample_output) {
- struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
- inst->cond = discard_cond;
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE, 0),
+ color)->cond = discard_cond;
} else {
for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
- struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
- inst->cond = discard_cond;
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE_MS, 0),
+ c->sample_colors[i])->cond = discard_cond;
}
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index c6d5a79eae1..10b82ffc16c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -68,10 +68,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1 },
[QOP_EXP2] = { "exp2", 1, 2 },
[QOP_LOG2] = { "log2", 1, 2 },
- [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
- [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
- [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
- [QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true },
[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
[QOP_MS_MASK] = { "ms_mask", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
@@ -115,6 +111,16 @@ qir_get_op_nsrc(enum qop qop)
bool
qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
{
+ switch (inst->dst.file) {
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_STENCIL_SETUP:
+ return true;
+ default:
+ break;
+ }
+
return qir_op_info[inst->op].has_side_effects;
}
@@ -226,24 +232,44 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
[QFILE_TEMP] = "t",
[QFILE_VARY] = "v",
[QFILE_UNIF] = "u",
+ [QFILE_TLB_COLOR_WRITE] = "tlb_c",
+ [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
+ [QFILE_TLB_Z_WRITE] = "tlb_z",
+ [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil",
};
- if (reg.file == QFILE_NULL) {
+ switch (reg.file) {
+
+ case QFILE_NULL:
fprintf(stderr, "null");
- } else if (reg.file == QFILE_SMALL_IMM) {
+ break;
+
+ case QFILE_SMALL_IMM:
if ((int)reg.index >= -16 && (int)reg.index <= 15)
fprintf(stderr, "%d", reg.index);
else
fprintf(stderr, "%f", uif(reg.index));
- } else if (reg.file == QFILE_VPM) {
+ break;
+
+ case QFILE_VPM:
if (write) {
fprintf(stderr, "vpm");
} else {
fprintf(stderr, "vpm%d.%d",
reg.index / 4, reg.index % 4);
}
- } else {
+ break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ fprintf(stderr, "%s", files[reg.file]);
+ break;
+
+ default:
fprintf(stderr, "%s%d", files[reg.file], reg.index);
+ break;
}
if (reg.file == QFILE_UNIF &&
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 4aec313831f..970eafd542b 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -49,6 +49,10 @@ enum qfile {
QFILE_VARY,
QFILE_UNIF,
QFILE_VPM,
+ QFILE_TLB_COLOR_WRITE,
+ QFILE_TLB_COLOR_WRITE_MS,
+ QFILE_TLB_Z_WRITE,
+ QFILE_TLB_STENCIL_SETUP,
/**
* Stores an immediate value in the index field that can be turned
@@ -106,10 +110,6 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_TLB_STENCIL_SETUP,
- QOP_TLB_Z_WRITE,
- QOP_TLB_COLOR_WRITE,
- QOP_TLB_COLOR_WRITE_MS,
QOP_TLB_COLOR_READ,
QOP_MS_MASK,
QOP_VARY_ADD_C,
@@ -629,10 +629,6 @@ QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
-QIR_NODST_1(TLB_COLOR_WRITE)
-QIR_NODST_1(TLB_COLOR_WRITE_MS)
-QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_STENCIL_SETUP)
QIR_NODST_1(MS_MASK)
static inline struct qreg
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 4585918bc7d..8b843a3a158 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -228,11 +228,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
add_write_dep(dir, &state->last_tex_result, n);
break;
- case QOP_TLB_COLOR_WRITE:
- case QOP_TLB_COLOR_WRITE_MS:
case QOP_TLB_COLOR_READ:
- case QOP_TLB_Z_WRITE:
- case QOP_TLB_STENCIL_SETUP:
case QOP_MS_MASK:
add_write_dep(dir, &state->last_tlb, n);
break;
@@ -241,10 +237,25 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
break;
}
- if (inst->dst.file == QFILE_VPM)
+ switch (inst->dst.file) {
+ case QFILE_VPM:
add_write_dep(dir, &state->last_vpm_write, n);
- else if (inst->dst.file == QFILE_TEMP)
+ break;
+
+ case QFILE_TEMP:
add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
+ break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ add_write_dep(dir, &state->last_tlb, n);
+ break;
+
+ default:
+ break;
+ }
if (qir_depends_on_flags(inst))
add_dep(dir, state->last_sf, n);
@@ -358,11 +369,13 @@ get_register_pressure_cost(struct schedule_state *state, struct qinst *inst)
static bool
locks_scoreboard(struct qinst *inst)
{
- switch (inst->op) {
- case QOP_TLB_Z_WRITE:
- case QOP_TLB_COLOR_WRITE:
- case QOP_TLB_COLOR_WRITE_MS:
- case QOP_TLB_COLOR_READ:
+ if (inst->op == QOP_TLB_COLOR_READ)
+ return true;
+
+ switch (inst->dst.file) {
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 63e1ee54305..5c655495c2b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -300,6 +300,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ unreachable("bad qir src file");
}
}
@@ -314,6 +319,23 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VPM:
dst = qpu_ra(QPU_W_VPM);
break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ dst = qpu_tlbc();
+ break;
+
+ case QFILE_TLB_COLOR_WRITE_MS:
+ dst = qpu_tlbc_ms();
+ break;
+
+ case QFILE_TLB_Z_WRITE:
+ dst = qpu_ra(QPU_W_TLB_Z);
+ break;
+
+ case QFILE_TLB_STENCIL_SETUP:
+ dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP);
+ break;
+
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
@@ -383,19 +405,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*/
break;
- case QOP_TLB_STENCIL_SETUP:
- assert(!unpack);
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
- src[0]) | unpack);
- break;
-
- case QOP_TLB_Z_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
- src[0]) | unpack);
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
- break;
-
case QOP_TLB_COLOR_READ:
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
@@ -403,18 +412,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
handle_r4_qpu_write(c, qinst, dst);
break;
- case QOP_TLB_COLOR_WRITE:
- queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
- break;
-
- case QOP_TLB_COLOR_WRITE_MS:
- queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
- break;
-
case QOP_VARY_ADD_C:
queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;
From 30b818d5eb67c7427fbefb456c7bc2d876bf9eac Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 21 Mar 2016 14:11:10 -0700
Subject: [PATCH 022/224] vc4: Move FRAG_X/Y/REV_FLAG to a QFILE like VPM or
TLB color writes.
This gives us one less set of special instruction generation cases, and
instead just the case for returning the correct register to read.
---
src/gallium/drivers/vc4/vc4_program.c | 8 ++++---
src/gallium/drivers/vc4/vc4_qir.c | 6 +++---
src/gallium/drivers/vc4/vc4_qir.h | 13 ++++++------
src/gallium/drivers/vc4/vc4_qpu_emit.c | 29 +++++++++++++-------------
4 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 35bad7e9296..ca293bee182 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -646,8 +646,8 @@ emit_vertex_input(struct vc4_compile *c, int attr)
static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
- c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
- c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
+ c->inputs[attr * 4 + 0] = qir_ITOF(c, qir_reg(QFILE_FRAG_X, 0));
+ c->inputs[attr * 4 + 1] = qir_ITOF(c, qir_reg(QFILE_FRAG_Y, 0));
c->inputs[attr * 4 + 2] =
qir_FMUL(c,
qir_ITOF(c, qir_FRAG_Z(c)),
@@ -1428,7 +1428,9 @@ ntq_setup_inputs(struct vc4_compile *c)
if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
} else if (var->data.location == VARYING_SLOT_FACE) {
- c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
+ c->inputs[loc * 4 + 0] =
+ qir_ITOF(c, qir_reg(QFILE_FRAG_REV_FLAG,
+ 0));
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
(c->fs_key->point_sprite_mask &
(1 << (var->data.location -
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 10b82ffc16c..293eb01adab 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -72,11 +72,8 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_MS_MASK] = { "ms_mask", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
- [QOP_FRAG_X] = { "frag_x", 1, 0 },
- [QOP_FRAG_Y] = { "frag_y", 1, 0 },
[QOP_FRAG_Z] = { "frag_z", 1, 0 },
[QOP_FRAG_W] = { "frag_w", 1, 0 },
- [QOP_FRAG_REV_FLAG] = { "frag_rev_flag", 1, 0 },
[QOP_TEX_S] = { "tex_s", 0, 2 },
[QOP_TEX_T] = { "tex_t", 0, 2 },
@@ -236,6 +233,9 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
[QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
[QFILE_TLB_Z_WRITE] = "tlb_z",
[QFILE_TLB_STENCIL_SETUP] = "tlb_stencil",
+ [QFILE_FRAG_X] = "frag_x",
+ [QFILE_FRAG_Y] = "frag_y",
+ [QFILE_FRAG_REV_FLAG] = "frag_rev_flag",
};
switch (reg.file) {
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 970eafd542b..e8ba74b9a4d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -54,6 +54,13 @@ enum qfile {
QFILE_TLB_Z_WRITE,
QFILE_TLB_STENCIL_SETUP,
+ /* Payload registers that aren't in the physical register file, so we
+ * can just use the corresponding qpu_reg at qpu_emit time.
+ */
+ QFILE_FRAG_X,
+ QFILE_FRAG_Y,
+ QFILE_FRAG_REV_FLAG,
+
/**
* Stores an immediate value in the index field that can be turned
* into a small immediate field by qpu_encode_small_immediate().
@@ -114,11 +121,8 @@ enum qop {
QOP_MS_MASK,
QOP_VARY_ADD_C,
- QOP_FRAG_X,
- QOP_FRAG_Y,
QOP_FRAG_Z,
QOP_FRAG_W,
- QOP_FRAG_REV_FLAG,
/** Texture x coordinate parameter write */
QOP_TEX_S,
@@ -622,11 +626,8 @@ QIR_NODST_2(TEX_T)
QIR_NODST_2(TEX_R)
QIR_NODST_2(TEX_B)
QIR_NODST_2(TEX_DIRECT)
-QIR_ALU0(FRAG_X)
-QIR_ALU0(FRAG_Y)
QIR_ALU0(FRAG_Z)
QIR_ALU0(FRAG_W)
-QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(MS_MASK)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 5c655495c2b..ae3590854b2 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -300,6 +300,17 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
+
+ case QFILE_FRAG_X:
+ src[i] = qpu_ra(QPU_R_XY_PIXEL_COORD);
+ break;
+ case QFILE_FRAG_Y:
+ src[i] = qpu_rb(QPU_R_XY_PIXEL_COORD);
+ break;
+ case QFILE_FRAG_REV_FLAG:
+ src[i] = qpu_rb(QPU_R_MS_REV_FLAGS);
+ break;
+
case QFILE_TLB_COLOR_WRITE:
case QFILE_TLB_COLOR_WRITE_MS:
case QFILE_TLB_Z_WRITE:
@@ -339,6 +350,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
+ case QFILE_FRAG_X:
+ case QFILE_FRAG_Y:
+ case QFILE_FRAG_REV_FLAG:
assert(!"not reached");
break;
}
@@ -375,21 +389,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_FRAG_X:
- queue(c, qpu_a_ITOF(dst,
- qpu_ra(QPU_R_XY_PIXEL_COORD)));
- break;
-
- case QOP_FRAG_Y:
- queue(c, qpu_a_ITOF(dst,
- qpu_rb(QPU_R_XY_PIXEL_COORD)));
- break;
-
- case QOP_FRAG_REV_FLAG:
- queue(c, qpu_a_ITOF(dst,
- qpu_rb(QPU_R_MS_REV_FLAGS)));
- break;
-
case QOP_MS_MASK:
src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
fixup_raddr_conflict(c, dst, &src[0], &src[1],
From f9480d7918a2da19b8ac85a8326049488feef92b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sun, 3 Apr 2016 15:11:39 -0400
Subject: [PATCH 023/224] nv50,nvc0: add invalidate_resource support for buffer
resources
Provide a callback to reallocate the underlying storage of a resource so
that it is not bound to any existing fences.
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nouveau_buffer.c | 33 +++++++++++++++++++
src/gallium/drivers/nouveau/nouveau_buffer.h | 4 +++
.../drivers/nouveau/nv50/nv50_resource.c | 8 +++++
.../drivers/nouveau/nv50/nv50_resource.h | 3 ++
.../drivers/nouveau/nv50/nv50_screen.c | 2 +-
.../drivers/nouveau/nvc0/nvc0_resource.c | 1 +
.../drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
7 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 1695553d793..ba43a614b90 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -843,6 +843,39 @@ nouveau_user_buffer_upload(struct nouveau_context *nv,
return true;
}
+/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
+ * buffer.
+ */
+void
+nouveau_buffer_invalidate(struct pipe_context *pipe,
+ struct pipe_resource *resource)
+{
+ struct nouveau_context *nv = nouveau_context(pipe);
+ struct nv04_resource *buf = nv04_resource(resource);
+ int ref = buf->base.reference.count - 1;
+
+ /* Shared buffers shouldn't get reallocated */
+ if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
+ return;
+
+ /* We can't touch persistent/coherent buffers */
+ if (buf->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+ PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ return;
+
+ /* If the buffer is sub-allocated and not currently being written, just
+ * wipe the valid buffer range. Otherwise we have to create fresh
+ * storage. (We don't keep track of fences for non-sub-allocated BO's.)
+ */
+ if (buf->mm && !nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE)) {
+ util_range_set_empty(&buf->valid_buffer_range);
+ } else {
+ nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
+ if (ref > 0) /* any references inside context possible ? */
+ nv->invalidate_resource_storage(nv, &buf->base, ref);
+ }
+}
+
/* Scratch data allocation. */
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index d45bf7aebcf..3a33fae9ce2 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -99,6 +99,10 @@ bool
nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *,
unsigned base, unsigned size);
+void
+nouveau_buffer_invalidate(struct pipe_context *pipe,
+ struct pipe_resource *resource);
+
/* Copy data to a scratch buffer and return address & bo the data resides in.
* Returns 0 on failure.
*/
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
index ad5f3b814db..b090a30aed6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -84,6 +84,13 @@ nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps)
FREE(s);
}
+void
+nv50_invalidate_resource(struct pipe_context *pipe, struct pipe_resource *res)
+{
+ if (res->target == PIPE_BUFFER)
+ nouveau_buffer_invalidate(pipe, res);
+}
+
void
nv50_init_resource_functions(struct pipe_context *pcontext)
{
@@ -93,6 +100,7 @@ nv50_init_resource_functions(struct pipe_context *pcontext)
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
pcontext->create_surface = nv50_surface_create;
pcontext->surface_destroy = nv50_surface_destroy;
+ pcontext->invalidate_resource = nv50_invalidate_resource;
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index b40370a1d78..5d03925b0d0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -151,6 +151,9 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
void
nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
+void
+nv50_invalidate_resource(struct pipe_context *, struct pipe_resource *);
+
void
nv50_clear_texture(struct pipe_context *pipe,
struct pipe_resource *res,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 20fb61b51f4..4968783b480 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -193,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_INVALIDATE_BUFFER:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -234,7 +235,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
- case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
index c034d0fd011..0aee5890fd8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -52,6 +52,7 @@ nvc0_init_resource_functions(struct pipe_context *pcontext)
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
pcontext->create_surface = nvc0_surface_create;
pcontext->surface_destroy = nv50_surface_destroy;
+ pcontext->invalidate_resource = nv50_invalidate_resource;
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index c41912a6037..a2ea660fa13 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -216,6 +216,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_INVALIDATE_BUFFER:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -241,7 +242,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
From 59ca92137b3841a65d9232bee1164bff1e6bd57d Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sun, 3 Apr 2016 16:02:59 -0400
Subject: [PATCH 024/224] nv50,nvc0: support sending string markers down into
the command stream
This should hopefully make it a little easier to debug when using GL
applications like glretrace and looking at command streams.
Signed-off-by: Ilia Mirkin
---
.../drivers/nouveau/nv50/nv50_context.c | 25 +++++++++++++++++++
.../drivers/nouveau/nv50/nv50_screen.c | 2 +-
.../drivers/nouveau/nvc0/nvc0_context.c | 25 +++++++++++++++++++
.../drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
4 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 61a52c4b366..5af0e9b3a27 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -93,6 +93,30 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
+static void
+nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
+{
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+ int string_words = len / 4;
+ int data_words;
+
+ if (len <= 0)
+ return;
+ string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
+ if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
+ data_words = string_words;
+ else
+ data_words = string_words + !!(len & 3);
+ BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
+ if (string_words)
+ PUSH_DATAp(push, str, string_words);
+ if (string_words != data_words) {
+ int data = 0;
+ memcpy(&data, &str[string_words * 4], len & 3);
+ PUSH_DATA (push, data);
+ }
+}
+
void
nv50_default_kick_notify(struct nouveau_pushbuf *push)
{
@@ -309,6 +333,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->texture_barrier = nv50_texture_barrier;
pipe->memory_barrier = nv50_memory_barrier;
pipe->get_sample_position = nv50_context_get_sample_position;
+ pipe->emit_string_marker = nv50_emit_string_marker;
if (!screen->cur_ctx) {
/* Restore the last context's state here, normally handled during
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 4968783b480..a2c9a3e152c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -194,6 +194,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COMPUTE:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_STRING_MARKER:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -236,7 +237,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 007cccfd10b..fcb8289beda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -96,6 +96,30 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
+static void
+nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
+{
+ struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
+ int string_words = len / 4;
+ int data_words;
+
+ if (len <= 0)
+ return;
+ string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
+ if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
+ data_words = string_words;
+ else
+ data_words = string_words + !!(len & 3);
+ BEGIN_NIC0(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
+ if (string_words)
+ PUSH_DATAp(push, str, string_words);
+ if (string_words != data_words) {
+ int data = 0;
+ memcpy(&data, &str[string_words * 4], len & 3);
+ PUSH_DATA (push, data);
+ }
+}
+
static void
nvc0_context_unreference_resources(struct nvc0_context *nvc0)
{
@@ -333,6 +357,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->texture_barrier = nvc0_texture_barrier;
pipe->memory_barrier = nvc0_memory_barrier;
pipe->get_sample_position = nvc0_context_get_sample_position;
+ pipe->emit_string_marker = nvc0_emit_string_marker;
nouveau_context_init(&nvc0->base);
nvc0_init_query_functions(nvc0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index a2ea660fa13..4ebdd328e9c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -217,6 +217,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_STRING_MARKER:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -243,7 +244,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
From cdb6fa91fa79c0c9a796730e9ecae320bebdb825 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sat, 23 Jan 2016 08:37:03 -0500
Subject: [PATCH 025/224] nvc0: handle the case where there are no framebuffer
attachments
Signed-off-by: Ilia Mirkin
---
docs/GL3.txt | 4 +--
docs/relnotes/11.3.0.html | 2 +-
.../drivers/nouveau/nvc0/nvc0_program.c | 7 ++++
.../drivers/nouveau/nvc0/nvc0_screen.c | 8 ++++-
.../nouveau/nvc0/nvc0_state_validate.c | 36 +++++++++++++------
.../drivers/nouveau/nvc0/nvc0_surface.c | 4 +++
6 files changed, 47 insertions(+), 14 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index c48802a9f7b..81ece662349 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -172,7 +172,7 @@ GL 4.3, GLSL 4.30:
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
- GL_ARB_framebuffer_no_attachments DONE (i965, r600, radeonsi)
+ GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi)
GL_ARB_internalformat_query2 DONE (all drivers)
GL_ARB_invalidate_subdata DONE (all drivers)
GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -228,7 +228,7 @@ GLES3.1, GLSL ES 3.1
GL_ARB_compute_shader DONE (i965)
GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
- GL_ARB_framebuffer_no_attachments DONE (i965)
+ GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_shader_image_load_store DONE (i965)
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
index d56f6553fe9..971b17af5ab 100644
--- a/docs/relnotes/11.3.0.html
+++ b/docs/relnotes/11.3.0.html
@@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers.
-- GL_ARB_framebuffer_no_attachments on r600, radeonsi
+- GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi
- GL_ARB_internalformat_query2 on all drivers
- GL_ARB_shader_atomic_counter_ops on nvc0
- GL_ARB_shader_image_load_store on radeonsi, softpipe
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index db02fa2df5c..d3024f9fa06 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -456,6 +456,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
fp->hdr[18] |= 0xf << info->out[i].slot[0];
}
+ /* There are no "regular" attachments, but the shader still needs to be
+ * executed. It seems like it wants to think that it has some color
+ * outputs in order to actually run.
+ */
+ if (info->prop.fp.numColourResults == 0 && !info->prop.fp.writesDepth)
+ fp->hdr[18] |= 0xf;
+
fp->fp.early_z = info->prop.fp.earlyFragTests;
return 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 4ebdd328e9c..b6eb679a611 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -52,6 +52,12 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
return false;
+ /* Short-circuit the rest of the logic -- this is used by the state tracker
+ * to determine valid MS levels in a no-attachments scenario.
+ */
+ if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)
+ return true;
+
if (!util_format_is_supported(format, bindings))
return false;
@@ -218,6 +224,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -251,7 +258,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 9c64482f2e2..d0d9315dd2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -56,15 +56,18 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
#endif
static inline void
-nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
+nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 6);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 64);
- PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
+ PUSH_DATA (push, 64); // width
+ PUSH_DATA (push, 0); // height
+ PUSH_DATA (push, 0); // format
+ PUSH_DATA (push, 0); // tile mode
+ PUSH_DATA (push, layers); // layers
+ PUSH_DATA (push, 0); // layer stride
+ PUSH_DATA (push, 0); // base layer
}
static void
@@ -75,12 +78,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nvc0_screen *screen = nvc0->screen;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ unsigned nr_cbufs = fb->nr_cbufs;
bool serialize = false;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
- BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
- PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
PUSH_DATA (push, fb->width << 16);
PUSH_DATA (push, fb->height << 16);
@@ -91,7 +93,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_bo *bo;
if (!fb->cbufs[i]) {
- nvc0_fb_set_null_rt(push, i);
+ nvc0_fb_set_null_rt(push, i, 0);
continue;
}
@@ -179,6 +181,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATA (push, 0);
}
+ if (nr_cbufs == 0 && !fb->zsbuf) {
+ assert(util_is_power_of_two(fb->samples));
+ assert(fb->samples <= 8);
+
+ nvc0_fb_set_null_rt(push, 0, fb->layers);
+
+ if (fb->samples > 1)
+ ms_mode = ffs(fb->samples) - 1;
+ nr_cbufs = 1;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
ms = 1 << ms_mode;
@@ -592,8 +607,9 @@ nvc0_validate_derived_2(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+ nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.nr_cbufs == 0) {
- nvc0_fb_set_null_rt(push, 0);
+ nvc0_fb_set_null_rt(push, 0, 0);
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | 1);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index e657204128e..e108590e215 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1043,6 +1043,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
ctx->saved.fb.width = nvc0->framebuffer.width;
ctx->saved.fb.height = nvc0->framebuffer.height;
+ ctx->saved.fb.samples = nvc0->framebuffer.samples;
+ ctx->saved.fb.layers = nvc0->framebuffer.layers;
ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs;
ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0];
ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf;
@@ -1110,6 +1112,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->framebuffer.width = blit->saved.fb.width;
nvc0->framebuffer.height = blit->saved.fb.height;
+ nvc0->framebuffer.samples = blit->saved.fb.samples;
+ nvc0->framebuffer.layers = blit->saved.fb.layers;
nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf;
From 9b5bd20eb2d09e1ec2319b55c83ad7f28b6fefee Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sat, 9 Apr 2016 13:11:42 -0400
Subject: [PATCH 026/224] glsl: allow usage of the keyword buffer before GLSL
430 / ESSL 310
The GLSL 4.20 and ESSL 3.00 specs don't list 'buffer' as a reserved
keyword. Make the parser ignore it unless GLSL 4.30 / ESSL 3.10 are
used, or ARB_shader_storage_buffer_object is enabled.
Signed-off-by: Ilia Mirkin
Reviewed-by: Timothy Arceri
Cc: mesa-stable@lists.freedesktop.org
---
src/compiler/glsl/glsl_lexer.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 0b7695f8d3e..6b1ef1717e5 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -304,7 +304,7 @@ in return IN_TOK;
out return OUT_TOK;
inout return INOUT_TOK;
uniform return UNIFORM;
-buffer return BUFFER;
+buffer KEYWORD_WITH_ALT(0, 0, 430, 310, yyextra->ARB_shader_storage_buffer_object_enable, BUFFER);
varying DEPRECATED_ES_KEYWORD(VARYING);
centroid KEYWORD(120, 300, 120, 300, CENTROID);
invariant KEYWORD(120, 100, 120, 100, INVARIANT);
From b790232524d46ef888a3657d205aa18502b09d3f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 8 Apr 2016 13:38:54 -0700
Subject: [PATCH 027/224] i965/disasm: Simplify the URB opcode printing with
?:.
Signed-off-by: Kenneth Graunke
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_disasm.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 09eb2392836..0ae237d9782 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -1509,13 +1509,12 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
format(file, " %ld", brw_inst_urb_global_offset(devinfo, inst));
space = 1;
- if (devinfo->gen >= 7) {
- err |= control(file, "urb opcode", gen7_urb_opcode,
- brw_inst_urb_opcode(devinfo, inst), &space);
- } else if (devinfo->gen >= 5) {
- err |= control(file, "urb opcode", gen5_urb_opcode,
- brw_inst_urb_opcode(devinfo, inst), &space);
- }
+
+ err |= control(file, "urb opcode",
+ devinfo->gen >= 7 ? gen7_urb_opcode
+ : gen5_urb_opcode,
+ brw_inst_urb_opcode(devinfo, inst), &space);
+
err |= control(file, "urb swizzle", urb_swizzle,
brw_inst_urb_swizzle_control(devinfo, inst), &space);
if (devinfo->gen < 7) {
From 20c8f365081801b3370b705d18a01f13c181abe9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 8 Apr 2016 13:48:46 -0700
Subject: [PATCH 028/224] i965/disasm: Decode "channel mask present" bit
correctly.
Bit 15 means "interleave" for most messages, but for SIMD8 messages it
means "use channel masks".
Signed-off-by: Kenneth Graunke
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_disasm.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 0ae237d9782..08486578718 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -1505,7 +1505,9 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
break;
}
- case BRW_SFID_URB:
+ case BRW_SFID_URB: {
+ unsigned opcode = brw_inst_urb_opcode(devinfo, inst);
+
format(file, " %ld", brw_inst_urb_global_offset(devinfo, inst));
space = 1;
@@ -1513,10 +1515,18 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
err |= control(file, "urb opcode",
devinfo->gen >= 7 ? gen7_urb_opcode
: gen5_urb_opcode,
- brw_inst_urb_opcode(devinfo, inst), &space);
+ opcode, &space);
+
+ if (opcode == GEN8_URB_OPCODE_SIMD8_WRITE ||
+ opcode == GEN8_URB_OPCODE_SIMD8_READ) {
+ if (brw_inst_urb_channel_mask_present(devinfo, inst))
+ string(file, " masked");
+ } else {
+ err |= control(file, "urb swizzle", urb_swizzle,
+ brw_inst_urb_swizzle_control(devinfo, inst),
+ &space);
+ }
- err |= control(file, "urb swizzle", urb_swizzle,
- brw_inst_urb_swizzle_control(devinfo, inst), &space);
if (devinfo->gen < 7) {
err |= control(file, "urb allocate", urb_allocate,
brw_inst_urb_allocate(devinfo, inst), &space);
@@ -1528,6 +1538,7 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
brw_inst_urb_complete(devinfo, inst), &space);
}
break;
+ }
case BRW_SFID_THREAD_SPAWNER:
break;
From ce84a92df54c738d2c248716f0f673247d50e1a7 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 8 Apr 2016 13:52:30 -0700
Subject: [PATCH 029/224] i965/disasm: Decode per-slot offsets.
We just never bothered to decode this.
Signed-off-by: Kenneth Graunke
Reviewed-by: Ben Widawsky
---
src/mesa/drivers/dri/i965/brw_disasm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 08486578718..88bd7a499a7 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -1517,6 +1517,11 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
: gen5_urb_opcode,
opcode, &space);
+ if (devinfo->gen >= 7 &&
+ brw_inst_urb_per_slot_offset(devinfo, inst)) {
+ string(file, " per-slot");
+ }
+
if (opcode == GEN8_URB_OPCODE_SIMD8_WRITE ||
opcode == GEN8_URB_OPCODE_SIMD8_READ) {
if (brw_inst_urb_channel_mask_present(devinfo, inst))
From 26c56e24e700d0ab4c6499fea28328045b5e4b32 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Sun, 3 Apr 2016 00:48:03 -0700
Subject: [PATCH 030/224] glsl: Don't remove XFB-only varyings.
Consider the case of linking a program with both a vertex and fragment
shader. The VS may compute output varyings that are intended for
transform feedback, and not read by the fragment shader.
In this case, var->data.is_unmatched_generic_inout will be true,
but we still cannot eliminate the varyings. We need to also check
!var->data.is_xfb_only.
Fixes failures in ES31-CTS.gpu_shader5.fma_precision_*, which happen
to use transform feedback in a way we apparently hadn't seen before.
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke
Reviewed-by: Timothy Arceri
---
src/compiler/glsl/link_varyings.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index e9d0067459a..87606be9337 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -488,7 +488,7 @@ remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
* its value is used by other shader stages. This will cause the
* variable to have a location assigned.
*/
- if (var->data.is_unmatched_generic_inout) {
+ if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
assert(var->data.mode != ir_var_temporary);
var->data.mode = ir_var_auto;
}
From 6f5f818b6d9fa5ab8ed7665c85df65237e34aa24 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 11 Apr 2016 13:10:36 +1000
Subject: [PATCH 031/224] docs: add some missing softpipe entries.
I just forgot these when I added this stuff.
Signed-off-by: Dave Airlie
---
docs/GL3.txt | 6 +++---
docs/relnotes/11.3.0.html | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 81ece662349..e6bb7529a2e 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -178,7 +178,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
- GL_ARB_shader_image_size DONE (i965, radeonsi)
+ GL_ARB_shader_image_size DONE (i965, radeonsi, softpipe)
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
@@ -231,8 +231,8 @@ GLES3.1, GLSL ES 3.1
GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
- GL_ARB_shader_image_load_store DONE (i965)
- GL_ARB_shader_image_size DONE (i965)
+ GL_ARB_shader_image_load_store DONE (i965, softpipe)
+ GL_ARB_shader_image_size DONE (i965, softpipe)
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
index 971b17af5ab..97c83a36cf7 100644
--- a/docs/relnotes/11.3.0.html
+++ b/docs/relnotes/11.3.0.html
@@ -48,7 +48,7 @@ Note: some of the new features are only available with certain drivers.
- GL_ARB_internalformat_query2 on all drivers
- GL_ARB_shader_atomic_counter_ops on nvc0
- GL_ARB_shader_image_load_store on radeonsi, softpipe
-- GL_ARB_shader_image_size on radeonsi
+- GL_ARB_shader_image_size on radeonsi, softpipe
- GL_ATI_fragment_shader on all Gallium drivers
- GL_EXT_base_instance on all drivers that support GL_ARB_base_instance
- GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend
From 875543e270de67e99ce049ca01b6cde8ac6911e1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Tue, 29 Mar 2016 19:30:31 -0400
Subject: [PATCH 032/224] i965: enable OES_texture_buffer on gen7+
It will only end up getting exposed on gen8+ since it requires GL ES
3.1, but it should be ready to go on gen7 when support for GL ES 3.1 is
completed there.
Signed-off-by: Ilia Mirkin
Tested-by: Kenneth Graunke
---
docs/GL3.txt | 2 +-
docs/relnotes/11.3.0.html | 1 +
src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index e6bb7529a2e..7267de3a433 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -269,7 +269,7 @@ GLES3.2, GLSL ES 3.2
GL_OES_shader_multisample_interpolation DONE (nvc0, r600, radeonsi)
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
GL_OES_texture_border_clamp DONE (all drivers)
- GL_OES_texture_buffer DONE (core only)
+ GL_OES_texture_buffer DONE (i965, radeonsi)
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
index 97c83a36cf7..2474f386bd9 100644
--- a/docs/relnotes/11.3.0.html
+++ b/docs/relnotes/11.3.0.html
@@ -54,6 +54,7 @@ Note: some of the new features are only available with certain drivers.
- GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend
- GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store
- GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp
+- GL_OES_texture_buffer and GL_EXT_texture_buffer on i965, radeonsi
- EGL_KHR_reusable_sync on all drivers
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 60ac124ecd0..6a20bd6d925 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -340,6 +340,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_texture_view = true;
ctx->Extensions.ARB_shader_storage_buffer_object = true;
ctx->Extensions.EXT_shader_samples_identical = true;
+ ctx->Extensions.OES_texture_buffer = true;
if (brw->can_do_pipelined_register_writes) {
ctx->Extensions.ARB_draw_indirect = true;
From 7d58cfa36680206f7b81d47c6507e0b56b8468c5 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 25 Mar 2016 10:23:25 -0700
Subject: [PATCH 033/224] nir: Add a pass for gathering various bits of shader
info
Reviewed-by: Rob Clark
Reviewed-by: Kenneth Graunke
---
src/compiler/Makefile.sources | 1 +
src/compiler/nir/Makefile.sources | 1 +
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_gather_info.c | 161 +++++++++++++++++++++++++++++
4 files changed, 164 insertions(+)
create mode 100644 src/compiler/nir/nir_gather_info.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 0aee2006902..b24332807df 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -176,6 +176,7 @@ NIR_FILES = \
nir/nir_control_flow_private.h \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
+ nir/nir_gather_info.c \
nir/nir_gs_count_vertices.c \
nir/nir_inline_functions.c \
nir/nir_instr_set.c \
diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources
index 34743024551..c920d2ca544 100644
--- a/src/compiler/nir/Makefile.sources
+++ b/src/compiler/nir/Makefile.sources
@@ -19,6 +19,7 @@ NIR_FILES = \
nir_control_flow_private.h \
nir_dominance.c \
nir_from_ssa.c \
+ nir_gather_info.c \
nir_gs_count_vertices.c \
nir_inline_functions.c \
nir_instr_set.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c19ae5948bd..35b09c62334 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2187,6 +2187,7 @@ bool nir_lower_locals_to_regs(nir_shader *shader);
void nir_lower_outputs_to_temporaries(nir_shader *shader,
nir_function *entrypoint);
+void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
void nir_assign_var_locations(struct exec_list *var_list,
unsigned *size,
diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c
new file mode 100644
index 00000000000..bff235bb377
--- /dev/null
+++ b/src/compiler/nir/nir_gather_info.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * Records in shader->info what a single intrinsic instruction implies about
+ * the shader: discard usage, which system values are read, and end-primitive
+ * usage. Intrinsics not listed in the switch leave shader->info untouched.
+ *
+ * \param instr   the intrinsic instruction being inspected
+ * \param shader  the shader whose info fields are updated
+ */
+static void
+gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ assert(shader->stage == MESA_SHADER_FRAGMENT);
+ shader->info.fs.uses_discard = true;
+ break;
+
+ case nir_intrinsic_load_front_face:
+ case nir_intrinsic_load_vertex_id:
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_sample_id:
+ case nir_intrinsic_load_sample_pos:
+ case nir_intrinsic_load_sample_mask_in:
+ case nir_intrinsic_load_primitive_id:
+ case nir_intrinsic_load_invocation_id:
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id:
+ case nir_intrinsic_load_num_work_groups:
+ /* Build the bit as a 64-bit value, the same way get_io_mask() builds
+ * the masks that are OR'd into this field: shifting a plain int by 31
+ * or more is undefined behavior and cannot reach the upper bits.
+ */
+ shader->info.system_values_read |=
+ (1ull << nir_system_value_from_intrinsic(instr->intrinsic));
+ break;
+
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ assert(shader->stage == MESA_SHADER_GEOMETRY);
+ shader->info.gs.uses_end_primitive = 1;
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * Marks the shader as using texture gather when the given texture
+ * instruction is a tg4 operation; any other texture op changes nothing.
+ */
+static void
+gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
+{
+ if (instr->op == nir_texop_tg4)
+ shader->info.uses_texture_gather = true;
+}
+
+/**
+ * Per-block callback that nir_shader_gather_info() hands to
+ * nir_foreach_block(). Walks every instruction in the block and forwards
+ * intrinsic and texture instructions to the matching gather_*_info() helper.
+ *
+ * \param block   the block whose instructions are scanned
+ * \param shader  the nir_shader being updated, passed as void * to fit the
+ *                callback signature
+ * \return always true (presumably "keep iterating" for nir_foreach_block;
+ *         confirm against its definition)
+ */
+static bool
+gather_info_block(nir_block *block, void *shader)
+{
+ nir_foreach_instr(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic:
+ gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader);
+ break;
+ case nir_instr_type_tex:
+ gather_tex_info(nir_instr_as_tex(instr), shader);
+ break;
+ case nir_instr_type_call:
+ /* Callees are not followed, so a remaining call is a usage error. */
+ assert(!"nir_shader_gather_info only works if functions are inlined");
+ break;
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Returns the bits in the inputs_read, outputs_written, or
+ * system_values_read bitfield corresponding to this variable.
+ *
+ * The result is a contiguous run of bits starting at var->data.location,
+ * one bit per attribute slot reported by glsl_count_attribute_slots().
+ *
+ * \param var    a shader input, output, or system-value variable with a
+ *               non-negative location (both are asserted)
+ * \param stage  the stage of the shader that owns the variable
+ */
+static inline uint64_t
+get_io_mask(nir_variable *var, gl_shader_stage stage)
+{
+ assert(var->data.mode == nir_var_shader_in ||
+ var->data.mode == nir_var_shader_out ||
+ var->data.mode == nir_var_system_value);
+ assert(var->data.location >= 0);
+
+ const struct glsl_type *var_type = var->type;
+ if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) {
+ /* Most geometry shader inputs are per-vertex arrays */
+ if (var->data.location >= VARYING_SLOT_VAR0)
+ assert(glsl_type_is_array(var_type));
+
+ /* Count slots for the element type, not for the whole array. */
+ if (glsl_type_is_array(var_type))
+ var_type = glsl_get_array_element(var_type);
+ }
+
+ /* NOTE(review): vertex-shader inputs are flagged separately, presumably
+ * because their slot counting differs from other varyings; confirm in
+ * glsl_count_attribute_slots().
+ */
+ bool is_vertex_input = (var->data.mode == nir_var_shader_in &&
+ stage == MESA_SHADER_VERTEX);
+ unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input);
+ /* "slots" low bits set, then moved up to start at the variable's location. */
+ return ((1ull << slots) - 1) << var->data.location;
+}
+
+/**
+ * Fills in shader->info by inspecting the shader's variable lists and the
+ * instructions of the given entrypoint.
+ *
+ * inputs_read, outputs_written, system_values_read, num_textures and
+ * num_images are reset and rebuilt from the variable lists. The entrypoint's
+ * blocks are then walked with gather_info_block(), which may add further
+ * system_values_read bits and set the discard, texture-gather and
+ * end-primitive flags.
+ *
+ * NOTE(review): the flags set during the block walk are only ever set, never
+ * cleared here, so a stale "true" from an earlier run would survive; confirm
+ * whether callers rely on a zeroed info struct.
+ *
+ * \param shader      the shader to analyze; must be a vertex, geometry,
+ *                    fragment or compute shader (asserted)
+ * \param entrypoint  the function implementation whose blocks are scanned;
+ *                    function calls must already be inlined
+ */
+void
+nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
+{
+ /* This pass does not yet support tessellation shaders */
+ assert(shader->stage == MESA_SHADER_VERTEX ||
+ shader->stage == MESA_SHADER_GEOMETRY ||
+ shader->stage == MESA_SHADER_FRAGMENT ||
+ shader->stage == MESA_SHADER_COMPUTE);
+
+ shader->info.inputs_read = 0;
+ foreach_list_typed(nir_variable, var, node, &shader->inputs)
+ shader->info.inputs_read |= get_io_mask(var, shader->stage);
+
+ /* TODO: Some day we may need to add stream support to NIR */
+ shader->info.outputs_written = 0;
+ foreach_list_typed(nir_variable, var, node, &shader->outputs)
+ shader->info.outputs_written |= get_io_mask(var, shader->stage);
+
+ shader->info.system_values_read = 0;
+ foreach_list_typed(nir_variable, var, node, &shader->system_values)
+ shader->info.system_values_read |= get_io_mask(var, shader->stage);
+
+ shader->info.num_textures = 0;
+ shader->info.num_images = 0;
+ nir_foreach_variable(var, &shader->uniforms) {
+ /* An array uniform contributes one entry per element.
+ * NOTE(review): only one array level is unwrapped, so arrays of arrays
+ * of samplers/images look like they are not counted; confirm.
+ */
+ const struct glsl_type *type = var->type;
+ unsigned count = 1;
+ if (glsl_type_is_array(type)) {
+ count = glsl_get_length(type);
+ type = glsl_get_array_element(type);
+ }
+
+ if (glsl_type_is_image(type)) {
+ shader->info.num_images += count;
+ } else if (glsl_type_is_sampler(type)) {
+ shader->info.num_textures += count;
+ }
+ }
+
+ /* Instruction-level facts come last, on top of the variable-derived masks. */
+ nir_foreach_block(entrypoint, gather_info_block, shader);
+}
From 4979cec8201523f38654771486a065000888b27d Mon Sep 17 00:00:00 2001
From: Timothy Arceri
Date: Sun, 10 Apr 2016 12:28:00 +1000
Subject: [PATCH 034/224] i965: fix struct type in comment
Reviewed-by: Eduardo Lima Mitev
---
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index f3361d69786..636340add35 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -75,7 +75,7 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
int namelen = strlen(var->name);
/* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
+ * gl_uniform_storage structs for each subcomponent that
* glGetUniformLocation() could name. We know it's been set up in the same
* order we'd walk the type, so walk the list of storage and find anything
* with our name, or the prefix of a component that starts with our name.
From 40d1b671a96247c3dca55114127cd92ead60441f Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 17 Nov 2015 15:49:29 +0100
Subject: [PATCH 035/224] nir/from_ssa: adapt to different bit sizes
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_from_ssa.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 82317c21b62..7bbc2c0f299 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -474,6 +474,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
node->set->reg = nir_local_reg_create(state->impl);
node->set->reg->name = def->name;
node->set->reg->num_components = def->num_components;
+ node->set->reg->bit_size = def->bit_size;
node->set->reg->num_array_elems = 0;
}
@@ -491,6 +492,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
reg = nir_local_reg_create(state->impl);
reg->name = def->name;
reg->num_components = def->num_components;
+ reg->bit_size = def->bit_size;
reg->num_array_elems = 0;
}
From 41a39e338470d2b0e1b78e5e45d673f64f4dc418 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 17 Nov 2015 15:50:00 +0100
Subject: [PATCH 036/224] nir/locals_to_regs: adapt to different bit sizes
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_lower_locals_to_regs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 0438802d3b2..cda652d135f 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -119,6 +119,7 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
nir_register *reg = nir_local_reg_create(state->impl);
reg->num_components = glsl_get_vector_elements(tail->type);
reg->num_array_elems = array_size > 1 ? array_size : 0;
+ reg->bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));
_mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
nir_array_add(&state->derefs_array, nir_deref_var *, deref);
From e3edaec739a72a36d54b60ddf5c952d377324f00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?=
Date: Wed, 23 Mar 2016 08:04:09 +0100
Subject: [PATCH 037/224] nir: add bit_size info to
nir_ssa_undef_instr_create()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2:
- Make the callers pass the right bit_sizes as arguments (Jason).
Signed-off-by: Samuel Iglesias Gonsálvez
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 2 +-
src/compiler/nir/nir.c | 6 ++++--
src/compiler/nir/nir.h | 3 ++-
src/compiler/nir/nir_builder.h | 2 +-
src/compiler/nir/nir_clone.c | 3 ++-
src/compiler/nir/nir_control_flow.c | 6 ++++--
src/compiler/nir/nir_lower_vars_to_ssa.c | 4 ++--
src/compiler/nir/nir_phi_builder.c | 3 ++-
src/compiler/nir/nir_to_ssa.c | 3 ++-
9 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 1fac481ec9f..c35ec7e5848 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -735,7 +735,7 @@ nir_visitor::visit(ir_call *ir)
case nir_intrinsic_image_samples:
case nir_intrinsic_image_size: {
nir_ssa_undef_instr *instr_undef =
- nir_ssa_undef_instr_create(shader, 1);
+ nir_ssa_undef_instr_create(shader, 1, 32);
nir_builder_instr_insert(&b, &instr_undef->instr);
/* Set the image variable dereference. */
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index b67916dc86b..e45f727764e 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -558,12 +558,14 @@ nir_parallel_copy_instr_create(nir_shader *shader)
}
nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader,
+ unsigned num_components,
+ unsigned bit_size)
{
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 35b09c62334..f84f39dbf64 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1845,7 +1845,8 @@ nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
- unsigned num_components);
+ unsigned num_components,
+ unsigned bit_size);
nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
nir_deref_array *nir_deref_array_create(void *mem_ctx);
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 3dc7c25ec28..a894aba8f72 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -78,7 +78,7 @@ static inline nir_ssa_def *
nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
{
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(build->shader, num_components);
+ nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
undef->def.bit_size = bit_size;
if (!undef)
return NULL;
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 7d2e3835258..a3d467d5174 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -372,7 +372,8 @@ static nir_ssa_undef_instr *
clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
{
nir_ssa_undef_instr *nsa =
- nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components,
+ sa->def.bit_size);
add_remap(state, &nsa->def, &sa->def);
diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c
index 33b06d0cc84..ea5741288ce 100644
--- a/src/compiler/nir/nir_control_flow.c
+++ b/src/compiler/nir/nir_control_flow.c
@@ -281,7 +281,8 @@ insert_phi_undef(nir_block *block, nir_block *pred)
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(ralloc_parent(phi),
- phi->dest.ssa.num_components);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size);
nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
nir_phi_src *src = ralloc(phi, nir_phi_src);
src->pred = pred;
@@ -691,7 +692,8 @@ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
void *mem_ctx = ralloc_parent(impl);
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(mem_ctx, def->num_components);
+ nir_ssa_undef_instr_create(mem_ctx, def->num_components,
+ def->bit_size);
nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
return true;
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 9f9e454c198..249c3892335 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -504,8 +504,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->shader,
- intrin->num_components);
- undef->def.bit_size = intrin->dest.ssa.bit_size;
+ intrin->num_components,
+ intrin->dest.ssa.bit_size);
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c
index a39e3606fd5..1f1388a73dd 100644
--- a/src/compiler/nir/nir_phi_builder.c
+++ b/src/compiler/nir/nir_phi_builder.c
@@ -195,7 +195,8 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(val->builder->shader,
- val->num_components);
+ val->num_components,
+ val->bit_size);
nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
&undef->instr);
val->defs[block->index] = &undef->def;
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
index d588d7d2df3..23d709a218a 100644
--- a/src/compiler/nir/nir_to_ssa.c
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -160,7 +160,8 @@ static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
* to preserve the information that this source is undefined
*/
nir_ssa_undef_instr *instr =
- nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
+ nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components,
+ reg->bit_size);
/*
* We could just insert the undefined instruction before the instruction
From a5b17ae7455af205a89074d176289affbf68e444 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Mon, 2 Nov 2015 18:33:46 -0500
Subject: [PATCH 038/224] nir/lower_vec: adapt to different bit sizes
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_lower_vec_to_movs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index f51cede3920..9e40b84e6e3 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -240,6 +240,7 @@ lower_vec_to_movs_block(nir_block *block, void *void_state)
/* Since we insert multiple MOVs, we have a register destination. */
nir_register *reg = nir_local_reg_create(impl);
reg->num_components = vec->dest.dest.ssa.num_components;
+ reg->bit_size = vec->dest.dest.ssa.bit_size;
nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
From 3663a2397e47da9b766b0c4239a8b74ac77b5d04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?=
Date: Wed, 23 Mar 2016 08:04:18 +0100
Subject: [PATCH 039/224] nir: add bit_size info to
nir_load_const_instr_create()
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir.c | 8 +++++---
src/compiler/nir/nir.h | 3 ++-
src/compiler/nir/nir_builder.h | 2 +-
src/compiler/nir/nir_clone.c | 3 ++-
src/compiler/nir/nir_lower_atomics.c | 5 +++--
src/compiler/nir/nir_lower_load_const_to_scalar.c | 3 ++-
src/compiler/nir/nir_lower_locals_to_regs.c | 2 +-
src/compiler/nir/nir_opt_constant_folding.c | 4 ++--
src/compiler/nir/nir_search.c | 5 ++---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +-
10 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index e45f727764e..56a50090fdd 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -469,12 +469,13 @@ nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
}
nir_load_const_instr *
-nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
+ unsigned bit_size)
{
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
@@ -694,7 +695,8 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
}
nir_load_const_instr *load =
- nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type));
+ nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type),
+ 32);
matrix_offset *= load->def.num_components;
for (unsigned i = 0; i < load->def.num_components; i++) {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f84f39dbf64..4cc158767f0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1830,7 +1830,8 @@ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
- unsigned num_components);
+ unsigned num_components,
+ unsigned bit_size);
nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
nir_intrinsic_op op);
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index a894aba8f72..29b13fb222f 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -92,7 +92,7 @@ static inline nir_ssa_def *
nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
{
nir_load_const_instr *load_const =
- nir_load_const_instr_create(build->shader, num_components);
+ nir_load_const_instr_create(build->shader, num_components, 32);
if (!load_const)
return NULL;
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index a3d467d5174..e889f19d24e 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -359,7 +359,8 @@ static nir_load_const_instr *
clone_load_const(clone_state *state, const nir_load_const_instr *lc)
{
nir_load_const_instr *nlc =
- nir_load_const_instr_create(state->ns, lc->def.num_components);
+ nir_load_const_instr_create(state->ns, lc->def.num_components,
+ lc->def.bit_size);
memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index 70381a7968a..b2ea31888f8 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -74,7 +74,8 @@ lower_instr(nir_intrinsic_instr *instr,
nir_intrinsic_set_base(new_instr,
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
- nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr *offset_const =
+ nir_load_const_instr_create(mem_ctx, 1, 32);
offset_const->value.u32[0] = instr->variables[0]->var->data.offset;
nir_instr_insert_before(&instr->instr, &offset_const->instr);
@@ -95,7 +96,7 @@ lower_instr(nir_intrinsic_instr *instr,
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
- nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr_create(mem_ctx, 1, 32);
atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index b5df46413f1..d290c303cd2 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -48,7 +48,8 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
/* Emit the individual loads. */
nir_ssa_def *loads[4];
for (unsigned i = 0; i < lower->def.num_components; i++) {
- nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
+ nir_load_const_instr *load_comp =
+ nir_load_const_instr_create(b.shader, 1, 32);
load_comp->value.u32[0] = lower->value.u32[i];
nir_builder_instr_insert(&b, &load_comp->instr);
loads[i] = &load_comp->def;
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index cda652d135f..111bfdd2e33 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -161,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
if (src.reg.indirect) {
nir_load_const_instr *load_const =
- nir_load_const_instr_create(state->shader, 1);
+ nir_load_const_instr_create(state->shader, 1, 32);
load_const->value.u32[0] = glsl_get_length(parent_type);
nir_instr_insert_before(instr, &load_const->instr);
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index e64ca369bbc..caa4231b188 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -98,9 +98,9 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
- instr->dest.dest.ssa.num_components);
+ instr->dest.dest.ssa.num_components,
+ instr->dest.dest.ssa.bit_size);
- new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
new_instr->value = dest;
nir_instr_insert_before(&instr->instr, &new_instr->instr);
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 3a65ab18928..dc53a9063c4 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -477,7 +477,8 @@ construct_value(const nir_search_value *value,
case nir_search_value_constant: {
const nir_search_constant *c = nir_search_value_as_constant(value);
- nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(mem_ctx, 1, bitsize->dest_size);
switch (c->type) {
case nir_type_float:
@@ -528,8 +529,6 @@ construct_value(const nir_search_value *value,
unreachable("Invalid alu source type");
}
- load->def.bit_size = bitsize->dest_size;
-
nir_instr_insert_before(instr, &load->instr);
nir_alu_src val;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 7ec8b662200..d76b6d900ce 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -454,7 +454,7 @@ ttn_emit_immediate(struct ttn_compile *c)
nir_load_const_instr *load_const;
int i;
- load_const = nir_load_const_instr_create(b->shader, 4);
+ load_const = nir_load_const_instr_create(b->shader, 4, 32);
c->imm_defs[c->next_imm] = &load_const->def;
c->next_imm++;
From 12f628adcbe2f87073fb06f98a98414b898cde5d Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Fri, 6 Nov 2015 11:19:58 +0100
Subject: [PATCH 040/224] nir/lower_to_source_mods: Handle different bit sizes
v2 (Sam):
- Use helper to get base type from nir_alu_type.
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_lower_to_source_mods.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir_lower_to_source_mods.c b/src/compiler/nir/nir_lower_to_source_mods.c
index 6c4e1f0d3f3..1e8c3c2a130 100644
--- a/src/compiler/nir/nir_lower_to_source_mods.c
+++ b/src/compiler/nir/nir_lower_to_source_mods.c
@@ -54,7 +54,7 @@ nir_lower_to_source_mods_block(nir_block *block, void *state)
if (parent->dest.saturate)
continue;
- switch (nir_op_infos[alu->op].input_types[i]) {
+ switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i])) {
case nir_type_float:
if (parent->op != nir_op_fmov)
continue;
@@ -128,7 +128,8 @@ nir_lower_to_source_mods_block(nir_block *block, void *state)
continue;
/* We can only saturate float destinations */
- if (nir_op_infos[alu->op].output_type != nir_type_float)
+ if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) !=
+ nir_type_float)
continue;
if (!list_empty(&alu->dest.dest.ssa.if_uses))
From 8e69782e3e4e574e0395f8300f86e32633d2f21a Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Wed, 10 Feb 2016 16:05:11 +0100
Subject: [PATCH 041/224] nir/lower_load_const_to_scalar: support doubles and
multiple bit sizes
v2 (Sam):
- Add assert to detect bit sizes other than 32 and 64 (Jason).
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_lower_load_const_to_scalar.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index d290c303cd2..db5865fb0c0 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -49,8 +49,12 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
nir_ssa_def *loads[4];
for (unsigned i = 0; i < lower->def.num_components; i++) {
nir_load_const_instr *load_comp =
- nir_load_const_instr_create(b.shader, 1, 32);
- load_comp->value.u32[0] = lower->value.u32[i];
+ nir_load_const_instr_create(b.shader, 1, lower->def.bit_size);
+ if (lower->def.bit_size == 64)
+ load_comp->value.f64[0] = lower->value.f64[i];
+ else
+ load_comp->value.u32[0] = lower->value.u32[i];
+ assert(lower->def.bit_size == 64 || lower->def.bit_size == 32);
nir_builder_instr_insert(&b, &load_comp->instr);
loads[i] = &load_comp->def;
}
From f5551f8a8bdf2786620010554e526d329c7622b5 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Thu, 12 Nov 2015 11:18:50 +0100
Subject: [PATCH 042/224] nir/glsl_to_nir: support doubles
v2:
- Don't set sized types on the destination of texture-related opcodes.
(Jason)
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 64 +++++++++++++++++++-------------
1 file changed, 38 insertions(+), 26 deletions(-)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index c35ec7e5848..c77b43027fc 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -257,6 +257,11 @@ constant_copy(ir_constant *ir, void *mem_ctx)
ret->value.f[i] = ir->value.f[i];
break;
+ case GLSL_TYPE_DOUBLE:
+ for (i = 0; i < total_elems; i++)
+ ret->value.d[i] = ir->value.d[i];
+ break;
+
case GLSL_TYPE_BOOL:
for (i = 0; i < total_elems; i++)
ret->value.b[i] = ir->value.b[i];
@@ -1182,11 +1187,18 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
load_instr->variables[0] = this->deref_head;
ralloc_steal(load_instr, load_instr->variables[0]);
add_instr(&load_instr->instr, ir->type->vector_elements);
+ load_instr->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
}
return this->result;
}
+static bool
+type_is_float(glsl_base_type type)
+{
+ return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE;
+}
+
void
nir_visitor::visit(ir_expression *ir)
{
@@ -1305,20 +1317,20 @@ nir_visitor::visit(ir_expression *ir)
result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
break;
case ir_unop_neg:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
- : nir_ineg(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
+ : nir_ineg(&b, srcs[0]);
break;
case ir_unop_abs:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
- : nir_iabs(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
+ : nir_iabs(&b, srcs[0]);
break;
case ir_unop_saturate:
- assert(types[0] == GLSL_TYPE_FLOAT);
+ assert(type_is_float(types[0]));
result = nir_fsat(&b, srcs[0]);
break;
case ir_unop_sign:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
- : nir_isign(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
+ : nir_isign(&b, srcs[0]);
break;
case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
@@ -1469,19 +1481,19 @@ nir_visitor::visit(ir_expression *ir)
}
case ir_binop_add:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
- : nir_iadd(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
+ : nir_iadd(&b, srcs[0], srcs[1]);
break;
case ir_binop_sub:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
- : nir_isub(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
+ : nir_isub(&b, srcs[0], srcs[1]);
break;
case ir_binop_mul:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
- : nir_imul(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fmul(&b, srcs[0], srcs[1])
+ : nir_imul(&b, srcs[0], srcs[1]);
break;
case ir_binop_div:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fdiv(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_idiv(&b, srcs[0], srcs[1]);
@@ -1489,11 +1501,11 @@ nir_visitor::visit(ir_expression *ir)
result = nir_udiv(&b, srcs[0], srcs[1]);
break;
case ir_binop_mod:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
- : nir_umod(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
+ : nir_umod(&b, srcs[0], srcs[1]);
break;
case ir_binop_min:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fmin(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_imin(&b, srcs[0], srcs[1]);
@@ -1501,7 +1513,7 @@ nir_visitor::visit(ir_expression *ir)
result = nir_umin(&b, srcs[0], srcs[1]);
break;
case ir_binop_max:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fmax(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_imax(&b, srcs[0], srcs[1]);
@@ -1537,7 +1549,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
case ir_binop_less:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_flt(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ilt(&b, srcs[0], srcs[1]);
@@ -1549,7 +1561,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_greater:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_flt(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ilt(&b, srcs[1], srcs[0]);
@@ -1561,7 +1573,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_lequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fge(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ige(&b, srcs[1], srcs[0]);
@@ -1573,7 +1585,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_gequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fge(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ige(&b, srcs[0], srcs[1]);
@@ -1585,7 +1597,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_equal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_feq(&b, srcs[0], srcs[1]);
else
result = nir_ieq(&b, srcs[0], srcs[1]);
@@ -1595,7 +1607,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_nequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fne(&b, srcs[0], srcs[1]);
else
result = nir_ine(&b, srcs[0], srcs[1]);
@@ -1605,7 +1617,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_all_equal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT) {
+ if (type_is_float(types[0])) {
switch (ir->operands[0]->type->vector_elements) {
case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
@@ -1637,7 +1649,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_any_nequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT) {
+ if (type_is_float(types[0])) {
switch (ir->operands[0]->type->vector_elements) {
case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
From 41c2541fc77fc32a89f2124bfcf6340959a48534 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 14 Aug 2015 10:37:18 -0700
Subject: [PATCH 043/224] nir/print: add support for printing doubles and
bitsize
v2:
- Squash the printing doubles related patches into one patch (Sam).
v3:
- Print using PRIx64 format: long is 32-bit on some 32-bit platforms but long
long is basically always 64-bit (Jason).
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_print.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index c295c192c2a..01712fa1e40 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -29,6 +29,7 @@
#include "compiler/shader_enums.h"
#include <stdio.h>
#include <stdlib.h>
+#include <inttypes.h> /* for PRIx64 macro */
static void
print_tabs(unsigned num_tabs, FILE *fp)
@@ -68,7 +69,7 @@ static void
print_register_decl(nir_register *reg, print_state *state)
{
FILE *fp = state->fp;
- fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+ fprintf(fp, "decl_reg %s %u ", sizes[reg->num_components], reg->bit_size);
if (reg->is_packed)
fprintf(fp, "(packed) ");
print_register(reg, state);
@@ -83,7 +84,8 @@ print_ssa_def(nir_ssa_def *def, print_state *state)
FILE *fp = state->fp;
if (def->name != NULL)
fprintf(fp, "/* %s */ ", def->name);
- fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
+ fprintf(fp, "%s %u ssa_%u", sizes[def->num_components], def->bit_size,
+ def->index);
}
static void
@@ -279,6 +281,13 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
}
break;
+ case GLSL_TYPE_DOUBLE:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "%f", c->value.d[i]);
+ }
+ break;
+
case GLSL_TYPE_STRUCT:
for (i = 0; i < c->num_elements; i++) {
if (i > 0) fprintf(fp, ", ");
@@ -713,7 +722,11 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
* and then print the float in a comment for readability.
*/
- fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
+ if (instr->def.bit_size == 64)
+ fprintf(fp, "0x%16" PRIx64 " /* %f */", instr->value.u64[i],
+ instr->value.f64[i]);
+ else
+ fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
}
fprintf(fp, ")");
From f2ccb63be17544fc156c22b4ef1b873452af5dd2 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 14 Aug 2015 10:40:03 -0700
Subject: [PATCH 044/224] nir: handle doubles in
nir_deref_get_const_initializer_load()
v2 (Sam):
- Use the proper bit size when calling nir_load_const_instr_create()
(Jason).
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 56a50090fdd..8d38d3384d8 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -694,9 +694,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
tail = tail->child;
}
+ unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));
nir_load_const_instr *load =
nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type),
- 32);
+ bit_size);
matrix_offset *= load->def.num_components;
for (unsigned i = 0; i < load->def.num_components; i++) {
@@ -706,6 +707,9 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
case GLSL_TYPE_UINT:
load->value.u32[i] = constant->value.u[matrix_offset + i];
break;
+ case GLSL_TYPE_DOUBLE:
+ load->value.f64[i] = constant->value.d[matrix_offset + i];
+ break;
case GLSL_TYPE_BOOL:
load->value.u32[i] = constant->value.b[matrix_offset + i] ?
NIR_TRUE : NIR_FALSE;
From 106a1b5501b23503394fbb9dd99ab30def60652e Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Mon, 3 Aug 2015 15:05:38 -0700
Subject: [PATCH 045/224] nir/instr_set: handle 64-bit bit-sizes
v2: Revert spurious change in nir_opt_cse.c (Iago)
Signed-off-by: Iago Toral Quiroga
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_instr_set.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index e244122e466..c6161433516 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
hash = HASH(hash, instr->op);
hash = HASH(hash, instr->dest.dest.ssa.num_components);
+ hash = HASH(hash, instr->dest.dest.ssa.bit_size);
/* We explicitly don't hash instr->dest.dest.exact */
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
@@ -82,9 +83,8 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
hash = HASH(hash, instr->def.num_components);
- hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32,
- instr->def.num_components
- * sizeof(instr->value.f32[0]));
+ unsigned size = instr->def.num_components * (instr->def.bit_size / 8);
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32, size);
return hash;
}
@@ -126,8 +126,10 @@ hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
hash = HASH(hash, instr->intrinsic);
- if (info->has_dest)
+ if (info->has_dest) {
hash = HASH(hash, instr->dest.ssa.num_components);
+ hash = HASH(hash, instr->dest.ssa.bit_size);
+ }
assert(info->num_variables == 0);
@@ -268,6 +270,9 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
+ if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
+ return false;
+
/* We explicitly don't hash instr->dest.dest.exact */
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
@@ -325,8 +330,11 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (load1->def.num_components != load2->def.num_components)
return false;
+ if (load1->def.bit_size != load2->def.bit_size)
+ return false;
+
return memcmp(load1->value.f32, load2->value.f32,
- load1->def.num_components * sizeof(*load2->value.f32)) == 0;
+ load1->def.num_components * (load1->def.bit_size / 8)) == 0;
}
case nir_instr_type_phi: {
nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
@@ -362,6 +370,10 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
intrinsic2->dest.ssa.num_components)
return false;
+ if (info->has_dest && intrinsic1->dest.ssa.bit_size !=
+ intrinsic2->dest.ssa.bit_size)
+ return false;
+
for (unsigned i = 0; i < info->num_srcs; i++) {
if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
return false;
From 4b37c64f3b547b1f769e0048217f9736a83ce12d Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 4 Aug 2015 14:04:54 -0700
Subject: [PATCH 046/224] nir/split_var_copies: handle doubles
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_split_var_copies.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c
index 6fdaefa32c8..2b011077a7c 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -149,6 +149,7 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_BOOL:
if (glsl_type_is_matrix(src_tail->type)) {
nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
@@ -231,6 +232,7 @@ split_var_copies_block(nir_block *block, void *void_state)
ralloc_steal(state->dead_ctx, instr);
break;
case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
From a741378cb59e4263f909d73498ebf153375e6dcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?=
Date: Mon, 4 Apr 2016 10:16:11 +0200
Subject: [PATCH 047/224] nir/glsl_to_nir: add bit-size info to add_instr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Samuel Iglesias Gonsálvez
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index c77b43027fc..4db7946e884 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -73,7 +73,7 @@ public:
void create_function(ir_function_signature *ir);
private:
- void add_instr(nir_instr *instr, unsigned num_components);
+ void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
@@ -1156,12 +1156,13 @@ get_instr_dest(nir_instr *instr)
}
void
-nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
+ unsigned bit_size)
{
nir_dest *dest = get_instr_dest(instr);
if (dest)
- nir_ssa_dest_init(instr, dest, num_components, 32, NULL);
+ nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);
nir_builder_instr_insert(&b, instr);
@@ -1186,8 +1187,8 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
ralloc_steal(load_instr, load_instr->variables[0]);
- add_instr(&load_instr->instr, ir->type->vector_elements);
- load_instr->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&load_instr->instr, ir->type->vector_elements, bit_size);
}
return this->result;
@@ -1207,11 +1208,11 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_ubo_load: {
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
load->num_components = ir->type->vector_elements;
- load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
- add_instr(&load->instr, ir->type->vector_elements);
+ add_instr(&load->instr, ir->type->vector_elements, bit_size);
/*
* In UBO's, a true boolean value is any non-zero value, but we consider
@@ -1276,7 +1277,8 @@ nir_visitor::visit(ir_expression *ir)
intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
- add_instr(&intrin->instr, deref->type->vector_elements);
+ unsigned bit_size = glsl_get_bit_size(deref->type->base_type);
+ add_instr(&intrin->instr, deref->type->vector_elements, bit_size);
if (swizzle) {
unsigned swiz[4] = {
@@ -1476,7 +1478,8 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_get_buffer_size);
load->num_components = ir->type->vector_elements;
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
- add_instr(&load->instr, ir->type->vector_elements);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&load->instr, ir->type->vector_elements, bit_size);
return;
}
@@ -1913,7 +1916,8 @@ nir_visitor::visit(ir_texture *ir)
assert(src_number == num_srcs);
- add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}
void
From fab5d4cd9582aa5289143876c26af23ff855f355 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Thu, 21 Jan 2016 13:23:55 +0100
Subject: [PATCH 048/224] nir/glsl_to_nir: set bit_size on ssbo_load result
v2 (Sam):
- Add missing bit_size assignment when ssbo_load destination is a boolean.
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 4db7946e884..0b83bdc4078 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -858,8 +858,9 @@ nir_visitor::visit(ir_call *ir)
instr->num_components = type->vector_elements;
/* Setup destination register */
+ unsigned bit_size = glsl_get_bit_size(type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, 32, NULL);
+ type->vector_elements, bit_size, NULL);
/* Insert the created nir instruction now since in the case of boolean
* result we will need to emit another instruction after it
@@ -882,7 +883,7 @@ nir_visitor::visit(ir_call *ir)
load_ssbo_compare->src[1].swizzle[i] = 0;
nir_ssa_dest_init(&load_ssbo_compare->instr,
&load_ssbo_compare->dest.dest,
- type->vector_elements, 32, NULL);
+ type->vector_elements, bit_size, NULL);
load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
dest = &load_ssbo_compare->dest.dest;
From a4bce07dc6ebbd74dfb47394962d573ed01ee482 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Wed, 29 Jul 2015 23:46:20 -0700
Subject: [PATCH 049/224] nir: add support for d2f and f2d
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 2 ++
src/compiler/nir/nir_opcodes.py | 3 +++
2 files changed, 5 insertions(+)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 0b83bdc4078..6a70c4d1758 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -1356,6 +1356,8 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break;
+ case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break;
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_bitcast_i2f:
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index bc9845036d3..ef544250652 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -170,6 +170,9 @@ unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
unop_convert("i2b", tbool, tint32, "src0 != 0")
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
+# double-to-float conversion
+unop_convert("d2f", tfloat32, tfloat64, "src0") # Single to double precision
+unop_convert("f2d", tfloat64, tfloat32, "src0") # Double to single precision
# Unary floating-point rounding operations.
From b16d06252e9179f5c279da69ee194cc0400ae403 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Tue, 12 Jan 2016 12:39:58 +0100
Subject: [PATCH 050/224] nir: add d2i, d2u, d2b opcodes
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 3 +++
src/compiler/nir/nir_opcodes.py | 3 +++
2 files changed, 6 insertions(+)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 6a70c4d1758..6428b95965f 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -1358,6 +1358,9 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break;
case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break;
+ case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break;
+ case ir_unop_d2u: result = nir_d2u(&b, srcs[0]); break;
+ case ir_unop_d2b: result = nir_d2b(&b, srcs[0]); break;
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_bitcast_i2f:
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index ef544250652..0898abcedfd 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -161,9 +161,12 @@ unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
+unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
+unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
+unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
# Boolean-to-float conversion
unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
From d5d6260329ed2df4aaffffac18d8998d4ad3676b Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Tue, 12 Jan 2016 14:03:08 +0100
Subject: [PATCH 051/224] nir: add i2d and u2d opcodes
v2:
- Assert supports_ints and don't fall back to nir_fmov (Jason)
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 8 ++++++++
src/compiler/nir/nir_opcodes.py | 2 ++
2 files changed, 10 insertions(+)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 6428b95965f..0c5cc99981b 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -1361,6 +1361,14 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break;
case ir_unop_d2u: result = nir_d2u(&b, srcs[0]); break;
case ir_unop_d2b: result = nir_d2b(&b, srcs[0]); break;
+ case ir_unop_i2d:
+ assert(supports_ints);
+ result = nir_i2d(&b, srcs[0]);
+ break;
+ case ir_unop_u2d:
+ assert(supports_ints);
+ result = nir_u2d(&b, srcs[0]);
+ break;
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_bitcast_i2f:
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 0898abcedfd..f1f12f72bdd 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -164,6 +164,7 @@ unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
+unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
# Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
@@ -173,6 +174,7 @@ unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
unop_convert("i2b", tbool, tint32, "src0 != 0")
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
+unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
# double-to-float and float-to-double conversions
unop_convert("d2f", tfloat32, tfloat64, "src0") # Double to single precision
unop_convert("f2d", tfloat64, tfloat32, "src0") # Single to double precision
From 9e31e0a21bd462b5a06e187bbaf95d3752052ef0 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 14 Aug 2015 12:20:37 -0700
Subject: [PATCH 052/224] nir: add support for (un)pack_double_2x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2 (Sam):
- Use uint64 instead of float64 for sources and destinations. (Connor)
Signed-off-by: Samuel Iglesias Gonsálvez
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/glsl_to_nir.cpp | 6 ++++++
src/compiler/nir/nir_opcodes.py | 29 +++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 0c5cc99981b..fafa8bbe013 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -1422,6 +1422,12 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_unpack_half_2x16:
result = nir_unpack_half_2x16(&b, srcs[0]);
break;
+ case ir_unop_pack_double_2x32:
+ result = nir_pack_double_2x32(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_double_2x32:
+ result = nir_unpack_double_2x32(&b, srcs[0]);
+ break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
break;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index f1f12f72bdd..18404472fe4 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -95,6 +95,7 @@ tuint = "uint"
tfloat32 = "float32"
tint32 = "int32"
tuint32 = "uint32"
+tuint64 = "uint64"
tfloat64 = "float64"
commutative = "commutative "
@@ -261,6 +262,34 @@ dst.x = (src0.x << 0) |
(src0.w << 24);
""")
+unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t i1;
+ uint32_t i2;
+ };
+} di;
+
+di.i1 = src0.x;
+di.i2 = src0.y;
+dst.x = di.u64;
+""")
+
+unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t i1;
+ uint32_t i2;
+ };
+} di;
+
+di.u64 = src0.x;
+dst.x = di.i1;
+dst.y = di.i2;
+""")
+
# Lowered floating point unpacking operations.
From b093808d26c3c6e06db6329debbb3cfa7acf3a48 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 31 Jul 2015 10:52:25 -0700
Subject: [PATCH 053/224] nir: don't try to scalarize unpack_double_2x32
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_lower_alu_to_scalar.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index e8ba640fe0b..1548abbd558 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -187,6 +187,9 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
return;
}
+ case nir_op_unpack_double_2x32:
+ return;
+
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
From 663e6421df9bb94cf9b46bb0b41f0d5331aa02fe Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 7 Aug 2015 08:37:38 -0700
Subject: [PATCH 054/224] nir: add split versions of (un)pack_double_2x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2 (Sam):
- Use uint64 instead of float64 for sources and destinations. (Connor)
Signed-off-by: Samuel Iglesias Gonsálvez
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_opcodes.py | 36 +++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 18404472fe4..9f62e089e58 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -298,6 +298,29 @@ unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
+unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.u64 = src0;
+dst = di.x;
+""")
+
+unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.u64 = src0;
+dst = di.y;
+""")
# Bit operations, part of ARB_gpu_shader5.
@@ -563,6 +586,19 @@ binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+binop_convert("pack_double_2x32_split", tuint64, tuint32, "", """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.x = src0;
+di.y = src1;
+dst = di.u64;
+""")
+
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32.
From a89c47415774fe9b5458a7ec7e072711e06201e6 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Thu, 12 Nov 2015 11:40:34 +0100
Subject: [PATCH 055/224] nir: add a pass for lowering (un)pack_double_2x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2: Undo unintended change to the signature of
nir_normalize_cubemap_coords (Iago).
v3: Move to compiler/nir (Iago)
v4: Remove Authors from copyright header (Michael Schellenberger)
v5 (Sam):
- Use nir_channel() and nir_ssa_for_alu_src() helpers (Jason)
- Inline lower_double_pack_instr() code into lower_double_pack_block()
(Jason).
- Initialize nir_builder at lower_double_pack_impl() (Jason).
Signed-off-by: Iago Toral Quiroga
Signed-off-by: Samuel Iglesias Gonsálvez
Reviewed-by: Jason Ekstrand
---
src/compiler/Makefile.sources | 1 +
src/compiler/glsl/Makefile.sources | 1 +
src/compiler/nir/Makefile.sources | 1 +
src/compiler/nir/nir.h | 2 +
src/compiler/nir/nir_lower_double_packing.c | 95 +++++++++++++++++++++
5 files changed, 100 insertions(+)
create mode 100644 src/compiler/nir/nir_lower_double_packing.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b24332807df..6f09abf3d5c 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -187,6 +187,7 @@ NIR_FILES = \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_clip.c \
+ nir/nir_lower_double_packing.c \
nir/nir_lower_global_vars_to_local.c \
nir/nir_lower_gs_intrinsics.c \
nir/nir_lower_load_const_to_scalar.c \
diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources
index 538196a79a9..fadfab8a209 100644
--- a/src/compiler/glsl/Makefile.sources
+++ b/src/compiler/glsl/Makefile.sources
@@ -38,6 +38,7 @@ NIR_FILES = \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_clip.c \
+ nir/nir_lower_double_packing.c \
nir/nir_lower_global_vars_to_local.c \
nir/nir_lower_gs_intrinsics.c \
nir/nir_lower_load_const_to_scalar.c \
diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources
index c920d2ca544..ae6cddbcf6c 100644
--- a/src/compiler/nir/Makefile.sources
+++ b/src/compiler/nir/Makefile.sources
@@ -30,6 +30,7 @@ NIR_FILES = \
nir_lower_alu_to_scalar.c \
nir_lower_atomics.c \
nir_lower_clip.c \
+ nir_lower_double_packing.c \
nir_lower_global_vars_to_local.c \
nir_lower_gs_intrinsics.c \
nir_lower_load_const_to_scalar.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4cc158767f0..31498241555 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2279,6 +2279,8 @@ void nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
+void nir_lower_double_pack(nir_shader *shader);
+
bool nir_normalize_cubemap_coords(nir_shader *shader);
void nir_live_ssa_defs_impl(nir_function_impl *impl);
diff --git a/src/compiler/nir/nir_lower_double_packing.c b/src/compiler/nir/nir_lower_double_packing.c
new file mode 100644
index 00000000000..d43683d2007
--- /dev/null
+++ b/src/compiler/nir/nir_lower_double_packing.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/*
+ * lowers:
+ *
+ * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y)
+ * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo))
+ */
+
+static nir_ssa_def *
+lower_pack_double(nir_builder *b, nir_ssa_def *src)
+{ /* packDouble2x32(src): feed channels 0 and 1 of src to the split opcode. */
+ return nir_pack_double_2x32_split(b, nir_channel(b, src, 0),
+ nir_channel(b, src, 1));
+}
+
+static nir_ssa_def *
+lower_unpack_double(nir_builder *b, nir_ssa_def *src)
+{ /* unpackDouble2x32(src): vec2 of the split_x and split_y results of src. */
+ return nir_vec2(b, nir_unpack_double_2x32_split_x(b, src),
+ nir_unpack_double_2x32_split_y(b, src));
+}
+
+static bool
+lower_double_pack_block(nir_block *block, void *ctx)
+{ /* Block callback: replace (un)pack_double_2x32 with their split forms. */
+ nir_builder *b = (nir_builder *) ctx; /* ctx is the caller's nir_builder */
+
+ nir_foreach_instr_safe(block, instr) { /* presumably _safe since instr is removed below */
+ if (instr->type != nir_instr_type_alu)
+ continue; /* only ALU instructions are candidates */
+
+ nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
+
+ if (alu_instr->op != nir_op_pack_double_2x32 &&
+ alu_instr->op != nir_op_unpack_double_2x32)
+ continue; /* every other opcode is left untouched */
+
+ b->cursor = nir_before_instr(&alu_instr->instr); /* build ahead of old instr */
+
+ nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
+ nir_ssa_def *dest =
+ alu_instr->op == nir_op_pack_double_2x32 ?
+ lower_pack_double(b, src) :
+ lower_unpack_double(b, src);
+
+ nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, nir_src_for_ssa(dest));
+ nir_instr_remove(&alu_instr->instr); /* uses now point at dest; drop the old instr */
+ }
+
+ return true; /* unconditional; presumably "keep iterating" - confirm */
+}
+
+static void
+lower_double_pack_impl(nir_function_impl *impl)
+{ /* Hands lower_double_pack_block to nir_foreach_block for this impl. */
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ nir_foreach_block(impl, lower_double_pack_block, &b); /* &b is the callback's ctx */
+}
+
+void
+nir_lower_double_pack(nir_shader *shader)
+{ /* Pass entry point: lower (un)pack_double_2x32 in each function with an impl. */
+ nir_foreach_function(shader, function) {
+ if (function->impl) /* functions without an impl are skipped */
+ lower_double_pack_impl(function->impl);
+ }
+}
+
From d97f5d60f51454727c8d854977c3050a7827d415 Mon Sep 17 00:00:00 2001
From: Oded Gabbay
Date: Sun, 20 Mar 2016 10:41:40 +0200
Subject: [PATCH 056/224] tgsi/doc: fix spelling error
Signed-off-by: Oded Gabbay
Reviewed-by: Rob Clark
---
src/gallium/docs/source/tgsi.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index ac6052a244a..85c302f0dc3 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2710,7 +2710,7 @@ TGSI_SEMANTIC_COLOR
"""""""""""""""""""
For vertex shader outputs or fragment shader inputs/outputs, this
-label indicates that the resister contains an R,G,B,A color.
+label indicates that the register contains an R,G,B,A color.
Several shader inputs/outputs may contain colors so the semantic index
is used to distinguish them. For example, color[0] may be the diffuse
From 05aec42d3d9b4aa357caf21c91a462e182a82b9f Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 11 Apr 2016 09:10:29 -0600
Subject: [PATCH 057/224] docs: fix Coverity URL
---
docs/utilities.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/utilities.html b/docs/utilities.html
index 5c0a4fdcd2e..238da1c0d19 100644
--- a/docs/utilities.html
+++ b/docs/utilities.html
@@ -31,7 +31,7 @@
is a very useful tool for tracking down
memory-related problems in your code.
- Coverity
+ Coverity
provides static code analysis of Mesa. If you create an account
you can see the results and try to fix outstanding issues.
From 590a37dc05ceacd36d9e78c5e5c40e77548f6b39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?=
Date: Mon, 11 Apr 2016 12:48:10 -0500
Subject: [PATCH 058/224] GL3: ARB_shader_image_load_store/size is done for
radeonsi also in GLES
Trivial.
---
docs/GL3.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7267de3a433..33b7d128f5b 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -231,8 +231,8 @@ GLES3.1, GLSL ES 3.1
GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
- GL_ARB_shader_image_load_store DONE (i965, softpipe)
- GL_ARB_shader_image_size DONE (i965, softpipe)
+ GL_ARB_shader_image_load_store DONE (i965, softpipe, radeonsi)
+ GL_ARB_shader_image_size DONE (i965, softpipe, radeonsi)
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
From 594e8685559215833a3da751314b0b606c3fa65f Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 13:00:52 +0100
Subject: [PATCH 059/224] Part revert "gallium/auxiliary: don't build NIR
sources with MSVC2008 flags"
This reverts commit 41c7912d04111a7e3b75a438c5cbbd7edc0fee25 but leaves
out the pragma [that inspired the original commit].
Building mesa requires MSVC2013 or later, thus we no longer need this.
v2: Use correct include path (src/glsl/nir -> src/compiler/nir)
Conflicts:
src/gallium/auxiliary/Makefile.am
Acked-by: Jason Ekstrand (v1)
---
src/gallium/auxiliary/Makefile.am | 18 +++---------------
1 file changed, 3 insertions(+), 15 deletions(-)
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 82c2869b99b..296ed59317b 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,10 +1,11 @@
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
-noinst_LTLIBRARIES = libgallium_nir.la
+noinst_LTLIBRARIES = libgallium.la
AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
+ -I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/gallium/auxiliary/util \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
@@ -14,24 +15,11 @@ AM_CXXFLAGS = \
$(VISIBILITY_CXXFLAGS) \
$(MSVC2013_COMPAT_CXXFLAGS)
-libgallium_nir_la_SOURCES = \
- $(NIR_SOURCES)
-
-libgallium_nir_la_CFLAGS = \
- -I$(top_builddir)/src/compiler/nir \
- $(GALLIUM_CFLAGS) \
- $(VISIBILITY_CFLAGS) \
- $(MSVC2013_COMPAT_CFLAGS)
-
-noinst_LTLIBRARIES += libgallium.la
-
libgallium_la_SOURCES = \
$(C_SOURCES) \
+ $(NIR_SOURCES) \
$(GENERATED_SOURCES)
-libgallium_la_LIBADD = \
- libgallium_nir.la
-
if HAVE_MESA_LLVM
AM_CFLAGS += \
From abf7088eb7674e78541b4585e468cc6de7735b8c Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:05:19 +0100
Subject: [PATCH 060/224] glsl: move the scons build script a level up
It will allow us to remove the duplicate glsl/Makefile.sources.
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
---
src/compiler/Makefile.am | 2 +-
src/compiler/SConscript | 2 +-
.../{glsl/SConscript => SConscript.glsl} | 36 +++++++++----------
3 files changed, 20 insertions(+), 20 deletions(-)
rename src/compiler/{glsl/SConscript => SConscript.glsl} (67%)
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index f218af1b405..f1c70c2a72a 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -62,7 +62,7 @@ EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
glsl/glcpp/glcpp-lex.l \
glsl/glcpp/glcpp-parse.y \
glsl/Makefile.sources \
- glsl/SConscript
+ SConscript.glsl
TESTS += glsl/glcpp/tests/glcpp-test \
glsl/glcpp/tests/glcpp-test-cr-lf \
diff --git a/src/compiler/SConscript b/src/compiler/SConscript
index 8d71b82bee0..8969d821984 100644
--- a/src/compiler/SConscript
+++ b/src/compiler/SConscript
@@ -21,4 +21,4 @@ compiler = env.ConvenienceLibrary(
)
Export('compiler')
-SConscript('glsl/SConscript')
+SConscript('SConscript.glsl')
diff --git a/src/compiler/glsl/SConscript b/src/compiler/SConscript.glsl
similarity index 67%
rename from src/compiler/glsl/SConscript
rename to src/compiler/SConscript.glsl
index ef82a9d317a..43a11d105d4 100644
--- a/src/compiler/glsl/SConscript
+++ b/src/compiler/SConscript.glsl
@@ -15,14 +15,14 @@ env.Prepend(CPPPATH = [
'#src/mesa',
'#src/gallium/include',
'#src/gallium/auxiliary',
- '#src/glsl',
- '#src/glsl/glcpp',
+ '#src/compiler/glsl',
+ '#src/compiler/glsl/glcpp',
])
env.Prepend(LIBS = [mesautil])
# Make glcpp-parse.h and glsl_parser.h reachable from the include path.
-env.Append(CPPPATH = [Dir('.').abspath, Dir('glcpp').abspath])
+env.Prepend(CPPPATH = [Dir('.').abspath, Dir('glsl').abspath])
glcpp_env = env.Clone()
glcpp_env.Append(YACCFLAGS = [
@@ -32,7 +32,7 @@ glcpp_env.Append(YACCFLAGS = [
glsl_env = env.Clone()
glsl_env.Append(YACCFLAGS = [
- '--defines=%s' % File('glsl_parser.h').abspath,
+ '--defines=%s' % File('glsl/glsl_parser.h').abspath,
'-p', '_mesa_glsl_',
])
@@ -40,10 +40,10 @@ glsl_env.Append(YACCFLAGS = [
# "glsl_parser.h", causing glsl_parser.cpp to be regenerated every time
glsl_env['YACCHXXFILESUFFIX'] = '.h'
-glcpp_lexer = glcpp_env.CFile('glcpp/glcpp-lex.c', 'glcpp/glcpp-lex.l')
-glcpp_parser = glcpp_env.CFile('glcpp/glcpp-parse.c', 'glcpp/glcpp-parse.y')
-glsl_lexer = glsl_env.CXXFile('glsl_lexer.cpp', 'glsl_lexer.ll')
-glsl_parser = glsl_env.CXXFile('glsl_parser.cpp', 'glsl_parser.yy')
+glcpp_lexer = glcpp_env.CFile('glsl/glcpp/glcpp-lex.c', 'glsl/glcpp/glcpp-lex.l')
+glcpp_parser = glcpp_env.CFile('glsl/glcpp/glcpp-parse.c', 'glsl/glcpp/glcpp-parse.y')
+glsl_lexer = glsl_env.CXXFile('glsl/glsl_lexer.cpp', 'glsl/glsl_lexer.ll')
+glsl_parser = glsl_env.CXXFile('glsl/glsl_parser.cpp', 'glsl/glsl_parser.yy')
# common generated sources
glsl_sources = [
@@ -66,20 +66,20 @@ if env['msvc']:
# Copy these files to avoid generation object files into src/mesa/program
env.Prepend(CPPPATH = ['#src/mesa/main'])
-env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE'))
+env.Command('glsl/imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE'))
# Copy these files to avoid generation object files into src/mesa/program
env.Prepend(CPPPATH = ['#src/mesa/program'])
-env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE'))
-env.Command('symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE'))
-env.Command('dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE'))
+env.Command('glsl/prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE'))
+env.Command('glsl/symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE'))
+env.Command('glsl/dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE'))
compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES'])
mesa_objs = env.StaticObject([
- 'imports.c',
- 'prog_hash_table.c',
- 'symbol_table.c',
- 'dummy_errors.c',
+ 'glsl/imports.c',
+ 'glsl/prog_hash_table.c',
+ 'glsl/symbol_table.c',
+ 'glsl/dummy_errors.c',
])
compiler_objs += mesa_objs
@@ -116,7 +116,7 @@ glsl_compiler = env.Program(
env.Alias('glsl_compiler', glsl_compiler)
glcpp = env.Program(
- target = 'glcpp/glcpp',
- source = ['glcpp/glcpp.c'] + mesa_objs,
+ target = 'glsl/glcpp/glcpp',
+ source = ['glsl/glcpp/glcpp.c'] + mesa_objs,
)
env.Alias('glcpp', glcpp)
From 4db8f15a257679b90ad98f8fb46bbb71d2075cfa Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:25:19 +0100
Subject: [PATCH 061/224] glsl: move the android build scripts a level up
Analogous to previous commit.
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
---
Android.mk | 1 -
.../{glsl/Android.gen.mk => Android.glsl.gen.mk} | 13 +++++++------
src/compiler/{glsl/Android.mk => Android.glsl.mk} | 2 +-
src/compiler/Android.mk | 2 ++
4 files changed, 10 insertions(+), 8 deletions(-)
rename src/compiler/{glsl/Android.gen.mk => Android.glsl.gen.mk} (86%)
rename src/compiler/{glsl/Android.mk => Android.glsl.mk} (98%)
diff --git a/Android.mk b/Android.mk
index 67d894f2af1..aa4350f0a77 100644
--- a/Android.mk
+++ b/Android.mk
@@ -91,7 +91,6 @@ SUBDIRS := \
src/loader \
src/mapi \
src/compiler \
- src/compiler/glsl \
src/mesa \
src/util \
src/egl \
diff --git a/src/compiler/glsl/Android.gen.mk b/src/compiler/Android.glsl.gen.mk
similarity index 86%
rename from src/compiler/glsl/Android.gen.mk
rename to src/compiler/Android.glsl.gen.mk
index de5cd0f474c..b0df8a146c0 100644
--- a/src/compiler/glsl/Android.gen.mk
+++ b/src/compiler/Android.glsl.gen.mk
@@ -32,8 +32,9 @@ intermediates := $(call local-generated-sources-dir)
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
LOCAL_C_INCLUDES += \
- $(intermediates)/glcpp \
- $(LOCAL_PATH)/glcpp \
+ $(intermediates)/glsl \
+ $(LOCAL_PATH)/glsl \
+ $(LOCAL_PATH)/glsl/glcpp \
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
$(LIBGLCPP_GENERATED_FILES) \
@@ -65,14 +66,14 @@ define local-yy-to-cpp-and-h
rm -f $(@:$1=$(YACC_HEADER_SUFFIX))
endef
-$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll
+$(intermediates)/glsl/glsl_lexer.cpp: $(LOCAL_PATH)/glsl/glsl_lexer.ll
$(call local-l-or-ll-to-c-or-cpp)
-$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy
+$(intermediates)/glsl/glsl_parser.cpp: $(LOCAL_PATH)/glsl/glsl_parser.yy
$(call local-yy-to-cpp-and-h,.cpp)
-$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l
+$(intermediates)/glsl/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-lex.l
$(call local-l-or-ll-to-c-or-cpp)
-$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
+$(intermediates)/glsl/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-parse.y
$(call glsl_local-y-to-c-and-h)
diff --git a/src/compiler/glsl/Android.mk b/src/compiler/Android.glsl.mk
similarity index 98%
rename from src/compiler/glsl/Android.mk
rename to src/compiler/Android.glsl.mk
index f5d96b300f0..d9cf06d208f 100644
--- a/src/compiler/glsl/Android.mk
+++ b/src/compiler/Android.glsl.mk
@@ -47,7 +47,7 @@ LOCAL_STATIC_LIBRARIES := libmesa_compiler
LOCAL_MODULE := libmesa_glsl
-include $(LOCAL_PATH)/Android.gen.mk
+include $(LOCAL_PATH)/Android.glsl.gen.mk
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/compiler/Android.mk b/src/compiler/Android.mk
index 888780ba3fb..6c3a8552286 100644
--- a/src/compiler/Android.mk
+++ b/src/compiler/Android.mk
@@ -65,3 +65,5 @@ LOCAL_MODULE := libmesa_nir
include $(LOCAL_PATH)/Android.gen.mk
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
+
+include $(LOCAL_PATH)/Android.glsl.mk
From c481c8f7f135d4cf17e35bb5126bdcf6b5611940 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:29:41 +0100
Subject: [PATCH 062/224] configure.ac: update the path of the generated files
... in order to determine if we need bison/flex. Failing to locate the
files will lead to mandating bison/flex even when building from a
release tarball.
CC: "11.2"
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
Reviewed-by: Matt Turner
---
configure.ac | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index ffd51db31b7..c426c72c93b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -110,10 +110,10 @@ LT_INIT([disable-static])
AC_CHECK_PROG(RM, rm, [rm -f])
AX_PROG_BISON([],
- AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
+ AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-parse.c"],
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
AX_PROG_FLEX([],
- AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-lex.c"],
+ AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-lex.c"],
[AC_MSG_ERROR([flex not found - unable to compile glcpp-lex.l])]))
AC_CHECK_PROG(INDENT, indent, indent, cat)
From 3d67780b80df91bd0326b77dcc8d90af719e087e Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 11 Apr 2016 13:23:17 +0100
Subject: [PATCH 063/224] compiler: remove {glsl,nir}/Makefile.sources
No longer used as of last commit.
v2: Rebase.
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand (v1)
---
src/compiler/Makefile.am | 4 +-
src/compiler/glsl/Makefile.am | 228 -----------------------------
src/compiler/glsl/Makefile.sources | 224 ----------------------------
src/compiler/nir/Makefile.sources | 79 ----------
4 files changed, 1 insertion(+), 534 deletions(-)
delete mode 100644 src/compiler/glsl/Makefile.am
delete mode 100644 src/compiler/glsl/Makefile.sources
delete mode 100644 src/compiler/nir/Makefile.sources
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index f1c70c2a72a..6e5ae7a4f7b 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -61,7 +61,6 @@ EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
glsl/glsl_parser.yy \
glsl/glcpp/glcpp-lex.l \
glsl/glcpp/glcpp-parse.y \
- glsl/Makefile.sources \
SConscript.glsl
TESTS += glsl/glcpp/tests/glcpp-test \
@@ -321,5 +320,4 @@ EXTRA_DIST += \
nir/nir_opcodes_c.py \
nir/nir_opcodes_h.py \
nir/nir_opt_algebraic.py \
- nir/tests \
- nir/Makefile.sources
+ nir/tests
diff --git a/src/compiler/glsl/Makefile.am b/src/compiler/glsl/Makefile.am
deleted file mode 100644
index 9954b812403..00000000000
--- a/src/compiler/glsl/Makefile.am
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright © 2012 Jon TURNEY
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-AM_CPPFLAGS = \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/mapi \
- -I$(top_srcdir)/src/mesa/ \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/glsl/glcpp \
- -I$(top_srcdir)/src/gtest/include \
- $(DEFINES)
-AM_CFLAGS = \
- $(VISIBILITY_CFLAGS) \
- $(MSVC2013_COMPAT_CFLAGS)
-AM_CXXFLAGS = \
- $(VISIBILITY_CXXFLAGS) \
- $(MSVC2013_COMPAT_CXXFLAGS)
-
-EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \
- glsl_lexer.ll \
- glsl_parser.yy \
- glcpp/glcpp-lex.l \
- glcpp/glcpp-parse.y \
- SConscript
-
-include Makefile.sources
-
-TESTS = glcpp/tests/glcpp-test \
- glcpp/tests/glcpp-test-cr-lf \
- tests/blob-test \
- tests/general-ir-test \
- tests/optimization-test \
- tests/sampler-types-test \
- tests/uniform-initializer-test
-
-TESTS_ENVIRONMENT= \
- export PYTHON2=$(PYTHON2); \
- export PYTHON_FLAGS=$(PYTHON_FLAGS);
-
-noinst_LTLIBRARIES = libglsl.la libglcpp.la
-check_PROGRAMS = \
- glcpp/glcpp \
- glsl_test \
- tests/blob-test \
- tests/general-ir-test \
- tests/sampler-types-test \
- tests/uniform-initializer-test
-
-noinst_PROGRAMS = glsl_compiler
-
-tests_blob_test_SOURCES = \
- tests/blob_test.c
-tests_blob_test_LDADD = \
- $(top_builddir)/src/glsl/libglsl.la
-
-tests_general_ir_test_SOURCES = \
- standalone_scaffolding.cpp \
- tests/builtin_variable_test.cpp \
- tests/invalidate_locations_test.cpp \
- tests/general_ir_test.cpp \
- tests/varyings_test.cpp
-tests_general_ir_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-tests_general_ir_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- $(top_builddir)/src/glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-tests_uniform_initializer_test_SOURCES = \
- tests/copy_constant_to_storage_tests.cpp \
- tests/set_uniform_initializer_tests.cpp \
- tests/uniform_initializer_utils.cpp \
- tests/uniform_initializer_utils.h
-tests_uniform_initializer_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-tests_uniform_initializer_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- $(top_builddir)/src/glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-tests_sampler_types_test_SOURCES = \
- tests/sampler_types_test.cpp
-tests_sampler_types_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-tests_sampler_types_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- $(top_builddir)/src/glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-libglcpp_la_LIBADD = \
- $(top_builddir)/src/util/libmesautil.la
-libglcpp_la_SOURCES = \
- glcpp/glcpp-lex.c \
- glcpp/glcpp-parse.c \
- glcpp/glcpp-parse.h \
- $(LIBGLCPP_FILES)
-
-glcpp_glcpp_SOURCES = \
- glcpp/glcpp.c
-glcpp_glcpp_LDADD = \
- libglcpp.la \
- $(top_builddir)/src/libglsl_util.la \
- -lm
-
-libglsl_la_LIBADD = \
- $(top_builddir)/src/compiler/nir/libnir.la \
- libglcpp.la
-
-libglsl_la_SOURCES = \
- glsl_lexer.cpp \
- glsl_parser.cpp \
- glsl_parser.h \
- $(LIBGLSL_FILES)
-
-
-glsl_compiler_SOURCES = \
- $(GLSL_COMPILER_CXX_FILES)
-
-glsl_compiler_LDADD = \
- libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(top_builddir)/src/util/libmesautil.la \
- $(PTHREAD_LIBS)
-
-glsl_test_SOURCES = \
- standalone_scaffolding.cpp \
- test.cpp \
- test_optpass.cpp \
- test_optpass.h
-
-glsl_test_LDADD = \
- libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-# We write our own rules for yacc and lex below. We'd rather use automake,
-# but automake makes it especially difficult for a number of reasons:
-#
-# * < automake-1.12 generates .h files from .yy and .ypp files, but
-# >=automake-1.12 generates .hh and .hpp files respectively. There's no
-# good way of making a project that uses C++ yacc files compatible with
-# both versions of automake. Strong work automake developers.
-#
-# * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
-# we'd like the resulting generated code to also go in glcpp/ for purposes
-# of distribution. Automake gives no way to do this.
-#
-# * Since we're building multiple yacc parsers into one library (and via one
-# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
-# automake to name the resulting generated code as <library-name>_filename.c.
-# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
-
-# In order to make build output print "LEX" and "YACC", we reproduce the
-# automake variables below.
-
-AM_V_LEX = $(am__v_LEX_$(V))
-am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
-am__v_LEX_0 = @echo " LEX " $@;
-am__v_LEX_1 =
-
-AM_V_YACC = $(am__v_YACC_$(V))
-am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
-am__v_YACC_0 = @echo " YACC " $@;
-am__v_YACC_1 =
-
-MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
-YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
-LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
-
-glsl_parser.cpp glsl_parser.h: glsl_parser.yy
- $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
-
-glsl_lexer.cpp: glsl_lexer.ll
- $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
-
-glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
- $(MKDIR_GEN)
- $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
-
-glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
- $(MKDIR_GEN)
- $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
-
-# Only the parsers (specifically the header files generated at the same time)
-# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
-# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files
-# YACC is only executed once for each parser. The rest of the generated code
-# will be created at the appropriate times according to standard automake
-# dependency rules.
-BUILT_SOURCES = \
- glsl_parser.cpp \
- glsl_lexer.cpp \
- glcpp/glcpp-parse.c \
- glcpp/glcpp-lex.c
-CLEANFILES = \
- glcpp/glcpp-parse.h \
- glsl_parser.h \
- $(BUILT_SOURCES)
-
-clean-local:
- $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
-
-dist-hook:
- $(RM) glcpp/tests/*.out
- $(RM) glcpp/tests/subtest*/*.out
diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources
deleted file mode 100644
index fadfab8a209..00000000000
--- a/src/compiler/glsl/Makefile.sources
+++ /dev/null
@@ -1,224 +0,0 @@
-# shared source lists for Makefile, SConscript, and Android.mk
-
-# libglcpp
-
-LIBGLCPP_FILES = \
- glcpp/glcpp.h \
- glcpp/pp.c
-
-LIBGLCPP_GENERATED_FILES = \
- glcpp/glcpp-lex.c \
- glcpp/glcpp-parse.c
-
-NIR_GENERATED_FILES = \
- nir/nir_builder_opcodes.h \
- nir/nir_constant_expressions.c \
- nir/nir_opcodes.c \
- nir/nir_opcodes.h \
- nir/nir_opt_algebraic.c
-
-NIR_FILES = \
- nir/nir.c \
- nir/nir.h \
- nir/nir_array.h \
- nir/nir_builder.h \
- nir/nir_clone.c \
- nir/nir_constant_expressions.h \
- nir/nir_control_flow.c \
- nir/nir_control_flow.h \
- nir/nir_control_flow_private.h \
- nir/nir_dominance.c \
- nir/nir_from_ssa.c \
- nir/nir_gs_count_vertices.c \
- nir/nir_intrinsics.c \
- nir/nir_intrinsics.h \
- nir/nir_instr_set.c \
- nir/nir_instr_set.h \
- nir/nir_liveness.c \
- nir/nir_lower_alu_to_scalar.c \
- nir/nir_lower_atomics.c \
- nir/nir_lower_clip.c \
- nir/nir_lower_double_packing.c \
- nir/nir_lower_global_vars_to_local.c \
- nir/nir_lower_gs_intrinsics.c \
- nir/nir_lower_load_const_to_scalar.c \
- nir/nir_lower_locals_to_regs.c \
- nir/nir_lower_idiv.c \
- nir/nir_lower_io.c \
- nir/nir_lower_outputs_to_temporaries.c \
- nir/nir_lower_phis_to_scalar.c \
- nir/nir_lower_samplers.c \
- nir/nir_lower_system_values.c \
- nir/nir_lower_tex.c \
- nir/nir_lower_to_source_mods.c \
- nir/nir_lower_two_sided_color.c \
- nir/nir_lower_vars_to_ssa.c \
- nir/nir_lower_var_copies.c \
- nir/nir_lower_vec_to_movs.c \
- nir/nir_metadata.c \
- nir/nir_move_vec_src_uses_to_dest.c \
- nir/nir_normalize_cubemap_coords.c \
- nir/nir_opt_constant_folding.c \
- nir/nir_opt_copy_propagate.c \
- nir/nir_opt_cse.c \
- nir/nir_opt_dce.c \
- nir/nir_opt_dead_cf.c \
- nir/nir_opt_gcm.c \
- nir/nir_opt_global_to_local.c \
- nir/nir_opt_peephole_select.c \
- nir/nir_opt_remove_phis.c \
- nir/nir_opt_undef.c \
- nir/nir_print.c \
- nir/nir_remove_dead_variables.c \
- nir/nir_search.c \
- nir/nir_search.h \
- nir/nir_split_var_copies.c \
- nir/nir_sweep.c \
- nir/nir_to_ssa.c \
- nir/nir_validate.c \
- nir/nir_vla.h \
- nir/nir_worklist.c \
- nir/nir_worklist.h
-
-# libglsl
-
-LIBGLSL_FILES = \
- ast.h \
- ast_array_index.cpp \
- ast_expr.cpp \
- ast_function.cpp \
- ast_to_hir.cpp \
- ast_type.cpp \
- blob.c \
- blob.h \
- builtin_functions.cpp \
- builtin_types.cpp \
- builtin_variables.cpp \
- glsl_parser_extras.cpp \
- glsl_parser_extras.h \
- glsl_symbol_table.cpp \
- glsl_symbol_table.h \
- hir_field_selection.cpp \
- ir_basic_block.cpp \
- ir_basic_block.h \
- ir_builder.cpp \
- ir_builder.h \
- ir_clone.cpp \
- ir_constant_expression.cpp \
- ir.cpp \
- ir.h \
- ir_equals.cpp \
- ir_expression_flattening.cpp \
- ir_expression_flattening.h \
- ir_function_can_inline.cpp \
- ir_function_detect_recursion.cpp \
- ir_function_inlining.h \
- ir_function.cpp \
- ir_hierarchical_visitor.cpp \
- ir_hierarchical_visitor.h \
- ir_hv_accept.cpp \
- ir_import_prototypes.cpp \
- ir_optimization.h \
- ir_print_visitor.cpp \
- ir_print_visitor.h \
- ir_reader.cpp \
- ir_reader.h \
- ir_rvalue_visitor.cpp \
- ir_rvalue_visitor.h \
- ir_set_program_inouts.cpp \
- ir_uniform.h \
- ir_validate.cpp \
- ir_variable_refcount.cpp \
- ir_variable_refcount.h \
- ir_visitor.h \
- linker.cpp \
- linker.h \
- link_atomics.cpp \
- link_functions.cpp \
- link_interface_blocks.cpp \
- link_uniforms.cpp \
- link_uniform_initializers.cpp \
- link_uniform_block_active_visitor.cpp \
- link_uniform_block_active_visitor.h \
- link_uniform_blocks.cpp \
- link_varyings.cpp \
- link_varyings.h \
- list.h \
- loop_analysis.cpp \
- loop_analysis.h \
- loop_controls.cpp \
- loop_unroll.cpp \
- lower_buffer_access.cpp \
- lower_buffer_access.h \
- lower_clip_distance.cpp \
- lower_const_arrays_to_uniforms.cpp \
- lower_discard.cpp \
- lower_discard_flow.cpp \
- lower_if_to_cond_assign.cpp \
- lower_instructions.cpp \
- lower_jumps.cpp \
- lower_mat_op_to_vec.cpp \
- lower_noise.cpp \
- lower_offset_array.cpp \
- lower_packed_varyings.cpp \
- lower_named_interface_blocks.cpp \
- lower_packing_builtins.cpp \
- lower_subroutine.cpp \
- lower_tess_level.cpp \
- lower_texture_projection.cpp \
- lower_variable_index_to_cond_assign.cpp \
- lower_vec_index_to_cond_assign.cpp \
- lower_vec_index_to_swizzle.cpp \
- lower_vector.cpp \
- lower_vector_derefs.cpp \
- lower_vector_insert.cpp \
- lower_vertex_id.cpp \
- lower_output_reads.cpp \
- lower_shared_reference.cpp \
- lower_ubo_reference.cpp \
- opt_algebraic.cpp \
- opt_array_splitting.cpp \
- opt_conditional_discard.cpp \
- opt_constant_folding.cpp \
- opt_constant_propagation.cpp \
- opt_constant_variable.cpp \
- opt_copy_propagation.cpp \
- opt_copy_propagation_elements.cpp \
- opt_dead_builtin_variables.cpp \
- opt_dead_builtin_varyings.cpp \
- opt_dead_code.cpp \
- opt_dead_code_local.cpp \
- opt_dead_functions.cpp \
- opt_flatten_nested_if_blocks.cpp \
- opt_flip_matrices.cpp \
- opt_function_inlining.cpp \
- opt_if_simplification.cpp \
- opt_minmax.cpp \
- opt_noop_swizzle.cpp \
- opt_rebalance_tree.cpp \
- opt_redundant_jumps.cpp \
- opt_structure_splitting.cpp \
- opt_swizzle_swizzle.cpp \
- opt_tree_grafting.cpp \
- opt_vectorize.cpp \
- program.h \
- propagate_invariance.cpp \
- s_expression.cpp \
- s_expression.h
-
-# glsl to nir pass
-GLSL_TO_NIR_FILES = \
- nir/glsl_to_nir.cpp \
- nir/glsl_to_nir.h
-
-# glsl_compiler
-
-GLSL_COMPILER_CXX_FILES = \
- standalone_scaffolding.cpp \
- standalone_scaffolding.h \
- main.cpp
-
-# libglsl generated sources
-LIBGLSL_GENERATED_CXX_FILES = \
- glsl_lexer.cpp \
- glsl_parser.cpp
diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources
deleted file mode 100644
index ae6cddbcf6c..00000000000
--- a/src/compiler/nir/Makefile.sources
+++ /dev/null
@@ -1,79 +0,0 @@
-NIR_GENERATED_FILES = \
- nir_builder_opcodes.h \
- nir_constant_expressions.c \
- nir_opcodes.c \
- nir_opcodes.h \
- nir_opt_algebraic.c
-
-NIR_FILES = \
- glsl_to_nir.cpp \
- glsl_to_nir.h \
- nir.c \
- nir.h \
- nir_array.h \
- nir_builder.h \
- nir_clone.c \
- nir_constant_expressions.h \
- nir_control_flow.c \
- nir_control_flow.h \
- nir_control_flow_private.h \
- nir_dominance.c \
- nir_from_ssa.c \
- nir_gather_info.c \
- nir_gs_count_vertices.c \
- nir_inline_functions.c \
- nir_instr_set.c \
- nir_instr_set.h \
- nir_intrinsics.c \
- nir_intrinsics.h \
- nir_liveness.c \
- nir_lower_alu_to_scalar.c \
- nir_lower_atomics.c \
- nir_lower_clip.c \
- nir_lower_double_packing.c \
- nir_lower_global_vars_to_local.c \
- nir_lower_gs_intrinsics.c \
- nir_lower_load_const_to_scalar.c \
- nir_lower_locals_to_regs.c \
- nir_lower_idiv.c \
- nir_lower_indirect_derefs.c \
- nir_lower_io.c \
- nir_lower_outputs_to_temporaries.c \
- nir_lower_phis_to_scalar.c \
- nir_lower_returns.c \
- nir_lower_samplers.c \
- nir_lower_system_values.c \
- nir_lower_tex.c \
- nir_lower_to_source_mods.c \
- nir_lower_two_sided_color.c \
- nir_lower_vars_to_ssa.c \
- nir_lower_var_copies.c \
- nir_lower_vec_to_movs.c \
- nir_metadata.c \
- nir_move_vec_src_uses_to_dest.c \
- nir_normalize_cubemap_coords.c \
- nir_opt_constant_folding.c \
- nir_opt_copy_propagate.c \
- nir_opt_cse.c \
- nir_opt_dce.c \
- nir_opt_dead_cf.c \
- nir_opt_gcm.c \
- nir_opt_global_to_local.c \
- nir_opt_peephole_select.c \
- nir_opt_remove_phis.c \
- nir_opt_undef.c \
- nir_phi_builder.c \
- nir_phi_builder.h \
- nir_print.c \
- nir_remove_dead_variables.c \
- nir_repair_ssa.c \
- nir_search.c \
- nir_search.h \
- nir_split_var_copies.c \
- nir_sweep.c \
- nir_to_ssa.c \
- nir_validate.c \
- nir_vla.h \
- nir_worklist.c \
- nir_worklist.h
-
From 9324afc0e9343417497667b15024a94da7654105 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:40:42 +0100
Subject: [PATCH 064/224] compiler: automake: split out glsl into separate
makefile
Preserve the functionality while keeping the files smaller and
more readable.
v2: Do not include Makefile.sources from the GLSL makefile (silences
automake warnings)
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand (v1)
---
src/compiler/Makefile.am | 195 +-----------------------------
src/compiler/Makefile.glsl.am | 217 ++++++++++++++++++++++++++++++++++
2 files changed, 218 insertions(+), 194 deletions(-)
create mode 100644 src/compiler/Makefile.glsl.am
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index 6e5ae7a4f7b..89aa54e0c42 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -54,202 +54,9 @@ BUILT_SOURCES =
CLEANFILES =
EXTRA_DIST = SConscript
-
-EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
- glsl/TODO glsl/glcpp/README \
- glsl/glsl_lexer.ll \
- glsl/glsl_parser.yy \
- glsl/glcpp/glcpp-lex.l \
- glsl/glcpp/glcpp-parse.y \
- SConscript.glsl
-
-TESTS += glsl/glcpp/tests/glcpp-test \
- glsl/glcpp/tests/glcpp-test-cr-lf \
- glsl/tests/blob-test \
- glsl/tests/general-ir-test \
- glsl/tests/optimization-test \
- glsl/tests/sampler-types-test \
- glsl/tests/uniform-initializer-test
-
-TESTS_ENVIRONMENT= \
- export PYTHON2=$(PYTHON2); \
- export PYTHON_FLAGS=$(PYTHON_FLAGS);
-
-check_PROGRAMS += \
- glsl/glcpp/glcpp \
- glsl/glsl_test \
- glsl/tests/blob-test \
- glsl/tests/general-ir-test \
- glsl/tests/sampler-types-test \
- glsl/tests/uniform-initializer-test
-
-noinst_PROGRAMS = glsl_compiler
-
-glsl_tests_blob_test_SOURCES = \
- glsl/tests/blob_test.c
-glsl_tests_blob_test_LDADD = \
- glsl/libglsl.la
-
-glsl_tests_general_ir_test_SOURCES = \
- glsl/standalone_scaffolding.cpp \
- glsl/tests/builtin_variable_test.cpp \
- glsl/tests/invalidate_locations_test.cpp \
- glsl/tests/general_ir_test.cpp \
- glsl/tests/varyings_test.cpp
-glsl_tests_general_ir_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-glsl_tests_general_ir_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-glsl_tests_uniform_initializer_test_SOURCES = \
- glsl/tests/copy_constant_to_storage_tests.cpp \
- glsl/tests/set_uniform_initializer_tests.cpp \
- glsl/tests/uniform_initializer_utils.cpp \
- glsl/tests/uniform_initializer_utils.h
-glsl_tests_uniform_initializer_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-glsl_tests_uniform_initializer_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-glsl_tests_sampler_types_test_SOURCES = \
- glsl/tests/sampler_types_test.cpp
-glsl_tests_sampler_types_test_CFLAGS = \
- $(PTHREAD_CFLAGS)
-glsl_tests_sampler_types_test_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la
-
-glsl_libglcpp_la_LIBADD = \
- $(top_builddir)/src/util/libmesautil.la
-glsl_libglcpp_la_SOURCES = \
- glsl/glcpp/glcpp-lex.c \
- glsl/glcpp/glcpp-parse.c \
- glsl/glcpp/glcpp-parse.h \
- $(LIBGLCPP_FILES)
-
-glsl_glcpp_glcpp_SOURCES = \
- glsl/glcpp/glcpp.c
-glsl_glcpp_glcpp_LDADD = \
- glsl/libglcpp.la \
- $(top_builddir)/src/libglsl_util.la \
- -lm
-
-glsl_libglsl_la_LIBADD = \
- nir/libnir.la \
- glsl/libglcpp.la
-
-glsl_libglsl_la_SOURCES = \
- glsl/glsl_lexer.cpp \
- glsl/glsl_parser.cpp \
- glsl/glsl_parser.h \
- $(LIBGLSL_FILES)
-
-
-glsl_compiler_SOURCES = \
- $(GLSL_COMPILER_CXX_FILES)
-
-glsl_compiler_LDADD = \
- glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(top_builddir)/src/util/libmesautil.la \
- $(PTHREAD_LIBS)
-
-glsl_glsl_test_SOURCES = \
- glsl/standalone_scaffolding.cpp \
- glsl/test.cpp \
- glsl/test_optpass.cpp \
- glsl/test_optpass.h
-
-glsl_glsl_test_LDADD = \
- glsl/libglsl.la \
- $(top_builddir)/src/libglsl_util.la \
- $(PTHREAD_LIBS)
-
-# We write our own rules for yacc and lex below. We'd rather use automake,
-# but automake makes it especially difficult for a number of reasons:
-#
-# * < automake-1.12 generates .h files from .yy and .ypp files, but
-# >=automake-1.12 generates .hh and .hpp files respectively. There's no
-# good way of making a project that uses C++ yacc files compatible with
-# both versions of automake. Strong work automake developers.
-#
-# * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
-# we'd like the resulting generated code to also go in glcpp/ for purposes
-# of distribution. Automake gives no way to do this.
-#
-# * Since we're building multiple yacc parsers into one library (and via one
-# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
-# automake to name the resulting generated code as <library-name>_filename.c.
-# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
-
-# In order to make build output print "LEX" and "YACC", we reproduce the
-# automake variables below.
-
-AM_V_LEX = $(am__v_LEX_$(V))
-am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
-am__v_LEX_0 = @echo " LEX " $@;
-am__v_LEX_1 =
-
-AM_V_YACC = $(am__v_YACC_$(V))
-am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
-am__v_YACC_0 = @echo " YACC " $@;
-am__v_YACC_1 =
-
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
-YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
-LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
-glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
- $(MKDIR_GEN)
- $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
-
-glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
- $(MKDIR_GEN)
- $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
-
-glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
- $(MKDIR_GEN)
- $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y
-
-glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l
- $(MKDIR_GEN)
- $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l
-
-# Only the parsers (specifically the header files generated at the same time)
-# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
-# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files
-# YACC is only executed once for each parser. The rest of the generated code
-# will be created at the appropriate times according to standard automake
-# dependency rules.
-BUILT_SOURCES += \
- glsl/glsl_parser.cpp \
- glsl/glsl_lexer.cpp \
- glsl/glcpp/glcpp-parse.c \
- glsl/glcpp/glcpp-lex.c
-CLEANFILES += \
- glsl/glcpp/glcpp-parse.h \
- glsl/glsl_parser.h \
- glsl/glsl_parser.cpp \
- glsl/glsl_lexer.cpp \
- glsl/glcpp/glcpp-parse.c \
- glsl/glcpp/glcpp-lex.c
-
-clean-local:
- $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
-
-dist-hook:
- $(RM) glsl/glcpp/tests/*.out
- $(RM) glsl/glcpp/tests/subtest*/*.out
+include Makefile.glsl.am
noinst_LTLIBRARIES += nir/libnir.la
diff --git a/src/compiler/Makefile.glsl.am b/src/compiler/Makefile.glsl.am
new file mode 100644
index 00000000000..daf98f61244
--- /dev/null
+++ b/src/compiler/Makefile.glsl.am
@@ -0,0 +1,217 @@
+#
+# Copyright © 2012 Jon TURNEY
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
+ glsl/TODO glsl/glcpp/README \
+ glsl/glsl_lexer.ll \
+ glsl/glsl_parser.yy \
+ glsl/glcpp/glcpp-lex.l \
+ glsl/glcpp/glcpp-parse.y \
+ SConscript.glsl
+
+TESTS += glsl/glcpp/tests/glcpp-test \
+ glsl/glcpp/tests/glcpp-test-cr-lf \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/optimization-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
+TESTS_ENVIRONMENT= \
+ export PYTHON2=$(PYTHON2); \
+ export PYTHON_FLAGS=$(PYTHON_FLAGS);
+
+check_PROGRAMS += \
+ glsl/glcpp/glcpp \
+ glsl/glsl_test \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
+noinst_PROGRAMS = glsl_compiler
+
+glsl_tests_blob_test_SOURCES = \
+ glsl/tests/blob_test.c
+glsl_tests_blob_test_LDADD = \
+ glsl/libglsl.la
+
+glsl_tests_general_ir_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/tests/builtin_variable_test.cpp \
+ glsl/tests/invalidate_locations_test.cpp \
+ glsl/tests/general_ir_test.cpp \
+ glsl/tests/varyings_test.cpp
+glsl_tests_general_ir_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+glsl_tests_general_ir_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+glsl_tests_uniform_initializer_test_SOURCES = \
+ glsl/tests/copy_constant_to_storage_tests.cpp \
+ glsl/tests/set_uniform_initializer_tests.cpp \
+ glsl/tests/uniform_initializer_utils.cpp \
+ glsl/tests/uniform_initializer_utils.h
+glsl_tests_uniform_initializer_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+glsl_tests_uniform_initializer_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+glsl_tests_sampler_types_test_SOURCES = \
+ glsl/tests/sampler_types_test.cpp
+glsl_tests_sampler_types_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+glsl_tests_sampler_types_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la
+
+glsl_libglcpp_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la
+glsl_libglcpp_la_SOURCES = \
+ glsl/glcpp/glcpp-lex.c \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-parse.h \
+ $(LIBGLCPP_FILES)
+
+glsl_glcpp_glcpp_SOURCES = \
+ glsl/glcpp/glcpp.c
+glsl_glcpp_glcpp_LDADD = \
+ glsl/libglcpp.la \
+ $(top_builddir)/src/libglsl_util.la \
+ -lm
+
+glsl_libglsl_la_LIBADD = \
+ nir/libnir.la \
+ glsl/libglcpp.la
+
+glsl_libglsl_la_SOURCES = \
+ glsl/glsl_lexer.cpp \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_parser.h \
+ $(LIBGLSL_FILES)
+
+
+glsl_compiler_SOURCES = \
+ $(GLSL_COMPILER_CXX_FILES)
+
+glsl_compiler_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+glsl_glsl_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/test.cpp \
+ glsl/test_optpass.cpp \
+ glsl/test_optpass.h
+
+glsl_glsl_test_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+# We write our own rules for yacc and lex below. We'd rather use automake,
+# but automake makes it especially difficult for a number of reasons:
+#
+# * < automake-1.12 generates .h files from .yy and .ypp files, but
+# >=automake-1.12 generates .hh and .hpp files respectively. There's no
+# good way of making a project that uses C++ yacc files compatible with
+# both versions of automake. Strong work automake developers.
+#
+# * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
+# we'd like the resulting generated code to also go in glcpp/ for purposes
+# of distribution. Automake gives no way to do this.
+#
+# * Since we're building multiple yacc parsers into one library (and via one
+# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
+# automake to name the resulting generated code as <library-name>_filename.c.
+# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
+
+# In order to make build output print "LEX" and "YACC", we reproduce the
+# automake variables below.
+
+AM_V_LEX = $(am__v_LEX_$(V))
+am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
+am__v_LEX_0 = @echo " LEX " $@;
+am__v_LEX_1 =
+
+AM_V_YACC = $(am__v_YACC_$(V))
+am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
+am__v_YACC_0 = @echo " YACC " $@;
+am__v_YACC_1 =
+
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
+glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
+
+glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
+
+glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y
+
+glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l
+
+# Only the parsers (specifically the header files generated at the same time)
+# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
+# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files
+# YACC is only executed once for each parser. The rest of the generated code
+# will be created at the appropriate times according to standard automake
+# dependency rules.
+BUILT_SOURCES += \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+CLEANFILES += \
+ glsl/glcpp/glcpp-parse.h \
+ glsl/glsl_parser.h \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+
+clean-local:
+ $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+dist-hook:
+ $(RM) glsl/glcpp/tests/*.out
+ $(RM) glsl/glcpp/tests/subtest*/*.out
From 8d51500b2dec1dbe54a517e3b1448e96978e2256 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:42:50 +0100
Subject: [PATCH 065/224] compiler: automake: split out NIR into separate
makefile.
Analogous to previous commit - improved readability at the expense of
an extra file.
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
---
src/compiler/Makefile.am | 71 +--------------------------
src/compiler/Makefile.nir.am | 93 ++++++++++++++++++++++++++++++++++++
2 files changed, 94 insertions(+), 70 deletions(-)
create mode 100644 src/compiler/Makefile.nir.am
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index 89aa54e0c42..dc30f908d8a 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -58,73 +58,4 @@ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
include Makefile.glsl.am
-noinst_LTLIBRARIES += nir/libnir.la
-
-nir_libnir_la_CPPFLAGS = \
- $(AM_CPPFLAGS) \
- -I$(top_builddir)/src/compiler/nir \
- -I$(top_srcdir)/src/compiler/nir
-
-nir_libnir_la_LIBADD = \
- libcompiler.la
-
-nir_libnir_la_SOURCES = \
- $(NIR_FILES) \
- $(NIR_GENERATED_FILES)
-
-PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
-
-nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
- $(MKDIR_GEN)
- $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
-
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
- $(MKDIR_GEN)
- $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
-
-nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
- $(MKDIR_GEN)
- $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
-
-nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
- $(MKDIR_GEN)
- $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
-
-nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
- $(MKDIR_GEN)
- $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
-
-
-check_PROGRAMS += nir/tests/control_flow_tests
-
-nir_tests_control_flow_tests_CPPFLAGS = \
- $(AM_CPPFLAGS) \
- -I$(top_builddir)/src/compiler/nir \
- -I$(top_srcdir)/src/compiler/nir
-
-nir_tests_control_flow_tests_SOURCES = \
- nir/tests/control_flow_tests.cpp
-nir_tests_control_flow_tests_CFLAGS = \
- $(PTHREAD_CFLAGS)
-nir_tests_control_flow_tests_LDADD = \
- $(top_builddir)/src/gtest/libgtest.la \
- nir/libnir.la \
- $(top_builddir)/src/util/libmesautil.la \
- $(PTHREAD_LIBS)
-
-
-TESTS += nir/tests/control_flow_tests
-
-
-BUILT_SOURCES += $(NIR_GENERATED_FILES)
-CLEANFILES += $(NIR_GENERATED_FILES)
-
-EXTRA_DIST += \
- nir/nir_algebraic.py \
- nir/nir_builder_opcodes_h.py \
- nir/nir_constant_expressions.py \
- nir/nir_opcodes.py \
- nir/nir_opcodes_c.py \
- nir/nir_opcodes_h.py \
- nir/nir_opt_algebraic.py \
- nir/tests
+include Makefile.nir.am
diff --git a/src/compiler/Makefile.nir.am b/src/compiler/Makefile.nir.am
new file mode 100644
index 00000000000..e1acf8a29a2
--- /dev/null
+++ b/src/compiler/Makefile.nir.am
@@ -0,0 +1,93 @@
+#
+# Copyright © 2012 Jon TURNEY
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+noinst_LTLIBRARIES += nir/libnir.la
+
+nir_libnir_la_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+nir_libnir_la_LIBADD = \
+ libcompiler.la
+
+nir_libnir_la_SOURCES = \
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
+
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
+
+nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
+
+nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
+
+nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
+
+
+check_PROGRAMS += nir/tests/control_flow_tests
+
+nir_tests_control_flow_tests_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+
+TESTS += nir/tests/control_flow_tests
+
+
+BUILT_SOURCES += $(NIR_GENERATED_FILES)
+CLEANFILES += $(NIR_GENERATED_FILES)
+
+EXTRA_DIST += \
+ nir/nir_algebraic.py \
+ nir/nir_builder_opcodes_h.py \
+ nir/nir_constant_expressions.py \
+ nir/nir_opcodes.py \
+ nir/nir_opcodes_c.py \
+ nir/nir_opcodes_h.py \
+ nir/nir_opt_algebraic.py \
+ nir/tests
From 28da0d6922b32985949eabce7de9a329c18cae83 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 14:48:20 +0100
Subject: [PATCH 066/224] compiler: android: split out nir into separate
makefile
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
---
src/compiler/Android.mk | 25 +---------
.../{Android.gen.mk => Android.nir.gen.mk} | 0
src/compiler/Android.nir.mk | 49 +++++++++++++++++++
3 files changed, 51 insertions(+), 23 deletions(-)
rename src/compiler/{Android.gen.mk => Android.nir.gen.mk} (100%)
create mode 100644 src/compiler/Android.nir.mk
diff --git a/src/compiler/Android.mk b/src/compiler/Android.mk
index 6c3a8552286..ac0ced58334 100644
--- a/src/compiler/Android.mk
+++ b/src/compiler/Android.mk
@@ -43,27 +43,6 @@ LOCAL_MODULE := libmesa_compiler
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
-# ---------------------------------------
-# Build libmesa_nir
-# ---------------------------------------
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
- $(NIR_FILES)
-
-LOCAL_C_INCLUDES := \
- $(MESA_TOP)/src/mapi \
- $(MESA_TOP)/src/mesa \
- $(MESA_TOP)/src/gallium/include \
- $(MESA_TOP)/src/gallium/auxiliary
-
-LOCAL_STATIC_LIBRARIES := libmesa_compiler
-
-LOCAL_MODULE := libmesa_nir
-
-include $(LOCAL_PATH)/Android.gen.mk
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
-
include $(LOCAL_PATH)/Android.glsl.mk
+
+include $(LOCAL_PATH)/Android.nir.mk
diff --git a/src/compiler/Android.gen.mk b/src/compiler/Android.nir.gen.mk
similarity index 100%
rename from src/compiler/Android.gen.mk
rename to src/compiler/Android.nir.gen.mk
diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk
new file mode 100644
index 00000000000..e89a21c03ac
--- /dev/null
+++ b/src/compiler/Android.nir.mk
@@ -0,0 +1,49 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/Makefile.sources
+
+# ---------------------------------------
+# Build libmesa_nir
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(NIR_FILES)
+
+LOCAL_C_INCLUDES := \
+ $(MESA_TOP)/src/mapi \
+ $(MESA_TOP)/src/mesa \
+ $(MESA_TOP)/src/gallium/include \
+ $(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_STATIC_LIBRARIES := libmesa_compiler
+
+LOCAL_MODULE := libmesa_nir
+
+include $(LOCAL_PATH)/Android.nir.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
From c69ab885d74c675091c1a143bd8a4abb2071112e Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Tue, 5 Apr 2016 15:50:28 +0100
Subject: [PATCH 067/224] mesa: automake: update and reuse X86_SSE41_FILES list
Signed-off-by: Emil Velikov
Acked-by: Jason Ekstrand
---
src/mesa/Makefile.am | 6 ++----
src/mesa/Makefile.sources | 4 +++-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 8dc44fda0f2..390381828e9 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -179,10 +179,8 @@ libmesagallium_la_LIBADD = \
$(ARCH_LIBS)
libmesa_sse41_la_SOURCES = \
- main/streaming-load-memcpy.c \
- main/streaming-load-memcpy.h \
- main/sse_minmax.c \
- main/sse_minmax.h
+ $(X86_SSE41_FILES)
+
libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 7425f01273d..bbafa454c0d 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -589,7 +589,9 @@ X86_64_FILES = \
X86_SSE41_FILES = \
main/streaming-load-memcpy.c \
- main/sse_minmax.c
+ main/streaming-load-memcpy.h \
+ main/sse_minmax.c \
+ main/sse_minmax.h
SPARC_FILES = \
sparc/sparc.h \
From 5e010a72c9bce217b49c564980509532d345b227 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 6 Apr 2016 13:06:26 +0100
Subject: [PATCH 068/224] drivers/softpipe: add missing header to the tarball
Signed-off-by: Emil Velikov
---
src/gallium/drivers/softpipe/Makefile.sources | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/softpipe/Makefile.sources b/src/gallium/drivers/softpipe/Makefile.sources
index efe88468e3f..3669dfc71f8 100644
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -11,6 +11,7 @@ C_SOURCES := \
sp_fs_exec.c \
sp_fs.h \
sp_image.c \
+ sp_image.h \
sp_limits.h \
sp_prim_vbuf.c \
sp_prim_vbuf.h \
From 581c8016f846643ed9e836604211f4a46cef57e5 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 6 Apr 2016 13:07:04 +0100
Subject: [PATCH 069/224] mesa: add missing header to the tarball
Signed-off-by: Emil Velikov
---
src/mesa/Makefile.sources | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index bbafa454c0d..2ffbb152e3c 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -396,6 +396,7 @@ VBO_FILES = \
STATETRACKER_FILES = \
state_tracker/st_atifs_to_tgsi.c \
+ state_tracker/st_atifs_to_tgsi.h \
state_tracker/st_atom_array.c \
state_tracker/st_atom_atomicbuf.c \
state_tracker/st_atom_blend.c \
From 22836dbefa8b57fd788adf2693d7aab73c4de16a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 25 Mar 2016 10:50:11 -0700
Subject: [PATCH 070/224] glsl/shader_enums: Add an enum for Vulkan
InstanceIndex
In Vulkan, you have InstanceIndex which begins at the base instance value
rather than the zero-based InstanceID of GL.
Reviewed-by: Rob Clark
---
src/compiler/shader_enums.c | 1 +
src/compiler/shader_enums.h | 7 +++++++
2 files changed, 8 insertions(+)
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 942d152b129..003ad3b8ab6 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -201,6 +201,7 @@ gl_system_value_name(gl_system_value sysval)
static const char *names[] = {
ENUM(SYSTEM_VALUE_VERTEX_ID),
ENUM(SYSTEM_VALUE_INSTANCE_ID),
+ ENUM(SYSTEM_VALUE_INSTANCE_INDEX),
ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
ENUM(SYSTEM_VALUE_BASE_VERTEX),
ENUM(SYSTEM_VALUE_INVOCATION_ID),
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 07ae9ee2de7..0c43d5a92cb 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -378,6 +378,13 @@ typedef enum
*/
SYSTEM_VALUE_INSTANCE_ID,
+ /**
+ * Vulkan InstanceIndex.
+ *
+ * InstanceIndex = gl_InstanceID + gl_BaseInstance
+ */
+ SYSTEM_VALUE_INSTANCE_INDEX,
+
/**
* DirectX-style vertex ID.
*
From 39103145ffe325c4a1432c07e1ac02b1aef0bae5 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 25 Mar 2016 10:51:23 -0700
Subject: [PATCH 071/224] glsl/shader_enums: Add the other two compute builtins
These weren't added before because they are actually calculated values that
are computed from other inputs. However, in order to handle them in
nir_lower_system_values, it's nice for them to have a canonical location.
Reviewed-by: Rob Clark
---
src/compiler/shader_enums.c | 2 ++
src/compiler/shader_enums.h | 2 ++
2 files changed, 4 insertions(+)
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 003ad3b8ab6..ff2f564dc98 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -215,6 +215,8 @@ gl_system_value_name(gl_system_value sysval)
ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
+ ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
ENUM(SYSTEM_VALUE_VERTEX_CNT),
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 0c43d5a92cb..0c2740866fd 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -459,6 +459,8 @@ typedef enum
*/
/*@{*/
SYSTEM_VALUE_LOCAL_INVOCATION_ID,
+ SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+ SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
SYSTEM_VALUE_WORK_GROUP_ID,
SYSTEM_VALUE_NUM_WORK_GROUPS,
/*@}*/
From a9e6213edd757980475167331bda15c3970a538d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 25 Mar 2016 10:54:27 -0700
Subject: [PATCH 072/224] nir/lower_system_values: Add support for several
computed values
Reviewed-by: Rob Clark
---
src/compiler/nir/nir.h | 3 +
src/compiler/nir/nir_lower_system_values.c | 73 ++++++++++++++++++++-
src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 +
src/mesa/drivers/dri/i965/brw_compiler.c | 3 +-
4 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 31498241555..ebac75075ec 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1626,6 +1626,9 @@ typedef struct nir_shader_compiler_options {
* are simulated by floats.)
*/
bool native_integers;
+
+ /* Indicates that the driver only has zero-based vertex id */
+ bool vertex_id_zero_based;
} nir_shader_compiler_options;
typedef struct nir_shader_info {
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index 2bd787d3574..2d3ccd7d0f9 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -55,9 +55,76 @@ convert_block(nir_block *block, void *void_state)
b->cursor = nir_after_instr(&load_var->instr);
- nir_intrinsic_op sysval_op =
- nir_intrinsic_from_system_value(var->data.location);
- nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
+ nir_ssa_def *sysval;
+ switch (var->data.location) {
+ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
+ /* From the GLSL man page for gl_GlobalInvocationID:
+ *
+ * "The value of gl_GlobalInvocationID is equal to
+ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
+ */
+
+ nir_const_value local_size;
+ local_size.u32[0] = b->shader->info.cs.local_size[0];
+ local_size.u32[1] = b->shader->info.cs.local_size[1];
+ local_size.u32[2] = b->shader->info.cs.local_size[2];
+
+ nir_ssa_def *group_id =
+ nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
+ nir_ssa_def *local_id =
+ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+
+ sysval = nir_iadd(b, nir_imul(b, group_id,
+ nir_build_imm(b, 3, local_size)),
+ local_id);
+ break;
+ }
+
+ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
+ /* From the GLSL man page for gl_LocalInvocationIndex:
+ *
+ * "The value of gl_LocalInvocationIndex is equal to
+ * gl_LocalInvocationID.z * gl_WorkGroupSize.x *
+ * gl_WorkGroupSize.y + gl_LocalInvocationID.y *
+ * gl_WorkGroupSize.x + gl_LocalInvocationID.x"
+ */
+ nir_ssa_def *local_id =
+ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+
+ nir_ssa_def *size_x = nir_imm_int(b, b->shader->info.cs.local_size[0]);
+ nir_ssa_def *size_y = nir_imm_int(b, b->shader->info.cs.local_size[1]);
+
+ sysval = nir_imul(b, nir_channel(b, local_id, 2),
+ nir_imul(b, size_x, size_y));
+ sysval = nir_iadd(b, sysval,
+ nir_imul(b, nir_channel(b, local_id, 1), size_x));
+ sysval = nir_iadd(b, sysval, nir_channel(b, local_id, 0));
+ break;
+ }
+
+ case SYSTEM_VALUE_VERTEX_ID:
+ if (b->shader->options->vertex_id_zero_based) {
+ sysval = nir_iadd(b,
+ nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
+ nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
+ } else {
+ sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
+ }
+ break;
+
+ case SYSTEM_VALUE_INSTANCE_INDEX:
+ sysval = nir_iadd(b,
+ nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
+ nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
+ break;
+
+ default: {
+ nir_intrinsic_op sysval_op =
+ nir_intrinsic_from_system_value(var->data.location);
+ sysval = nir_load_system_value(b, sysval_op, 0);
+ break;
+ } /* default */
+ }
nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
nir_instr_remove(&load_var->instr);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 73c65d6ad27..b3b6346c8a5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -45,6 +45,7 @@ ir3_tgsi_to_nir(const struct tgsi_token *tokens)
.lower_flrp = true,
.lower_ffract = true,
.native_integers = true,
+ .vertex_id_zero_based = true,
.lower_extract_byte = true,
.lower_extract_word = true,
};
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index 6509267a52e..4496699e397 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -82,7 +82,8 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
.lower_uadd_carry = true, \
.lower_usub_borrow = true, \
.lower_fdiv = true, \
- .native_integers = true
+ .native_integers = true, \
+ .vertex_id_zero_based = true
static const struct nir_shader_compiler_options scalar_nir_options = {
COMMON_OPTIONS,
From 704d203d5f9bfaee1367678e3c4e11e25e22acc4 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 11 Apr 2016 16:29:02 -0600
Subject: [PATCH 073/224] st/mesa: replace _mesa_sysval_to_semantic table with
function
Instead of using an array indexed by SYSTEM_VALUE_x, just use a
switch statement. This fixes a regression caused by inserting new
SYSTEM_VALUE_ enums but not updating the mapping to TGSI semantics.
v2: fix a few switch statement mistakes for compute-related enums
Reviewed-by: Ilia Mirkin
---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 ++++++++++-----------
src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 +-
src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +-
3 files changed, 66 insertions(+), 65 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b9ab7ae9919..5f037daea76 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5192,43 +5192,72 @@ struct st_translate {
};
/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
-const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
- /* Vertex shader
- */
- TGSI_SEMANTIC_VERTEXID,
- TGSI_SEMANTIC_INSTANCEID,
- TGSI_SEMANTIC_VERTEXID_NOBASE,
- TGSI_SEMANTIC_BASEVERTEX,
- TGSI_SEMANTIC_BASEINSTANCE,
- TGSI_SEMANTIC_DRAWID,
+unsigned
+_mesa_sysval_to_semantic(unsigned sysval)
+{
+ switch (sysval) {
+ /* Vertex shader */
+ case SYSTEM_VALUE_VERTEX_ID:
+ return TGSI_SEMANTIC_VERTEXID;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ return TGSI_SEMANTIC_INSTANCEID;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ return TGSI_SEMANTIC_VERTEXID_NOBASE;
+ case SYSTEM_VALUE_BASE_VERTEX:
+ return TGSI_SEMANTIC_BASEVERTEX;
+ case SYSTEM_VALUE_BASE_INSTANCE:
+ return TGSI_SEMANTIC_BASEINSTANCE;
+ case SYSTEM_VALUE_DRAW_ID:
+ return TGSI_SEMANTIC_DRAWID;
- /* Geometry shader
- */
- TGSI_SEMANTIC_INVOCATIONID,
+ /* Geometry shader */
+ case SYSTEM_VALUE_INVOCATION_ID:
+ return TGSI_SEMANTIC_INVOCATIONID;
- /* Fragment shader
- */
- TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_FACE,
- TGSI_SEMANTIC_SAMPLEID,
- TGSI_SEMANTIC_SAMPLEPOS,
- TGSI_SEMANTIC_SAMPLEMASK,
- TGSI_SEMANTIC_HELPER_INVOCATION,
+ /* Fragment shader */
+ case SYSTEM_VALUE_FRAG_COORD:
+ return TGSI_SEMANTIC_POSITION;
+ case SYSTEM_VALUE_FRONT_FACE:
+ return TGSI_SEMANTIC_FACE;
+ case SYSTEM_VALUE_SAMPLE_ID:
+ return TGSI_SEMANTIC_SAMPLEID;
+ case SYSTEM_VALUE_SAMPLE_POS:
+ return TGSI_SEMANTIC_SAMPLEPOS;
+ case SYSTEM_VALUE_SAMPLE_MASK_IN:
+ return TGSI_SEMANTIC_SAMPLEMASK;
+ case SYSTEM_VALUE_HELPER_INVOCATION:
+ return TGSI_SEMANTIC_HELPER_INVOCATION;
- /* Tessellation shaders
- */
- TGSI_SEMANTIC_TESSCOORD,
- TGSI_SEMANTIC_VERTICESIN,
- TGSI_SEMANTIC_PRIMID,
- TGSI_SEMANTIC_TESSOUTER,
- TGSI_SEMANTIC_TESSINNER,
+ /* Tessellation shader */
+ case SYSTEM_VALUE_TESS_COORD:
+ return TGSI_SEMANTIC_TESSCOORD;
+ case SYSTEM_VALUE_VERTICES_IN:
+ return TGSI_SEMANTIC_VERTICESIN;
+ case SYSTEM_VALUE_PRIMITIVE_ID:
+ return TGSI_SEMANTIC_PRIMID;
+ case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ return TGSI_SEMANTIC_TESSOUTER;
+ case SYSTEM_VALUE_TESS_LEVEL_INNER:
+ return TGSI_SEMANTIC_TESSINNER;
+
+ /* Compute shader */
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+ return TGSI_SEMANTIC_THREAD_ID;
+ case SYSTEM_VALUE_WORK_GROUP_ID:
+ return TGSI_SEMANTIC_BLOCK_ID;
+ case SYSTEM_VALUE_NUM_WORK_GROUPS:
+ return TGSI_SEMANTIC_GRID_SIZE;
+
+ /* Unhandled */
+ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
+ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
+ case SYSTEM_VALUE_VERTEX_CNT:
+ default:
+ assert(!"Unexpected SYSTEM_VALUE_ enum");
+ return TGSI_SEMANTIC_COUNT;
+ }
+}
- /* Compute shaders
- */
- TGSI_SEMANTIC_THREAD_ID,
- TGSI_SEMANTIC_BLOCK_ID,
- TGSI_SEMANTIC_GRID_SIZE,
-};
/**
* Make note of a branch to a label in the TGSI code.
@@ -6000,35 +6029,6 @@ st_translate_program(
assert(numInputs <= ARRAY_SIZE(t->inputs));
assert(numOutputs <= ARRAY_SIZE(t->outputs));
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] ==
- TGSI_SEMANTIC_FACE);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] ==
- TGSI_SEMANTIC_VERTEXID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] ==
- TGSI_SEMANTIC_INSTANCEID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] ==
- TGSI_SEMANTIC_SAMPLEID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] ==
- TGSI_SEMANTIC_SAMPLEPOS);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] ==
- TGSI_SEMANTIC_SAMPLEMASK);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] ==
- TGSI_SEMANTIC_INVOCATIONID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] ==
- TGSI_SEMANTIC_VERTEXID_NOBASE);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
- TGSI_SEMANTIC_BASEVERTEX);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
- TGSI_SEMANTIC_TESSCOORD);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
- TGSI_SEMANTIC_HELPER_INVOCATION);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] ==
- TGSI_SEMANTIC_THREAD_ID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] ==
- TGSI_SEMANTIC_BLOCK_ID);
- assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] ==
- TGSI_SEMANTIC_GRID_SIZE);
-
t = CALLOC_STRUCT(st_translate);
if (!t) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
@@ -6215,7 +6215,7 @@ st_translate_program(
for (i = 0; sysInputs; i++) {
if (sysInputs & (1 << i)) {
- unsigned semName = _mesa_sysval_to_semantic[i];
+ unsigned semName = _mesa_sysval_to_semantic(i);
t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 729295bcb52..774588a111b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -63,7 +63,8 @@ st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi,
const GLuint outputMapping[],
struct pipe_stream_output_info *so);
-extern const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX];
+unsigned
+_mesa_sysval_to_semantic(unsigned sysval);
#ifdef __cplusplus
}
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 7a686b199d5..e1c79a57b0a 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -1074,7 +1074,7 @@ st_translate_mesa_program(
for (i = 0; sysInputs; i++) {
if (sysInputs & (1 << i)) {
- unsigned semName = _mesa_sysval_to_semantic[i];
+ unsigned semName = _mesa_sysval_to_semantic(i);
t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
From 1c7ba7f15646367006d615180e89d5ea34a3d5f6 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Fri, 8 Apr 2016 11:31:15 -0400
Subject: [PATCH 074/224] radeon/uvd: alignment fix for decode message buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyuan Zhang
Reviewed-by: Christian König
---
src/gallium/drivers/radeon/radeon_uvd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 233f46091a4..098baf20797 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -1003,7 +1003,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
- dec->msg->body.decode.db_pitch = dec->base.width;
+ dec->msg->body.decode.db_pitch = align(dec->base.width, 16);
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
From 5886cd79a0ed10b887840f6afb980a5a6a47e26e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 6 Apr 2016 22:34:39 -0700
Subject: [PATCH 075/224] nir: Do basic constant reassociation.
Many shaders contain expression trees of the form:
const_1 * (value * const_2)
Reorganizing these to
(const_1 * const_2) * value
will allow constant folding to combine the constants. Sometimes, these
constants are 2 and 0.5, so we can remove a multiply altogether. Other
times, it can create more immediate constants, which can actually hurt.
Finding a good balance here is tricky. While much more could be done,
this simple patch seems to have a lot of positive benefit while having
a low downside.
shader-db results on Broadwell:
total instructions in shared programs: 8963768 -> 8961369 (-0.03%)
instructions in affected programs: 438318 -> 435919 (-0.55%)
helped: 1502
HURT: 245
total cycles in shared programs: 71527354 -> 71421516 (-0.15%)
cycles in affected programs: 11541788 -> 11435950 (-0.92%)
helped: 3445
HURT: 1224
Signed-off-by: Kenneth Graunke
Reviewed-by: Eduardo Lima Mitev
Reviewed-by: Jason Ekstrand
Reviewed-by: Matt Turner
---
src/compiler/nir/nir_opt_algebraic.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index e72b4a791cb..420d9d9330e 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -274,6 +274,14 @@ optimizations = [
(('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
+ # Reassociate constants in add/mul chains so they can be folded together.
+ # For now, we only handle cases where the constants are separated by
+ # a single non-constant. We could do better eventually.
+ (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
+ (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
+ (('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
+ (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
+
# Misc. lowering
(('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
(('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
From 808d26c771541603544444ad5d8794e37e37e379 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 7 Apr 2016 15:03:39 -0700
Subject: [PATCH 076/224] nir: Silence unused "options" warning in algebraic
passes.
Some passes may not refer to options->..., at which point the compiler
will warn about an unused variable. Just cast to void unconditionally
to shut it up.
Signed-off-by: Kenneth Graunke
Reviewed-by: Eduardo Lima Mitev
Reviewed-by: Jason Ekstrand
Reviewed-by: Matt Turner
---
src/compiler/nir/nir_algebraic.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index d05564f779c..53a79073a44 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -291,6 +291,7 @@ ${pass_name}(nir_shader *shader)
bool progress = false;
bool condition_flags[${len(condition_list)}];
const nir_shader_compiler_options *options = shader->options;
+ (void) options;
% for index, condition in enumerate(condition_list):
condition_flags[${index}] = ${condition};
From b0dffdc616801a1fd8534502e11ac840369041ab Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 7 Apr 2016 15:09:56 -0700
Subject: [PATCH 077/224] i965: Pass brw_compiler into brw_preprocess_nir()
instead of is_scalar.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
I want to be able to read other fields.
Signed-off-by: Kenneth Graunke
Reviewed-by: Alejandro Piñeiro
Reviewed-by: Eduardo Lima Mitev
Reviewed-by: Jason Ekstrand
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_nir.c | 6 ++++--
src/mesa/drivers/dri/i965/brw_nir.h | 3 ++-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index c62840a6e15..1821c0d7d23 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -437,11 +437,13 @@ nir_optimize(nir_shader *nir, bool is_scalar)
* is_scalar = true to scalarize everything prior to code gen.
*/
nir_shader *
-brw_preprocess_nir(nir_shader *nir, bool is_scalar)
+brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
bool progress; /* Written by OPT and OPT_V */
(void)progress;
+ const bool is_scalar = compiler->scalar_stage[nir->stage];
+
if (nir->stage == MESA_SHADER_GEOMETRY)
OPT(nir_lower_gs_intrinsics);
@@ -568,7 +570,7 @@ brw_create_nir(struct brw_context *brw,
(void)progress;
- nir = brw_preprocess_nir(nir, is_scalar);
+ nir = brw_preprocess_nir(brw->intelScreen->compiler, nir);
OPT(nir_lower_system_values);
OPT_V(brw_nir_lower_uniforms, is_scalar);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 440b4ceb669..b10c0838fe0 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -81,7 +81,8 @@ nir_shader *brw_create_nir(struct brw_context *brw,
gl_shader_stage stage,
bool is_scalar);
-nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar);
+nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
+ nir_shader *nir);
void brw_nir_lower_vs_inputs(nir_shader *nir,
const struct brw_device_info *devinfo,
From bfd17c76c1267756ea16051cbe174cb23ff49f44 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 7 Apr 2016 15:04:35 -0700
Subject: [PATCH 078/224] i965: Port INTEL_PRECISE_TRIG=1 to NIR.
This makes the extra multiply visible to NIR's algebraic optimizations
(for constant reassociation) as well as constant folding. This means
that when the result of sin/cos is multiplied by a constant, we can
eliminate the extra multiply altogether, reducing the cost of the
workaround.
It also means we only have to implement it one place, rather than in
both backends.
This makes INTEL_PRECISE_TRIG=1 cost nothing on GPUTest/Volplosion,
which has a ton of sin() calls, but always multiplies them by an
immediate constant. The extra multiply gets folded away.
Signed-off-by: Kenneth Graunke
Reviewed-by: Eduardo Lima Mitev
Reviewed-by: Jason Ekstrand
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/Makefile.am | 5 +++
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +------
src/mesa/drivers/dri/i965/brw_nir.c | 3 ++
src/mesa/drivers/dri/i965/brw_nir.h | 2 +
.../dri/i965/brw_nir_trig_workarounds.py | 43 +++++++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 +------
7 files changed, 58 insertions(+), 28 deletions(-)
create mode 100755 src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 0db5a51e725..a41c8305a80 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -33,6 +33,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
+ -I$(top_srcdir)/src/compiler/nir \
-I$(top_builddir)/src/compiler/nir \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
@@ -41,6 +42,10 @@ AM_CFLAGS = \
AM_CXXFLAGS = $(AM_CFLAGS)
+brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false)
+
noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la
libi965_dri_la_SOURCES = $(i965_FILES)
libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS)
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 46895882414..2619e4360bc 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -44,6 +44,7 @@ i965_compiler_FILES = \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_nir_attribute_workarounds.c \
+ brw_nir_trig_workarounds.c \
brw_nir_opt_peephole_ffma.c \
brw_nir_uniforms.cpp \
brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 90b878913b3..bd6314a3e37 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -775,24 +775,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_fsin:
- if (!compiler->precise_trig) {
- inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
- inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
- }
+ inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- if (!compiler->precise_trig) {
- inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
- inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
- }
+ inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 1821c0d7d23..932979a7719 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -447,6 +447,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
if (nir->stage == MESA_SHADER_GEOMETRY)
OPT(nir_lower_gs_intrinsics);
+ if (compiler->precise_trig)
+ OPT(brw_nir_apply_trig_workarounds);
+
static const nir_lower_tex_options tex_options = {
.lower_txp = ~0,
};
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index b10c0838fe0..2711606511d 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -106,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
bool use_legacy_snorm_formula,
const uint8_t *attrib_wa_flags);
+bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py
new file mode 100755
index 00000000000..67dab9ab326
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py
@@ -0,0 +1,43 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2016 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import nir_algebraic
+
+# The SIN and COS instructions on Intel hardware can produce values
+# slightly outside of the [-1.0, 1.0] range for a small set of values.
+# Obviously, this can break everyone's expectations about trig functions.
+#
+# According to an internal presentation, the COS instruction can produce
+# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One
+# suggested workaround is to multiply by 0.99997, scaling down the
+# amplitude slightly. Apparently this also minimizes the error function,
+# reducing the maximum error from 0.00006 to about 0.00003.
+
+trig_workarounds = [
+ (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)),
+ (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)),
+]
+
+print '#include "brw_nir.h"'
+print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",
+ trig_workarounds).render()
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index d9f96c58379..e4e8c38e703 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1101,24 +1101,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fsin:
- if (!compiler->precise_trig) {
- inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
- } else {
- src_reg tmp = src_reg(this, glsl_type::vec4_type);
- inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
- }
+ inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- if (!compiler->precise_trig) {
- inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
- } else {
- src_reg tmp = src_reg(this, glsl_type::vec4_type);
- inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
- }
+ inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
inst->saturate = instr->dest.saturate;
break;
From 18c8b927e205d7c8f2a04377b1fa3c4242074de1 Mon Sep 17 00:00:00 2001
From: Markus Wick
Date: Sat, 9 Apr 2016 03:31:16 +0200
Subject: [PATCH 079/224] nir: Merge redundant integer clamping.
Dolphin uses them a lot. Range tracking would be better in the long term,
but these two lines work fine for now.
Signed-off-by: Markus Wick
Reviewed-by: Kenneth Graunke
Reviewed-by: Jason Ekstrand
---
src/compiler/nir/nir_opt_algebraic.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 420d9d9330e..ec8929a6d9f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -137,7 +137,10 @@ optimizations = [
(('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
- (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
+ (('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
+ (('umin', ('umax', ('umin', ('umax', a, b), c), b), c), ('umin', ('umax', a, b), c)),
+ (('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
(('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
(('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
(('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
From 193a5cee6a535875542bb5d265bdeb21a8b93e9d Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Mon, 11 Apr 2016 18:40:02 -0700
Subject: [PATCH 080/224] nir: Fix typo in comment
Trivial.
Signed-off-by: Ian Romanick
---
src/compiler/nir/nir_opt_dce.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index 32436c18b60..cab09dfffc3 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -71,7 +71,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
nir_tex_instr *tex_instr;
/* We use the pass_flags to store the live/dead information. In DCE, we
- * just treat it as a zero/non-zerl boolean for whether or not the
+ * just treat it as a zero/non-zero boolean for whether or not the
* instruction is live.
*/
instr->pass_flags = 0;
From 9c7a0d188a495a247570a27bc4af77c540e40c34 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 11 Apr 2016 12:20:38 +1000
Subject: [PATCH 081/224] tgsi: set nonhelpermask for vertex shaders
For atomic operations we really need to avoid executing unnecessary shaders, so for some
tests that just draw a single point we only want one vertex to get processed, not 4.
This fixes a number of the atomic counter tests.
Reviewed-by: Brian Paul
Reviewed-by: Roland Scheidegger
Signed-off-by: Dave Airlie
---
src/gallium/auxiliary/draw/draw_vs_exec.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 5b53cff29f0..4cd755e11ce 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -159,6 +159,7 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
+ machine->NonHelperMask = (1 << max_vertices) - 1;
/* run interpreter */
tgsi_exec_machine_run( machine );
From 081a958bcddd37131bd67d0ac26a0785ecbd5fa6 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 11 Apr 2016 12:54:59 +1000
Subject: [PATCH 082/224] tgsi: add support for buffer/atomic operations to
tgsi_exec.
This adds support for doing load/store/atomic operations on
buffer objects.
Reviewed-by: Brian Paul
Reviewed-by: Roland Scheidegger
Signed-off-by: Dave Airlie
---
src/gallium/auxiliary/draw/draw_gs.c | 2 +-
src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +-
src/gallium/auxiliary/tgsi/tgsi_exec.c | 217 +++++++++++++++++++++-
src/gallium/auxiliary/tgsi/tgsi_exec.h | 34 +++-
src/gallium/drivers/softpipe/sp_fs_exec.c | 4 +-
5 files changed, 245 insertions(+), 14 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 14db2d6f39d..a555b26c345 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -681,7 +681,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(shader->machine,
shader->state.tokens,
- draw->gs.tgsi.sampler, draw->gs.tgsi.image);
+ draw->gs.tgsi.sampler, draw->gs.tgsi.image, NULL);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 4cd755e11ce..feb222b8fa5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -70,7 +70,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
if (evs->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
- draw->vs.tgsi.sampler, draw->vs.tgsi.image);
+ draw->vs.tgsi.sampler, draw->vs.tgsi.image, NULL);
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a595bbbc6d3..fb5105173c6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -854,7 +854,8 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler,
- struct tgsi_image *image)
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer)
{
uint k;
struct tgsi_parse_context parse;
@@ -873,6 +874,7 @@ tgsi_exec_machine_bind_shader(
mach->Tokens = tokens;
mach->Sampler = sampler;
mach->Image = image;
+ mach->Buffer = buffer;
if (!tokens) {
/* unbind and free all */
@@ -3758,8 +3760,8 @@ get_image_coord_sample(unsigned tgsi_tex)
}
static void
-exec_load(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
uint unit;
@@ -3805,8 +3807,51 @@ exec_load(struct tgsi_exec_machine *mach,
}
static void
-exec_store(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_buf(struct tgsi_exec_machine *mach,
+              const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[4];
+   uint unit;
+   int j;
+   uint chan;
+   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+   struct tgsi_buffer_params params;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+   unit = fetch_sampler_unit(mach, inst, 0);
+
+   params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; /* skip helper/killed lanes */
+   params.unit = unit;
+   IFETCH(&r[0], 1, TGSI_CHAN_X); /* src1.x: per-lane offset handed to load() */
+
+   mach->Buffer->load(mach->Buffer, &params,
+                      r[0].i, rgba);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      r[0].f[j] = rgba[0][j];
+      r[1].f[j] = rgba[1][j];
+      r[2].f[j] = rgba[2][j];
+      r[3].f[j] = rgba[3][j];
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
+exec_load(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_load_img(mach, inst);
+ else
+ exec_load_buf(mach, inst);
+}
+
+static void
+exec_store_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[3], sample_r;
union tgsi_exec_channel value[4];
@@ -3850,8 +3895,53 @@ exec_store(struct tgsi_exec_machine *mach,
}
static void
-exec_atomop(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_store_buf(struct tgsi_exec_machine *mach,
+               const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[3];
+   union tgsi_exec_channel value[4];
+   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+   struct tgsi_buffer_params params;
+   int i, j;
+   uint unit;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+   unit = inst->Dst[0].Register.Index; /* the buffer is the destination operand */
+
+   params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; /* skip helper/killed lanes */
+   params.unit = unit;
+   params.writemask = inst->Dst[0].Register.WriteMask;
+
+   IFETCH(&r[0], 0, TGSI_CHAN_X); /* src0.x: per-lane offset handed to store() */
+   for (i = 0; i < 4; i++) {
+      FETCH(&value[i], 1, TGSI_CHAN_X + i);
+   }
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      rgba[0][j] = value[0].f[j];
+      rgba[1][j] = value[1].f[j];
+      rgba[2][j] = value[2].f[j];
+      rgba[3][j] = value[3].f[j];
+   }
+
+   mach->Buffer->store(mach->Buffer, &params,
+                       r[0].i,
+                       rgba);
+}
+
+static void
+exec_store(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
+ exec_store_img(mach, inst);
+ else
+ exec_store_buf(mach, inst);
+}
+
+static void
+exec_atomop_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
union tgsi_exec_channel value[4], value2[4];
@@ -3918,8 +4008,77 @@ exec_atomop(struct tgsi_exec_machine *mach,
}
static void
-exec_resq(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_atomop_buf(struct tgsi_exec_machine *mach,
+                const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[4]; /* four entries: r[3] is written and stored below */
+   union tgsi_exec_channel value[4], value2[4];
+   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+   float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+   struct tgsi_buffer_params params;
+   int i, j;
+   uint unit, chan;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+   unit = fetch_sampler_unit(mach, inst, 0);
+
+   params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; /* skip helper/killed lanes */
+   params.unit = unit;
+   params.writemask = inst->Dst[0].Register.WriteMask;
+
+   IFETCH(&r[0], 1, TGSI_CHAN_X); /* src1.x: per-lane offset handed to op() */
+
+   for (i = 0; i < 4; i++) {
+      FETCH(&value[i], 2, TGSI_CHAN_X + i);
+      if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+         FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+   }
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      rgba[0][j] = value[0].f[j];
+      rgba[1][j] = value[1].f[j];
+      rgba[2][j] = value[2].f[j];
+      rgba[3][j] = value[3].f[j];
+   }
+   if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+         rgba2[0][j] = value2[0].f[j];
+         rgba2[1][j] = value2[1].f[j];
+         rgba2[2][j] = value2[2].f[j];
+         rgba2[3][j] = value2[3].f[j];
+      }
+   }
+
+   mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
+                    r[0].i,
+                    rgba, rgba2);
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      r[0].f[j] = rgba[0][j];
+      r[1].f[j] = rgba[1][j];
+      r[2].f[j] = rgba[2][j];
+      r[3].f[j] = rgba[3][j];
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
+exec_atomop(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_atomop_img(mach, inst);
+ else
+ exec_atomop_buf(mach, inst);
+}
+
+static void
+exec_resq_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
int result[4];
union tgsi_exec_channel r[4];
@@ -3951,6 +4110,46 @@ exec_resq(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_resq_buf(struct tgsi_exec_machine *mach,
+              const struct tgsi_full_instruction *inst)
+{
+   int result = 0; /* get_dims() may return early without writing it */
+   union tgsi_exec_channel r[4] = {0}; /* only r[0] is filled; keep the rest defined */
+   uint unit;
+   int i, chan;
+   struct tgsi_buffer_params params;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+   unit = fetch_sampler_unit(mach, inst, 0);
+
+   params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; /* skip helper/killed lanes */
+   params.unit = unit;
+
+   mach->Buffer->get_dims(mach->Buffer, &params, &result);
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+      r[0].i[i] = result;
+   }
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+                    TGSI_EXEC_DATA_INT);
+      }
+   }
+}
+
+static void
+exec_resq(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_resq_img(mach, inst);
+ else
+ exec_resq_buf(mach, inst);
+}
+
static void
micro_i2f(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 45fb8d43c88..257c58f777e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,36 @@ struct tgsi_image {
int dims[4]);
};
+struct tgsi_buffer_params {
+ unsigned unit;
+ unsigned execmask;
+ unsigned writemask;
+};
+
+struct tgsi_buffer {
+ /* buffer interfaces */
+ void (*load)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*store)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*op)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ unsigned opcode,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*get_dims)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ int *dim);
+};
+
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
@@ -334,6 +364,7 @@ struct tgsi_exec_machine
struct tgsi_sampler *Sampler;
struct tgsi_image *Image;
+ struct tgsi_buffer *Buffer;
unsigned ImmLimit;
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
@@ -424,7 +455,8 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler,
- struct tgsi_image *image);
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer);
uint
tgsi_exec_machine_run(
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index bfd9a4b7496..6a5f7acd263 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -70,7 +70,7 @@ exec_prepare( const struct sp_fragment_shader_variant *var,
*/
tgsi_exec_machine_bind_shader(machine,
var->tokens,
- sampler, image);
+ sampler, image, NULL);
}
@@ -186,7 +186,7 @@ exec_delete(struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine)
{
if (machine->Tokens == var->tokens) {
- tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL);
+ tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
}
FREE( (void *) var->tokens );
From c2aeeca4554d744b6293ad87d9a183e51b0138ff Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 11 Apr 2016 12:59:25 +1000
Subject: [PATCH 083/224] draw: add support for passing buffers to vs/gs
shaders.
Like the image code, but for shader buffers this time.
Reviewed-by: Brian Paul
Reviewed-by: Roland Scheidegger
Signed-off-by: Dave Airlie
---
src/gallium/auxiliary/draw/draw_context.c | 18 +++++++++++++++++-
src/gallium/auxiliary/draw/draw_context.h | 6 ++++++
src/gallium/auxiliary/draw/draw_gs.c | 4 +++-
src/gallium/auxiliary/draw/draw_private.h | 3 +++
src/gallium/auxiliary/draw/draw_vs_exec.c | 4 +++-
5 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 2ba9b099664..75551fbe2dd 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -749,7 +749,23 @@ draw_image(struct draw_context *draw,
}
}
-
+/**
+ * Provide TGSI buffer objects for vertex/geometry shaders that use
+ * load/store/atomic ops. This state only needs to be set once per context.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_buffer(struct draw_context *draw,
+ uint shader,
+ struct tgsi_buffer *buffer)
+{
+ if (shader == PIPE_SHADER_VERTEX) {
+ draw->vs.tgsi.buffer = buffer;
+ } else {
+ debug_assert(shader == PIPE_SHADER_GEOMETRY);
+ draw->gs.tgsi.buffer = buffer;
+ }
+}
void draw_set_render( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 5d9870b115c..3e6722fcb7e 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -49,6 +49,7 @@ struct draw_geometry_shader;
struct draw_fragment_shader;
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
/*
* structure to contain driver internal information
@@ -160,6 +161,11 @@ draw_image(struct draw_context *draw,
uint shader_type,
struct tgsi_image *image);
+void
+draw_buffer(struct draw_context *draw,
+ uint shader_type,
+ struct tgsi_buffer *buffer);
+
void
draw_set_sampler_views(struct draw_context *draw,
unsigned shader_stage,
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index a555b26c345..ef217fa5ceb 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -681,7 +681,9 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(shader->machine,
shader->state.tokens,
- draw->gs.tgsi.sampler, draw->gs.tgsi.image, NULL);
+ draw->gs.tgsi.sampler,
+ draw->gs.tgsi.image,
+ draw->gs.tgsi.buffer);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 211bd6f7e70..a18f6632124 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -67,6 +67,7 @@ struct vbuf_render;
struct tgsi_exec_machine;
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
struct draw_pt_front_end;
struct draw_assembler;
struct draw_llvm;
@@ -269,6 +270,7 @@ struct draw_context
struct tgsi_sampler *sampler;
struct tgsi_image *image;
+ struct tgsi_buffer *buffer;
} tgsi;
struct translate *fetch;
@@ -289,6 +291,7 @@ struct draw_context
struct tgsi_sampler *sampler;
struct tgsi_image *image;
+ struct tgsi_buffer *buffer;
} tgsi;
} gs;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index feb222b8fa5..da0d1a7f9a8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -70,7 +70,9 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
if (evs->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
- draw->vs.tgsi.sampler, draw->vs.tgsi.image, NULL);
+ draw->vs.tgsi.sampler,
+ draw->vs.tgsi.image,
+ draw->vs.tgsi.buffer);
}
}
From afa8707ba93a7d226a76319acda2a8dd89524db7 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 11 Apr 2016 13:02:20 +1000
Subject: [PATCH 084/224] softpipe: add SSBO/shader atomics support.
This adds support for the features required for ARB_shader_storage_buffer_object
and ARB_shader_atomic_counters, ARB_shader_atomic_counter_ops.
[airlied: some cleanups applied]
Reviewed-by: Brian Paul
Reviewed-by: Roland Scheidegger
Signed-off-by: Dave Airlie
---
docs/GL3.txt | 8 +-
docs/relnotes/11.3.0.html | 4 +-
src/gallium/auxiliary/tgsi/tgsi_exec.h | 3 +-
src/gallium/drivers/softpipe/Makefile.sources | 1 +
src/gallium/drivers/softpipe/sp_buffer.c | 371 ++++++++++++++++++
src/gallium/drivers/softpipe/sp_buffer.h | 37 ++
src/gallium/drivers/softpipe/sp_context.c | 15 +
src/gallium/drivers/softpipe/sp_context.h | 2 +
src/gallium/drivers/softpipe/sp_fs_exec.c | 5 +-
src/gallium/drivers/softpipe/sp_screen.c | 3 +-
src/gallium/drivers/softpipe/sp_state.h | 4 +-
.../drivers/softpipe/sp_state_derived.c | 3 +-
src/gallium/drivers/softpipe/sp_state_image.c | 28 ++
13 files changed, 473 insertions(+), 11 deletions(-)
create mode 100644 src/gallium/drivers/softpipe/sp_buffer.c
create mode 100644 src/gallium/drivers/softpipe/sp_buffer.h
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 33b7d128f5b..066889a90c4 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -150,7 +150,7 @@ GL 4.2, GLSL 4.20:
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
- GL_ARB_shader_atomic_counters DONE (i965, nvc0)
+ GL_ARB_shader_atomic_counters DONE (i965, nvc0, softpipe)
GL_ARB_texture_storage DONE (all drivers)
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -179,7 +179,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size DONE (i965, radeonsi, softpipe)
- GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
+ GL_ARB_shader_storage_buffer_object DONE (i965, nvc0, softpipe)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
@@ -230,10 +230,10 @@ GLES3.1, GLSL ES 3.1
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi)
GL_ARB_program_interface_query DONE (all drivers)
- GL_ARB_shader_atomic_counters DONE (i965, nvc0)
+ GL_ARB_shader_atomic_counters DONE (i965, nvc0, softpipe)
GL_ARB_shader_image_load_store DONE (i965, softpipe, radeonsi)
GL_ARB_shader_image_size DONE (i965, softpipe, radeonsi)
- GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
+ GL_ARB_shader_storage_buffer_object DONE (i965, nvc0, softpipe)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
index 2474f386bd9..f1d958d1f42 100644
--- a/docs/relnotes/11.3.0.html
+++ b/docs/relnotes/11.3.0.html
@@ -46,9 +46,11 @@ Note: some of the new features are only available with certain drivers.
- GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi
- GL_ARB_internalformat_query2 on all drivers
-- GL_ARB_shader_atomic_counter_ops on nvc0
+- GL_ARB_shader_atomic_counters on softpipe
+- GL_ARB_shader_atomic_counter_ops on nvc0, softpipe
- GL_ARB_shader_image_load_store on radeonsi, softpipe
- GL_ARB_shader_image_size on radeonsi, softpipe
+- GL_ARB_shader_storage_buffer_object on softpipe
- GL_ATI_fragment_shader on all Gallium drivers
- GL_EXT_base_instance on all drivers that support GL_ARB_base_instance
- GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 257c58f777e..42fb922baa5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -528,8 +528,9 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return PIPE_MAX_SHADER_BUFFERS;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return PIPE_MAX_SHADER_IMAGES;
diff --git a/src/gallium/drivers/softpipe/Makefile.sources b/src/gallium/drivers/softpipe/Makefile.sources
index 3669dfc71f8..1d42351f975 100644
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -1,4 +1,5 @@
C_SOURCES := \
+ sp_buffer.c \
sp_clear.c \
sp_clear.h \
sp_context.c \
diff --git a/src/gallium/drivers/softpipe/sp_buffer.c b/src/gallium/drivers/softpipe/sp_buffer.c
new file mode 100644
index 00000000000..69717baa1a0
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_buffer.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2016 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sp_context.h"
+#include "sp_buffer.h"
+#include "sp_texture.h"
+
+#include "util/u_format.h"
+
+static bool
+get_dimensions(const struct pipe_shader_buffer *bview,
+ const struct softpipe_resource *spr,
+ unsigned *width)
+{
+ *width = bview->buffer_size;
+ /*
+ * Bounds check the buffer size from the view
+ * and the buffer size from the underlying buffer.
+ */
+ if (*width > spr->base.width0)
+ return false;
+ return true;
+}
+
+/*
+ * Implement the buffer LOAD operation; every failure path zero-fills rgba.
+ */
+static void
+sp_tgsi_load(const struct tgsi_buffer *buffer,
+             const struct tgsi_buffer_params *params,
+             const int s[TGSI_QUAD_SIZE],
+             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+   struct pipe_shader_buffer *bview;
+   struct softpipe_resource *spr;
+   unsigned width;
+   int c, j;
+   unsigned char *data_ptr;
+   const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+
+   if (params->unit >= PIPE_MAX_SHADER_BUFFERS) /* sp_bview[] has exactly that many slots */
+      goto fail_write_all_zero;
+
+   bview = &sp_buf->sp_bview[params->unit];
+   spr = softpipe_resource(bview->buffer);
+   if (!spr)
+      goto fail_write_all_zero;
+
+   if (!get_dimensions(bview, spr, &width))
+      goto fail_write_all_zero; /* the caller copies rgba out unconditionally */
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      int s_coord;
+      bool fill_zero = false;
+      uint32_t sdata[4];
+
+      if (!(params->execmask & (1 << j)))
+         fill_zero = true;
+
+      s_coord = s[j];
+      if (s_coord >= width)
+         fill_zero = true;
+
+      if (fill_zero) {
+         for (c = 0; c < 4; c++)
+            rgba[c][j] = 0;
+         continue;
+      }
+      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+      for (c = 0; c < 4; c++) {
+         format_desc->fetch_rgba_uint(sdata, data_ptr, 0, 0);
+         ((uint32_t *)rgba[c])[j] = sdata[0];
+         data_ptr += 4;
+      }
+   }
+   return;
+fail_write_all_zero:
+   memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
+   return;
+}
+
+/*
+ * Implement the buffer STORE operation; invalid units/offsets are skipped.
+ */
+static void
+sp_tgsi_store(const struct tgsi_buffer *buffer,
+              const struct tgsi_buffer_params *params,
+              const int s[TGSI_QUAD_SIZE],
+              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+   struct pipe_shader_buffer *bview;
+   struct softpipe_resource *spr;
+   unsigned width;
+   unsigned char *data_ptr;
+   int j, c;
+   const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+
+   if (params->unit >= PIPE_MAX_SHADER_BUFFERS) /* sp_bview[] has exactly that many slots */
+      return;
+
+   bview = &sp_buf->sp_bview[params->unit];
+   spr = softpipe_resource(bview->buffer);
+   if (!spr)
+      return;
+
+   if (!get_dimensions(bview, spr, &width))
+      return;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      int s_coord;
+
+      if (!(params->execmask & (1 << j)))
+         continue;
+
+      s_coord = s[j];
+      if (s_coord >= width)
+         continue;
+
+      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+
+      for (c = 0; c < 4; c++) {
+         if (params->writemask & (1 << c)) {
+            unsigned temp[4];
+            unsigned char *dptr = data_ptr + (c * 4);
+            temp[0] = ((uint32_t *)rgba[c])[j];
+            format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
+         }
+      }
+   }
+}
+
+/*
+ * Implement atomic operations on unsigned integers.
+ */
+static void
+handle_op_uint(const struct pipe_shader_buffer *bview,
+ bool just_read,
+ unsigned char *data_ptr,
+ uint qi,
+ unsigned opcode,
+ unsigned writemask,
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+ uint c;
+ const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+ unsigned sdata[4];
+
+ for (c = 0; c < 4; c++) {
+ unsigned temp[4];
+ unsigned char *dptr = data_ptr + (c * 4);
+ format_desc->fetch_rgba_uint(temp, dptr, 0, 0);
+ sdata[c] = temp[0];
+ }
+
+ if (just_read) {
+ for (c = 0; c < 4; c++) {
+ ((uint32_t *)rgba[c])[qi] = sdata[c];
+ }
+ return;
+ }
+
+ switch (opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] += ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMXCHG:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] = ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMCAS:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned cmp_x = ((uint32_t *)rgba[c])[qi];
+ unsigned src_x = ((uint32_t *)rgba2[c])[qi];
+ unsigned temp = sdata[c];
+ sdata[c] = (dst_x == cmp_x) ? src_x : dst_x;
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMAND:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] &= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMOR:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] |= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMXOR:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] ^= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMUMIN:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MIN2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMUMAX:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MAX2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMIMIN:
+ for (c = 0; c < 4; c++) {
+ int dst_x = sdata[c];
+ int src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MIN2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMIMAX:
+ for (c = 0; c < 4; c++) {
+ int dst_x = sdata[c];
+ int src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MAX2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ default:
+ assert(!"Unexpected TGSI opcode in sp_tgsi_op");
+ break;
+ }
+
+ for (c = 0; c < 4; c++) {
+ if (writemask & (1 << c)) {
+ unsigned temp[4];
+ unsigned char *dptr = data_ptr + (c * 4);
+ temp[0] = sdata[c];
+ format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
+ }
+ }
+}
+
+/*
+ * Implement atomic buffer operations; every failure path zero-fills rgba.
+ */
+static void
+sp_tgsi_op(const struct tgsi_buffer *buffer,
+           const struct tgsi_buffer_params *params,
+           unsigned opcode,
+           const int s[TGSI_QUAD_SIZE],
+           float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+           float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+   struct pipe_shader_buffer *bview;
+   struct softpipe_resource *spr;
+   unsigned width;
+   int j, c;
+   unsigned char *data_ptr;
+
+   if (params->unit >= PIPE_MAX_SHADER_BUFFERS) /* sp_bview[] has exactly that many slots */
+      goto fail_write_all_zero;
+
+   bview = &sp_buf->sp_bview[params->unit];
+   spr = softpipe_resource(bview->buffer);
+   if (!spr)
+      goto fail_write_all_zero;
+
+   if (!get_dimensions(bview, spr, &width))
+      goto fail_write_all_zero;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      int s_coord;
+      bool just_read = false;
+
+      s_coord = s[j];
+      if (s_coord >= width) {
+         for (c = 0; c < 4; c++) {
+            rgba[c][j] = 0;
+         }
+         continue;
+      }
+
+      /* just readback the value for atomic if execmask isn't set */
+      if (!(params->execmask & (1 << j))) {
+         just_read = true;
+      }
+
+      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+      /* we should see atomic operations on r32 formats */
+
+      handle_op_uint(bview, just_read, data_ptr, j,
+                     opcode, params->writemask, rgba, rgba2);
+   }
+   return;
+fail_write_all_zero:
+   memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
+   return;
+}
+
+/*
+ * Return the size of the attached buffer for the RESQ opcode (0 if none).
+ */
+static void
+sp_tgsi_get_dims(const struct tgsi_buffer *buffer,
+                 const struct tgsi_buffer_params *params,
+                 int *dim)
+{
+   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+   struct pipe_shader_buffer *bview;
+   struct softpipe_resource *spr;
+
+   *dim = 0; /* always define the result, even on the early-return paths */
+   if (params->unit >= PIPE_MAX_SHADER_BUFFERS) /* sp_bview[] has exactly that many slots */
+      return;
+
+   bview = &sp_buf->sp_bview[params->unit];
+   spr = softpipe_resource(bview->buffer);
+   if (!spr)
+      return;
+   *dim = bview->buffer_size;
+}
+
+struct sp_tgsi_buffer *
+sp_create_tgsi_buffer(void)
+{
+   struct sp_tgsi_buffer *buf = CALLOC_STRUCT(sp_tgsi_buffer);
+   if (!buf)
+      return NULL; /* allocation failed; caller must check */
+
+   buf->base.load = sp_tgsi_load; /* hook up the tgsi_buffer callbacks */
+   buf->base.store = sp_tgsi_store;
+   buf->base.op = sp_tgsi_op;
+   buf->base.get_dims = sp_tgsi_get_dims;
+   return buf;
+}
diff --git a/src/gallium/drivers/softpipe/sp_buffer.h b/src/gallium/drivers/softpipe/sp_buffer.h
new file mode 100644
index 00000000000..1822fe709fe
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_buffer.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2016 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SP_BUFFER_H
+#define SP_BUFFER_H
+#include "tgsi/tgsi_exec.h"
+
+struct sp_tgsi_buffer
+{
+ struct tgsi_buffer base; /* kept first: the struct is cast to/from struct tgsi_buffer * */
+ struct pipe_shader_buffer sp_bview[PIPE_MAX_SHADER_BUFFERS]; /* bound buffer per unit */
+};
+
+struct sp_tgsi_buffer *
+sp_create_tgsi_buffer(void);
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 30b0276cfe0..f66fea2d5d5 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -38,6 +38,7 @@
#include "util/u_pstipple.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_exec.h"
+#include "sp_buffer.h"
#include "sp_clear.h"
#include "sp_context.h"
#include "sp_flush.h"
@@ -203,6 +204,10 @@ softpipe_create_context(struct pipe_screen *screen,
softpipe->tgsi.image[i] = sp_create_tgsi_image();
}
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+ softpipe->tgsi.buffer[i] = sp_create_tgsi_buffer();
+ }
+
softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
@@ -288,6 +293,16 @@ softpipe_create_context(struct pipe_screen *screen,
(struct tgsi_image *)
softpipe->tgsi.image[PIPE_SHADER_GEOMETRY]);
+ draw_buffer(softpipe->draw,
+ PIPE_SHADER_VERTEX,
+ (struct tgsi_buffer *)
+ softpipe->tgsi.buffer[PIPE_SHADER_VERTEX]);
+
+ draw_buffer(softpipe->draw,
+ PIPE_SHADER_GEOMETRY,
+ (struct tgsi_buffer *)
+ softpipe->tgsi.buffer[PIPE_SHADER_GEOMETRY]);
+
if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 20a12353b38..70d00c88b6e 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -84,6 +84,7 @@ struct softpipe_context {
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+ struct pipe_shader_buffer buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
@@ -174,6 +175,7 @@ struct softpipe_context {
struct {
struct sp_tgsi_sampler *sampler[PIPE_SHADER_TYPES];
struct sp_tgsi_image *image[PIPE_SHADER_TYPES];
+ struct sp_tgsi_buffer *buffer[PIPE_SHADER_TYPES];
} tgsi;
struct tgsi_exec_machine *fs_machine;
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 6a5f7acd263..155382af825 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -63,14 +63,15 @@ static void
exec_prepare( const struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler,
- struct tgsi_image *image )
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer )
{
/*
* Bind tokens/shader to the interpreter's machine state.
*/
tgsi_exec_machine_bind_shader(machine,
var->tokens,
- sampler, image, NULL);
+ sampler, image, buffer);
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 90f29d6e52a..00b414cea9d 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -259,7 +259,6 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
@@ -272,6 +271,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
return 0;
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 4;
}
/* should only get here on unhandled cases */
debug_printf("Unexpected PIPE_CAP %d query\n", param);
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 570bc549cc4..2fc48ab13d8 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -57,6 +57,7 @@
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
struct tgsi_exec_machine;
struct vertex_info;
@@ -83,7 +84,8 @@ struct sp_fragment_shader_variant
void (*prepare)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler,
- struct tgsi_image *image);
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer);
unsigned (*run)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 65679e73515..4ce9d95bc6e 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -344,7 +344,8 @@ update_fragment_shader(struct softpipe_context *softpipe, unsigned prim)
softpipe->fs_machine,
(struct tgsi_sampler *) softpipe->
tgsi.sampler[PIPE_SHADER_FRAGMENT],
- (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_FRAGMENT]);
+ (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_FRAGMENT],
+ (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_FRAGMENT]);
}
else {
softpipe->fs_variant = NULL;
diff --git a/src/gallium/drivers/softpipe/sp_state_image.c b/src/gallium/drivers/softpipe/sp_state_image.c
index 8909fa26864..5947c934e86 100644
--- a/src/gallium/drivers/softpipe/sp_state_image.c
+++ b/src/gallium/drivers/softpipe/sp_state_image.c
@@ -24,6 +24,7 @@
#include "sp_context.h"
#include "sp_state.h"
#include "sp_image.h"
+#include "sp_buffer.h"
static void softpipe_set_shader_images(struct pipe_context *pipe,
unsigned shader,
@@ -51,7 +52,34 @@ static void softpipe_set_shader_images(struct pipe_context *pipe,
}
}
+static void softpipe_set_shader_buffers(struct pipe_context *pipe,
+                                        unsigned shader,
+                                        unsigned start,
+                                        unsigned num,
+                                        struct pipe_shader_buffer *buffers)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   unsigned i;
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(start + num <= Elements(softpipe->buffers[shader]));
+
+   /* Bind buffers[i] to slot start + i; a NULL array unbinds those slots. */
+   for (i = 0; i < num; i++) {
+      struct pipe_shader_buffer *slot = &softpipe->tgsi.buffer[shader]->sp_bview[start + i];
+
+      if (buffers) { /* reference the new resource, then copy the whole view */
+         pipe_resource_reference(&slot->buffer, buffers[i].buffer);
+         *slot = buffers[i];
+      }
+      else { /* unbind: release the slot's resource, then clear the slot */
+         pipe_resource_reference(&slot->buffer, NULL);
+         memset(slot, 0, sizeof(*slot));
+      }
+   }
+}
+
void softpipe_init_image_funcs(struct pipe_context *pipe)
{
pipe->set_shader_images = softpipe_set_shader_images;
+ pipe->set_shader_buffers = softpipe_set_shader_buffers;
}
From 703c1e69d89df303e5fb4c1873f9b3954bdeda9c Mon Sep 17 00:00:00 2001
From: Rhys Kidd
Date: Sun, 10 Apr 2016 20:43:25 -0400
Subject: [PATCH 085/224] glsl: Update hash table comments in constant
propagation
Signed-off-by: Rhys Kidd
Reviewed-by: Kenneth Graunke
---
src/compiler/glsl/opt_constant_propagation.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/compiler/glsl/opt_constant_propagation.cpp b/src/compiler/glsl/opt_constant_propagation.cpp
index 416ba16a3c5..4764d16de6d 100644
--- a/src/compiler/glsl/opt_constant_propagation.cpp
+++ b/src/compiler/glsl/opt_constant_propagation.cpp
@@ -122,7 +122,7 @@ public:
exec_list *acp;
/**
- * List of kill_entry: The masks of variables whose values were
+ * Hash table of kill_entry: The masks of variables whose values were
* killed in this block.
*/
hash_table *kills;
@@ -454,7 +454,7 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
}
}
- /* Add this writemask of the variable to the list of killed
+ /* Add this writemask of the variable to the hash table of killed
* variables in this block.
*/
hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
@@ -463,7 +463,7 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
entry->write_mask |= write_mask;
return;
}
- /* Not already in the list. Make new entry. */
+ /* Not already in the hash table. Make new entry. */
_mesa_hash_table_insert(this->kills, var,
new(this->mem_ctx) kill_entry(var, write_mask));
}
From ea8a65f503f05404d923a2a076064c3ffe6660aa Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev
Date: Tue, 12 Apr 2016 10:11:35 +0200
Subject: [PATCH 086/224] i965: Add autogenerated 'brw_nir_trig_workarounds.c'
to gitignore
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/mesa/drivers/dri/i965/.gitignore b/src/mesa/drivers/dri/i965/.gitignore
index 8eb9f4e1598..70aae3f4d4c 100644
--- a/src/mesa/drivers/dri/i965/.gitignore
+++ b/src/mesa/drivers/dri/i965/.gitignore
@@ -1,3 +1,4 @@
+brw_nir_trig_workarounds.c
i965_symbols_test
test_eu_compact
test_vec4_copy_propagation
From b0e3ba61b5f8bb285472d5acda5ff233b05aeef4 Mon Sep 17 00:00:00 2001
From: Haixia Shi
Date: Thu, 7 Apr 2016 11:05:08 -0700
Subject: [PATCH 087/224] dri/i965: extend GLES3 sRGB workaround to cover all
formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It is incorrect to assume BGRA byte order for the GLES3 sRGB workaround.
v2: use _mesa_get_srgb_format_linear to handle all formats
Signed-off-by: Haixia Shi
Reviewed-by: Stéphane Marchesin
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_context.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 2d480d02366..63ac3bc31ed 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1151,10 +1151,9 @@ intel_gles3_srgb_workaround(struct brw_context *brw,
*/
fb->Visual.sRGBCapable = false;
for (int i = 0; i < BUFFER_COUNT; i++) {
- if (fb->Attachment[i].Renderbuffer &&
- fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
- fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
- }
+ struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
+ if (rb)
+ rb->Format = _mesa_get_srgb_format_linear(rb->Format);
}
}
From dccdb655a169a43de4427580e230f9d3706436cd Mon Sep 17 00:00:00 2001
From: Hans de Goede
Date: Mon, 11 Apr 2016 14:07:20 +0200
Subject: [PATCH 088/224] nv30: Add missing PIPE_SHADER_CAP_INTEGERS to
get_shader_param()
Add missing PIPE_SHADER_CAP_INTEGERS for frag shaders to
nv30_screen_get_shader_param().
Signed-off-by: Hans de Goede
Reviewed-by: Samuel Pitoiset
---
src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index db7c2d15fb1..ece8af72545 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -324,6 +324,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
+ case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
From fc67375379ec26eef63f8e530724cd53c97bc3d0 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Mon, 11 Apr 2016 15:53:43 +0200
Subject: [PATCH 089/224] radeonsi: Synchronize a streamout write after read
hazard.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Bas Nieuwenhuizen
Reviewed-by: Nicolai Hähnle
Reviewed-by: Marek Olšák
---
src/gallium/drivers/radeonsi/si_descriptors.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 6dd2e4fd89d..b3792c23474 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -883,6 +883,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
SI_CONTEXT_VS_PARTIAL_FLUSH;
}
+ /* All readers of the streamout targets need to be finished before we can
+ * start writing to the targets.
+ */
+ if (num_targets)
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
* 2) as shader resources
From 26171bd67e47cf25857cbce767ad048c8d99d1b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 01:42:00 +0200
Subject: [PATCH 090/224] gallium: add pipe_context::set_active_query_state for
pausing queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Roland Scheidegger
Reviewed-by: Nicolai Hähnle
---
src/gallium/docs/source/context.rst | 3 +++
src/gallium/drivers/ddebug/dd_context.c | 9 +++++++++
.../drivers/freedreno/freedreno_query.c | 6 ++++++
src/gallium/drivers/i915/i915_query.c | 6 ++++++
src/gallium/drivers/ilo/ilo_query.c | 6 ++++++
src/gallium/drivers/llvmpipe/lp_query.c | 6 ++++++
src/gallium/drivers/noop/noop_pipe.c | 6 ++++++
src/gallium/drivers/nouveau/nv30/nv30_query.c | 6 ++++++
src/gallium/drivers/nouveau/nv50/nv50_query.c | 6 ++++++
src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 6 ++++++
src/gallium/drivers/r300/r300_query.c | 6 ++++++
src/gallium/drivers/radeon/r600_query.c | 6 ++++++
src/gallium/drivers/rbug/rbug_context.c | 12 ++++++++++++
src/gallium/drivers/softpipe/sp_query.c | 7 +++++++
src/gallium/drivers/svga/svga_pipe_query.c | 7 +++++++
src/gallium/drivers/swr/swr_query.cpp | 7 +++++++
src/gallium/drivers/trace/tr_context.c | 19 +++++++++++++++++++
src/gallium/drivers/vc4/vc4_query.c | 6 ++++++
src/gallium/drivers/virgl/virgl_query.c | 6 ++++++
src/gallium/include/pipe/p_context.h | 6 ++++++
20 files changed, 142 insertions(+)
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 904e1ff04e7..3a45f402cd8 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -330,6 +330,9 @@ a resource without synchronizing with the CPU. This write will optionally
wait for the query to complete, and will optionally write whether the value
is available instead of the value itself.
+``set_active_query_state`` Set whether all current non-driver queries except
+TIME_ELAPSED are active or paused.
+
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 9dfaa0af289..72a950a456a 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -123,6 +123,14 @@ dd_context_get_query_result(struct pipe_context *_pipe,
return pipe->get_query_result(pipe, dd_query_unwrap(query), wait, result);
}
+static void
+dd_context_set_active_query_state(struct pipe_context *_pipe, boolean enable)
+{
+ struct pipe_context *pipe = dd_context(_pipe)->pipe; /* the wrapped context */
+
+ pipe->set_active_query_state(pipe, enable); /* forwarded with 'enable' unchanged */
+}
+
static void
dd_context_render_condition(struct pipe_context *_pipe,
struct pipe_query *query, boolean condition,
@@ -667,6 +675,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
CTX_INIT(begin_query);
CTX_INIT(end_query);
CTX_INIT(get_query_result);
+ CTX_INIT(set_active_query_state);
CTX_INIT(create_blend_state);
CTX_INIT(bind_blend_state);
CTX_INIT(delete_blend_state);
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index b87e8250719..a9427058579 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -114,6 +114,11 @@ fd_get_driver_query_info(struct pipe_screen *pscreen,
return 1;
}
+static void
+fd_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
fd_query_screen_init(struct pipe_screen *pscreen)
{
@@ -128,5 +133,6 @@ fd_query_context_init(struct pipe_context *pctx)
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
+ pctx->set_active_query_state = fd_set_active_query_state;
pctx->render_condition = fd_render_condition;
}
diff --git a/src/gallium/drivers/i915/i915_query.c b/src/gallium/drivers/i915/i915_query.c
index 78d67cea2c9..fa1b01d1804 100644
--- a/src/gallium/drivers/i915/i915_query.c
+++ b/src/gallium/drivers/i915/i915_query.c
@@ -76,6 +76,11 @@ static boolean i915_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+i915_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
i915_init_query_functions(struct i915_context *i915)
{
@@ -84,5 +89,6 @@ i915_init_query_functions(struct i915_context *i915)
i915->base.begin_query = i915_begin_query;
i915->base.end_query = i915_end_query;
i915->base.get_query_result = i915_get_query_result;
+ i915->base.set_active_query_state = i915_set_active_query_state;
}
diff --git a/src/gallium/drivers/ilo/ilo_query.c b/src/gallium/drivers/ilo/ilo_query.c
index 106bd42a335..8a42f58a87f 100644
--- a/src/gallium/drivers/ilo/ilo_query.c
+++ b/src/gallium/drivers/ilo/ilo_query.c
@@ -222,6 +222,11 @@ ilo_get_query_result(struct pipe_context *pipe, struct pipe_query *query,
return true;
}
+static void
+ilo_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
/**
* Initialize query-related functions.
*/
@@ -233,4 +238,5 @@ ilo_init_query_functions(struct ilo_context *ilo)
ilo->base.begin_query = ilo_begin_query;
ilo->base.end_query = ilo_end_query;
ilo->base.get_query_result = ilo_get_query_result;
+ ilo->base.set_active_query_state = ilo_set_active_query_state;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index fc593670671..2fddc90503f 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -320,6 +320,11 @@ llvmpipe_check_render_cond(struct llvmpipe_context *lp)
return TRUE;
}
+static void
+llvmpipe_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
{
llvmpipe->pipe.create_query = llvmpipe_create_query;
@@ -327,6 +332,7 @@ void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
llvmpipe->pipe.begin_query = llvmpipe_begin_query;
llvmpipe->pipe.end_query = llvmpipe_end_query;
llvmpipe->pipe.get_query_result = llvmpipe_get_query_result;
+ llvmpipe->pipe.set_active_query_state = llvmpipe_set_active_query_state;
}
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index fd0a5d0f830..55aca74628e 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -78,6 +78,11 @@ static boolean noop_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+noop_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
/*
* resource
@@ -284,6 +289,7 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen,
ctx->begin_query = noop_begin_query;
ctx->end_query = noop_end_query;
ctx->get_query_result = noop_get_query_result;
+ ctx->set_active_query_state = noop_set_active_query_state;
ctx->transfer_map = noop_transfer_map;
ctx->transfer_flush_region = noop_transfer_flush_region;
ctx->transfer_unmap = noop_transfer_unmap;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 75a4b0446fe..cb53a3663e5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -263,6 +263,11 @@ nv40_query_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, 0x02000000 | q->qo[1]->hw->start);
}
+static void
+nv30_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
nv30_query_init(struct pipe_context *pipe)
{
@@ -273,6 +278,7 @@ nv30_query_init(struct pipe_context *pipe)
pipe->begin_query = nv30_query_begin;
pipe->end_query = nv30_query_end;
pipe->get_query_result = nv30_query_result;
+ pipe->set_active_query_state = nv30_set_active_query_state;
if (eng3d->oclass >= NV40_3D_CLASS)
pipe->render_condition = nv40_query_render_condition;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 4cd3b615606..fa70fb6950e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -143,6 +143,11 @@ nv50_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, hq->bo->offset + hq->offset);
}
+static void
+nv50_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
nv50_init_query_functions(struct nv50_context *nv50)
{
@@ -153,6 +158,7 @@ nv50_init_query_functions(struct nv50_context *nv50)
pipe->begin_query = nv50_begin_query;
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
+ pipe->set_active_query_state = nv50_set_active_query_state;
pipe->render_condition = nv50_render_condition;
nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 92ca613cda1..b34271c4911 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -254,6 +254,11 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 0;
}
+static void
+nvc0_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
@@ -265,6 +270,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->get_query_result_resource = nvc0_get_query_result_resource;
+ pipe->set_active_query_state = nvc0_set_active_query_state;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 6414e80828e..7603985b14b 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -200,6 +200,11 @@ static void r300_render_condition(struct pipe_context *pipe,
}
}
+static void
+r300_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void r300_init_query_functions(struct r300_context* r300)
{
r300->context.create_query = r300_create_query;
@@ -207,5 +212,6 @@ void r300_init_query_functions(struct r300_context* r300)
r300->context.begin_query = r300_begin_query;
r300->context.end_query = r300_end_query;
r300->context.get_query_result = r300_get_query_result;
+ r300->context.set_active_query_state = r300_set_active_query_state;
r300->context.render_condition = r300_render_condition;
}
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 7a2d2ee7f31..d780b8c182f 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1261,6 +1261,11 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
return 1;
}
+static void
+r600_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
@@ -1269,6 +1274,7 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
rctx->b.get_query_result = r600_get_query_result;
+ rctx->b.set_active_query_state = r600_set_active_query_state;
rctx->render_cond_atom.emit = r600_emit_query_predication;
if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 9ecddad05ec..1280c45b539 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -211,6 +211,17 @@ rbug_get_query_result(struct pipe_context *_pipe,
return ret;
}
+static void
+rbug_set_active_query_state(struct pipe_context *_pipe, boolean enable)
+{
+ struct rbug_context *rb_pipe = rbug_context(_pipe);
+ struct pipe_context *pipe = rb_pipe->pipe; /* the wrapped context */
+
+ pipe_mutex_lock(rb_pipe->call_mutex); /* call_mutex is held across the forwarded call */
+ pipe->set_active_query_state(pipe, enable);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+}
+
static void *
rbug_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *blend)
@@ -1184,6 +1195,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.begin_query = rbug_begin_query;
rb_pipe->base.end_query = rbug_end_query;
rb_pipe->base.get_query_result = rbug_get_query_result;
+ rb_pipe->base.set_active_query_state = rbug_set_active_query_state;
rb_pipe->base.create_blend_state = rbug_create_blend_state;
rb_pipe->base.bind_blend_state = rbug_bind_blend_state;
rb_pipe->base.delete_blend_state = rbug_delete_blend_state;
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index c28d28d5f5d..81e97107d59 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -283,6 +283,12 @@ softpipe_check_render_cond(struct softpipe_context *sp)
}
+static void
+softpipe_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
+
void softpipe_init_query_funcs(struct softpipe_context *softpipe )
{
softpipe->pipe.create_query = softpipe_create_query;
@@ -290,6 +296,7 @@ void softpipe_init_query_funcs(struct softpipe_context *softpipe )
softpipe->pipe.begin_query = softpipe_begin_query;
softpipe->pipe.end_query = softpipe_end_query;
softpipe->pipe.get_query_result = softpipe_get_query_result;
+ softpipe->pipe.set_active_query_state = softpipe_set_active_query_state;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 88f41eadc1d..75bc9ce092b 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -1246,6 +1246,12 @@ svga_get_timestamp(struct pipe_context *pipe)
}
+static void
+svga_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
+
void
svga_init_query_functions(struct svga_context *svga)
{
@@ -1254,6 +1260,7 @@ svga_init_query_functions(struct svga_context *svga)
svga->pipe.begin_query = svga_begin_query;
svga->pipe.end_query = svga_end_query;
svga->pipe.get_query_result = svga_get_query_result;
+ svga->pipe.set_active_query_state = svga_set_active_query_state;
svga->pipe.render_condition = svga_render_condition;
svga->pipe.get_timestamp = svga_get_timestamp;
}
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index 810c50b2f8f..e4b8b683278 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -319,6 +319,12 @@ swr_check_render_cond(struct pipe_context *pipe)
return TRUE;
}
+
+static void
+swr_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
swr_query_init(struct pipe_context *pipe)
{
@@ -329,6 +335,7 @@ swr_query_init(struct pipe_context *pipe)
pipe->begin_query = swr_begin_query;
pipe->end_query = swr_end_query;
pipe->get_query_result = swr_get_query_result;
+ pipe->set_active_query_state = swr_set_active_query_state;
ctx->active_queries = 0;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 08b1d32afb0..b575f2cdb34 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -273,6 +273,24 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
+static void
+trace_context_set_active_query_state(struct pipe_context *_pipe,
+ boolean enable)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe; /* the wrapped context */
+
+ trace_dump_call_begin("pipe_context", "set_active_query_state");
+
+ trace_dump_arg(ptr, pipe); /* dumps the wrapped context pointer, not _pipe */
+ trace_dump_arg(bool, enable);
+
+ pipe->set_active_query_state(pipe, enable); /* forwarded between the begin/end dump calls */
+
+ trace_dump_call_end();
+}
+
+
static void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
@@ -1781,6 +1799,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(begin_query);
TR_CTX_INIT(end_query);
TR_CTX_INIT(get_query_result);
+ TR_CTX_INIT(set_active_query_state);
TR_CTX_INIT(create_blend_state);
TR_CTX_INIT(bind_blend_state);
TR_CTX_INIT(delete_blend_state);
diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c
index 270832eae3a..17400a37ca3 100644
--- a/src/gallium/drivers/vc4/vc4_query.c
+++ b/src/gallium/drivers/vc4/vc4_query.c
@@ -72,6 +72,11 @@ vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
return true;
}
+static void
+vc4_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void
vc4_query_init(struct pipe_context *pctx)
{
@@ -80,5 +85,6 @@ vc4_query_init(struct pipe_context *pctx)
pctx->begin_query = vc4_begin_query;
pctx->end_query = vc4_end_query;
pctx->get_query_result = vc4_get_query_result;
+ pctx->set_active_query_state = vc4_set_active_query_state;
}
diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c
index b0200556342..5173bd39a45 100644
--- a/src/gallium/drivers/virgl/virgl_query.c
+++ b/src/gallium/drivers/virgl/virgl_query.c
@@ -164,6 +164,11 @@ static boolean virgl_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+virgl_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{ /* empty hook: 'enable' is ignored; no query is paused or resumed here */
+}
+
void virgl_init_query_functions(struct virgl_context *vctx)
{
vctx->base.render_condition = virgl_render_condition;
@@ -172,4 +177,5 @@ void virgl_init_query_functions(struct virgl_context *vctx)
vctx->base.begin_query = virgl_begin_query;
vctx->base.end_query = virgl_end_query;
vctx->base.get_query_result = virgl_get_query_result;
+ vctx->base.set_active_query_state = virgl_set_active_query_state;
}
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 1c97e82ece5..82efaf5d8a9 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -173,6 +173,12 @@ struct pipe_context {
struct pipe_resource *resource,
unsigned offset);
+ /**
+ * Set whether all current non-driver queries except TIME_ELAPSED are
+ * active or paused.
+ */
+ void (*set_active_query_state)(struct pipe_context *pipe, boolean enable);
+
/*@}*/
/**
From e599b8f384b4fc48b450ed848d93e27e876de53f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 01:43:00 +0200
Subject: [PATCH 091/224] gallium: pause queries for all meta ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Roland Scheidegger
Reviewed-by: Nicolai Hähnle
---
src/gallium/auxiliary/cso_cache/cso_context.c | 4 ++++
src/gallium/auxiliary/cso_cache/cso_context.h | 1 +
src/gallium/auxiliary/hud/hud_context.c | 1 +
src/gallium/auxiliary/postprocess/pp_run.c | 1 +
src/gallium/auxiliary/util/u_blit.c | 1 +
src/gallium/auxiliary/util/u_blitter.c | 4 ++++
src/mesa/state_tracker/st_cb_clear.c | 1 +
src/mesa/state_tracker/st_cb_texture.c | 1 +
8 files changed, 14 insertions(+)
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 790e1211898..4e0cbdd8f9a 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1539,6 +1539,8 @@ cso_save_state(struct cso_context *cso, unsigned state_mask)
cso_save_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_save_viewport(cso);
+ if (state_mask & CSO_BIT_PAUSE_QUERIES)
+ cso->pipe->set_active_query_state(cso->pipe, false);
}
@@ -1590,6 +1592,8 @@ cso_restore_state(struct cso_context *cso)
cso_restore_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_restore_viewport(cso);
+ if (state_mask & CSO_BIT_PAUSE_QUERIES)
+ cso->pipe->set_active_query_state(cso->pipe, true);
cso->saved_state = 0;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index a3563d83a02..e27cbe9f721 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -170,6 +170,7 @@ void cso_set_render_condition(struct cso_context *cso,
#define CSO_BIT_VERTEX_ELEMENTS 0x10000
#define CSO_BIT_VERTEX_SHADER 0x20000
#define CSO_BIT_VIEWPORT 0x40000
+#define CSO_BIT_PAUSE_QUERIES 0x80000
#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
CSO_BIT_FRAGMENT_SHADER | \
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 4673458171e..40017c8614f 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -477,6 +477,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
index 9dc8fb51ae2..bc79c5aab6e 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -133,6 +133,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VIEWPORT |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 22c40d1382d..3677515423c 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -551,6 +551,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_VIEWPORT |
CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_FRAGMENT_SHADER |
CSO_BIT_VERTEX_SHADER |
CSO_BIT_TESSCTRL_SHADER |
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 43fbd8e6452..3ca2c48c4c7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -529,6 +529,8 @@ static void blitter_set_running_flag(struct blitter_context_priv *ctx)
__LINE__);
}
ctx->base.running = TRUE;
+
+ ctx->base.pipe->set_active_query_state(ctx->base.pipe, false);
}
static void blitter_unset_running_flag(struct blitter_context_priv *ctx)
@@ -538,6 +540,8 @@ static void blitter_unset_running_flag(struct blitter_context_priv *ctx)
__LINE__);
}
ctx->base.running = FALSE;
+
+ ctx->base.pipe->set_active_query_state(ctx->base.pipe, true);
}
static void blitter_check_saved_vertex_states(struct blitter_context_priv *ctx)
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 55801469f23..362cef46286 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -206,6 +206,7 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BITS_ALL_SHADERS));
/* blend state: RGBA masking */
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 3980f5d2f51..873acd225da 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1345,6 +1345,7 @@ try_pbo_upload_common(struct gl_context *ctx,
CSO_BIT_DEPTH_STENCIL_ALPHA |
CSO_BIT_RASTERIZER |
CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BITS_ALL_SHADERS));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
From f3eebb84ebd4c1dd7bd9b69b0b65273635443740 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 02:59:09 +0200
Subject: [PATCH 092/224] radeonsi: implement and rely on
set_active_query_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeonsi/si_blit.c | 3 --
src/gallium/drivers/radeonsi/si_pipe.h | 4 +++
src/gallium/drivers/radeonsi/si_state.c | 32 +++++++++++++++++++-
src/gallium/drivers/radeonsi/si_state_draw.c | 10 ++++++
4 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c5ea8b17119..aed783f5b23 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -52,8 +52,6 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
struct si_context *sctx = (struct si_context *)ctx;
- r600_suspend_nontimer_queries(&sctx->b);
-
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
@@ -95,7 +93,6 @@ static void si_blitter_end(struct pipe_context *ctx)
struct si_context *sctx = (struct si_context *)ctx;
sctx->b.render_cond_force_off = false;
- r600_resume_nontimer_queries(&sctx->b);
}
static unsigned u_max_sample(struct pipe_resource *r)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4158fc5461e..8fcfcd2f5d1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -66,6 +66,9 @@
/* Compute only. */
#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 14)
+/* Pipeline & streamout query controls. */
+#define SI_CONTEXT_START_PIPELINE_STATS (R600_CONTEXT_PRIVATE_FLAG << 15)
+#define SI_CONTEXT_STOP_PIPELINE_STATS (R600_CONTEXT_PRIVATE_FLAG << 16)
#define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \
SI_CONTEXT_FLUSH_AND_INV_CB_META | \
@@ -289,6 +292,7 @@ struct si_context {
bool db_stencil_clear;
bool db_stencil_disable_expclear;
unsigned ps_db_shader_control;
+ bool occlusion_queries_disabled;
/* Emitted draw state. */
int last_base_vertex;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8087d2331ff..6e406718d65 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1348,6 +1348,26 @@ static void *si_create_db_flush_dsa(struct si_context *sctx)
/* DB RENDER STATE */
+static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
+{
+ struct si_context *sctx = (struct si_context*)ctx;
+
+ /* Pipeline stat & streamout queries. */
+ if (enable) {
+ sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
+ sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
+ } else {
+ sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
+ sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
+ }
+
+ /* Occlusion queries. */
+ if (sctx->occlusion_queries_disabled != !enable) {
+ sctx->occlusion_queries_disabled = !enable;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
+}
+
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
struct si_context *sctx = (struct si_context*)ctx;
@@ -1382,7 +1402,8 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
}
/* DB_COUNT_CONTROL (occlusion queries) */
- if (sctx->b.num_occlusion_queries > 0) {
+ if (sctx->b.num_occlusion_queries > 0 &&
+ !sctx->occlusion_queries_disabled) {
bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
if (sctx->b.chip_class >= CIK) {
@@ -3740,6 +3761,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_min_samples = si_set_min_samples;
sctx->b.b.set_tess_state = si_set_tess_state;
+ sctx->b.b.set_active_query_state = si_set_active_query_state;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
@@ -3970,6 +3992,14 @@ static void si_init_config(struct si_context *sctx)
si_pm4_cmd_add(pm4, 0x80000000);
si_pm4_cmd_end(pm4, false);
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+ si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
+ EVENT_INDEX(0));
+ si_pm4_cmd_end(pm4, false);
+
si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 84b850a2992..ece0c6ddcf6 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -722,6 +722,16 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
}
}
+ if (sctx->flags & SI_CONTEXT_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
+ EVENT_INDEX(0));
+ } else if (sctx->flags & SI_CONTEXT_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
+ EVENT_INDEX(0));
+ }
+
sctx->flags = 0;
}
From 466aa5718594a1188460856840be324f84553730 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 21:41:34 +0200
Subject: [PATCH 093/224] radeonsi: fix mask checking when emitting scissors
and viewports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
Reviewed-by: Grigori Goronzy
---
src/gallium/drivers/radeonsi/si_state.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6e406718d65..0c46425be9b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -912,8 +912,10 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable;
/* The simple case: Only 1 viewport is active. */
- if (mask & 1 &&
- !si_get_vs_info(sctx)->writes_viewport_index) {
+ if (!si_get_vs_info(sctx)->writes_viewport_index) {
+ if (!(mask & 1))
+ return;
+
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
si_emit_one_scissor(cs, &sctx->viewports.states[0],
scissor_enable ? &states[0] : NULL);
@@ -960,8 +962,10 @@ static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
unsigned mask = sctx->viewports.dirty_mask;
/* The simple case: Only 1 viewport is active. */
- if (mask & 1 &&
- !si_get_vs_info(sctx)->writes_viewport_index) {
+ if (!si_get_vs_info(sctx)->writes_viewport_index) {
+ if (!(mask & 1))
+ return;
+
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
radeon_emit(cs, fui(states[0].scale[0]));
radeon_emit(cs, fui(states[0].translate[0]));
From a4c288d8e1264b3228319f862b04ef31dbabb1a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 03:24:06 +0200
Subject: [PATCH 094/224] gallium/radeon: unify checking streamout enable state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/r600_state_common.c | 5 ++---
src/gallium/drivers/radeon/r600_pipe_common.h | 6 ++++++
src/gallium/drivers/radeon/r600_streamout.c | 6 ------
src/gallium/drivers/radeonsi/si_state_draw.c | 3 +--
4 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index df41d3f028d..82babeba7cb 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1841,8 +1841,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
ia_switch_on_eop = true;
}
- if (rctx->b.streamout.streamout_enabled ||
- rctx->b.streamout.prims_gen_query_enabled)
+ if (r600_get_strmout_en(&rctx->b))
partial_vs_wave = true;
radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
@@ -2018,7 +2017,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->b.family == CHIP_RV635) {
/* if we have gs shader or streamout
we need to do a wait idle after every draw */
- if (rctx->gs_shader || rctx->b.streamout.streamout_enabled) {
+ if (rctx->gs_shader || r600_get_strmout_en(&rctx->b)) {
radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
}
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 062c3193947..7da77368607 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -639,6 +639,12 @@ r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
(struct pipe_resource *)res);
}
+static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
+{
+ return rctx->streamout.streamout_enabled ||
+ rctx->streamout.prims_gen_query_enabled;
+}
+
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
if (filter <= 1) return 0;
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index e977ed9fa10..fc9ec4859f6 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -311,12 +311,6 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
* are no buffers bound.
*/
-static bool r600_get_strmout_en(struct r600_common_context *rctx)
-{
- return rctx->streamout.streamout_enabled ||
- rctx->streamout.prims_gen_query_enabled;
-}
-
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ece0c6ddcf6..105c5fb9850 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -892,8 +892,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if ((sctx->b.family == CHIP_HAWAII ||
sctx->b.family == CHIP_TONGA ||
sctx->b.family == CHIP_FIJI) &&
- (sctx->b.streamout.streamout_enabled ||
- sctx->b.streamout.prims_gen_query_enabled)) {
+ r600_get_strmout_en(&sctx->b)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
From aa79a3269f7ea1d6f5f43d3e98836494b5069a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 20:37:06 +0200
Subject: [PATCH 095/224] r600g: fix typo in r600 register definitions
Acked-by: Edward O'Callaghan
---
src/gallium/drivers/r600/r600d.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 3d223edb5f4..ef99573ebc6 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -780,7 +780,7 @@
#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5)
#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6)
#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7)
-#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8)
+#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x03) << 8)
#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15)
#define S_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 13)
#define G_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 13) & 0x03)
From b82893f93ab0f92dd44444e4a311fa253f423226 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 20:29:08 +0200
Subject: [PATCH 096/224] gallium/radeon: move pipeline stat context flags to
common code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_pipe_common.h | 5 ++++-
src/gallium/drivers/radeonsi/si_pipe.h | 3 ---
src/gallium/drivers/radeonsi/si_state.c | 8 ++++----
src/gallium/drivers/radeonsi/si_state_draw.c | 4 ++--
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 7da77368607..57af0ff963d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -50,7 +50,10 @@
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
-#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
+/* Pipeline & streamout query controls. */
+#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
+#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8fcfcd2f5d1..f665c8160e0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -66,9 +66,6 @@
/* Compute only. */
#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 14)
-/* Pipeline & streamout query controls. */
-#define SI_CONTEXT_START_PIPELINE_STATS (R600_CONTEXT_PRIVATE_FLAG << 15)
-#define SI_CONTEXT_STOP_PIPELINE_STATS (R600_CONTEXT_PRIVATE_FLAG << 16)
#define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \
SI_CONTEXT_FLUSH_AND_INV_CB_META | \
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 0c46425be9b..94130a99b1b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1358,11 +1358,11 @@ static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
/* Pipeline stat & streamout queries. */
if (enable) {
- sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
- sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
+ sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
+ sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
} else {
- sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
- sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
+ sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
+ sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
}
/* Occlusion queries. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 105c5fb9850..40cad504e09 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -722,11 +722,11 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
}
}
- if (sctx->flags & SI_CONTEXT_START_PIPELINE_STATS) {
+ if (sctx->flags & R600_CONTEXT_START_PIPELINE_STATS) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
EVENT_INDEX(0));
- } else if (sctx->flags & SI_CONTEXT_STOP_PIPELINE_STATS) {
+ } else if (sctx->flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
EVENT_INDEX(0));
From 5248676f87bc94079321ab7fcdee888e4d3f5a4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 20:23:51 +0200
Subject: [PATCH 097/224] r600g: simplify r600_set_occlusion_query_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The caller does the same checking.
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/evergreen_state.c | 2 +-
src/gallium/drivers/r600/r600_pipe.h | 1 -
src/gallium/drivers/r600/r600_state.c | 2 +-
src/gallium/drivers/r600/r600_state_common.c | 5 +----
4 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 65952676987..077664d068e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1802,7 +1802,7 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
- if (a->occlusion_query_enabled) {
+ if (rctx->b.num_occlusion_queries > 0) {
db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
if (rctx->b.chip_class == CAYMAN) {
db_count_control |= S_028004_SAMPLE_RATE(a->log_samples);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index cd0052a519f..de3fd06aac3 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -120,7 +120,6 @@ struct r600_db_state {
struct r600_db_misc_state {
struct r600_atom atom;
- bool occlusion_query_enabled;
bool flush_depthstencil_through_cb;
bool flush_depth_inplace;
bool flush_stencil_inplace;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3189a1360b1..62b46ce52a1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1644,7 +1644,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
}
}
- if (a->occlusion_query_enabled) {
+ if (rctx->b.num_occlusion_queries > 0) {
if (rctx->b.chip_class >= R700) {
db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 82babeba7cb..32a10491e8b 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2864,10 +2864,7 @@ static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable
{
struct r600_context *rctx = (struct r600_context*)ctx;
- if (rctx->db_misc_state.occlusion_query_enabled != enable) {
- rctx->db_misc_state.occlusion_query_enabled = enable;
- r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
- }
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
From e90fe60b72c9b08a8b06f055aafc514a0ad483c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 20:41:52 +0200
Subject: [PATCH 098/224] r600g: implement set_active_query_state for pausing
occlusion queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Use ZPASS_INCREMENT_DISABLE everywhere.
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/evergreen_state.c | 5 ++++-
src/gallium/drivers/r600/evergreend.h | 2 +-
src/gallium/drivers/r600/r600_pipe.h | 1 +
src/gallium/drivers/r600/r600_state.c | 6 +++++-
src/gallium/drivers/r600/r600_state_common.c | 12 ++++++++++++
src/gallium/drivers/r600/r600d.h | 1 +
src/gallium/drivers/radeon/r600_query.c | 6 ------
7 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 077664d068e..c1b0b56ece4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1802,12 +1802,15 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
- if (rctx->b.num_occlusion_queries > 0) {
+ if (rctx->b.num_occlusion_queries > 0 &&
+ !a->occlusion_queries_disabled) {
db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
if (rctx->b.chip_class == CAYMAN) {
db_count_control |= S_028004_SAMPLE_RATE(a->log_samples);
}
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
+ } else {
+ db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1);
}
/* This is to fix a lockup when hyperz and alpha test are enabled at
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index ebe8c4a65ba..a900458f588 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1735,7 +1735,7 @@
#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8)
#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12)
#define R_028004_DB_COUNT_CONTROL 0x00028004
-#define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0)
+#define S_028004_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 0)
#define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1)
#define S_028004_SAMPLE_RATE(x) (((x) & 0x7) << 4) /* cayman only */
#define R_028008_DB_DEPTH_VIEW 0x00028008
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index de3fd06aac3..0102638723b 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -120,6 +120,7 @@ struct r600_db_state {
struct r600_db_misc_state {
struct r600_atom atom;
+ bool occlusion_queries_disabled;
bool flush_depthstencil_through_cb;
bool flush_depth_inplace;
bool flush_stencil_inplace;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 62b46ce52a1..c4de9639a79 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1644,12 +1644,16 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
}
}
- if (rctx->b.num_occlusion_queries > 0) {
+ if (rctx->b.num_occlusion_queries > 0 &&
+ !a->occlusion_queries_disabled) {
if (rctx->b.chip_class >= R700) {
db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
}
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
+ } else {
+ db_render_control |= S_028D0C_ZPASS_INCREMENT_DISABLE(1);
}
+
if (rctx->db_state.rsurf && rctx->db_state.rsurf->db_htile_surface) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 32a10491e8b..cdb493dad28 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2860,6 +2860,17 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
}
}
+static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable)
+{
+ struct r600_context *rctx = (struct r600_context*)ctx;
+
+ /* Occlusion queries. */
+ if (rctx->db_misc_state.occlusion_queries_disabled != !enable) {
+ rctx->db_misc_state.occlusion_queries_disabled = !enable;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ }
+}
+
static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
struct r600_context *rctx = (struct r600_context*)ctx;
@@ -2914,6 +2925,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->b.b.sampler_view_destroy = r600_sampler_view_destroy;
rctx->b.b.texture_barrier = r600_texture_barrier;
rctx->b.b.set_stream_output_targets = r600_set_streamout_targets;
+ rctx->b.b.set_active_query_state = r600_set_active_query_state;
rctx->b.b.draw_vbo = r600_draw_vbo;
rctx->b.invalidate_buffer = r600_invalidate_buffer;
rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index ef99573ebc6..0c18445f015 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -781,6 +781,7 @@
#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6)
#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7)
#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x03) << 8)
+#define S_028D0C_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 11)
#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15)
#define S_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 13)
#define G_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 13) & 0x03)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index d780b8c182f..7a2d2ee7f31 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1261,11 +1261,6 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
return 1;
}
-static void
-r600_set_active_query_state(struct pipe_context *pipe, boolean enable)
-{
-}
-
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
@@ -1274,7 +1269,6 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
rctx->b.get_query_result = r600_get_query_result;
- rctx->b.set_active_query_state = r600_set_active_query_state;
rctx->render_cond_atom.emit = r600_emit_query_predication;
if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
From 12fee5b93e06ba2e7076b9a24ddf15d55a1ac3f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 21:10:58 +0200
Subject: [PATCH 099/224] r600g: add pausing pipeline & streamout queries into
set_active_query_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/evergreen_state.c | 12 ++++++++++++
src/gallium/drivers/r600/r600_hw_context.c | 10 ++++++++++
src/gallium/drivers/r600/r600_pipe.h | 2 +-
src/gallium/drivers/r600/r600_state.c | 6 ++++++
src/gallium/drivers/r600/r600_state_common.c | 9 +++++++++
5 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c1b0b56ece4..f76d7a90595 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2395,6 +2395,12 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
cayman_init_common_regs(cb, rctx->b.chip_class,
rctx->b.family, rctx->screen->b.info.drm_minor);
@@ -2648,6 +2654,12 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
rctx->b.family, rctx->screen->b.info.drm_minor);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 7a6f957945b..63b631ac27f 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -223,6 +223,16 @@ void r600_flush_emit(struct r600_context *rctx)
cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */
}
+ if (rctx->b.flags & R600_CONTEXT_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) |
+ EVENT_INDEX(0));
+ } else if (rctx->b.flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) |
+ EVENT_INDEX(0));
+ }
+
if (wait_until) {
/* Use of WAIT_UNTIL is deprecated on Cayman+ */
if (rctx->b.family < CHIP_CAYMAN) {
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 0102638723b..86dd3c8e4c6 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -56,7 +56,7 @@
#define R600_CONTEXT_WAIT_CP_DMA_IDLE (R600_CONTEXT_PRIVATE_FLAG << 10)
/* the number of CS dwords for flushing and drawing */
-#define R600_MAX_FLUSH_CS_DWORDS 16
+#define R600_MAX_FLUSH_CS_DWORDS 18
#define R600_MAX_DRAW_CS_DWORDS 58
#define R600_MAX_USER_CONST_BUFFERS 13
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c4de9639a79..02702ae7304 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2177,6 +2177,12 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
family = rctx->b.family;
ps_prio = 0;
vs_prio = 1;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index cdb493dad28..c03b75aea52 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2864,6 +2864,15 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
{
struct r600_context *rctx = (struct r600_context*)ctx;
+ /* Pipeline stat & streamout queries. */
+ if (enable) {
+ rctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
+ rctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
+ } else {
+ rctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
+ rctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
+ }
+
/* Occlusion queries. */
if (rctx->db_misc_state.occlusion_queries_disabled != !enable) {
rctx->db_misc_state.occlusion_queries_disabled = !enable;
From 7347c068d88a0bd10f27a48c6e068cf098442098 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 21:12:13 +0200
Subject: [PATCH 100/224] r600g: don't manually stop queries for blitter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
r600_set_active_query_state does it better.
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/r600_blit.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index c52d5a9bad0..7ddd4fa063f 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -54,8 +54,6 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_suspend_nontimer_queries(&rctx->b);
-
util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
@@ -98,7 +96,6 @@ static void r600_blitter_end(struct pipe_context *ctx)
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->b.render_cond_force_off = false;
- r600_resume_nontimer_queries(&rctx->b);
}
static unsigned u_max_sample(struct pipe_resource *r)
From 0222351fc114b9b8a34cd79598fb3f0fb4a75b0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 21:24:19 +0200
Subject: [PATCH 101/224] gallium/radeon: merge timer and non-timer query lists
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
All of them are paused only between IBs.
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/r600_hw_context.c | 3 +-
src/gallium/drivers/radeon/r600_pipe_common.c | 18 ++---
src/gallium/drivers/radeon/r600_pipe_common.h | 19 ++----
src/gallium/drivers/radeon/r600_query.c | 65 ++++---------------
4 files changed, 23 insertions(+), 82 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 63b631ac27f..3ef2ac5207e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -65,8 +65,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
- ctx->b.num_cs_dw_timer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 32bd6e40d32..f58733278c8 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -156,14 +156,8 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
- if (ctx->num_cs_dw_nontimer_queries_suspend) {
- /* Since non-timer queries are suspended during blits,
- * we have to guard against double-suspends. */
- r600_suspend_nontimer_queries(ctx);
- ctx->nontimer_queries_suspended_by_flush = true;
- }
- if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
- r600_suspend_timer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_suspend_queries(ctx);
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -180,12 +174,8 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
- r600_resume_timer_queries(ctx);
- if (ctx->nontimer_queries_suspended_by_flush) {
- ctx->nontimer_queries_suspended_by_flush = false;
- r600_resume_nontimer_queries(ctx);
- }
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_resume_queries(ctx);
}
static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 57af0ff963d..c387922c671 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -428,18 +428,11 @@ struct r600_common_context {
unsigned flags; /* flush flags */
/* Queries. */
- /* The list of active queries. */
+ /* Maintain the list of active queries for pausing between IBs. */
int num_occlusion_queries;
int num_perfect_occlusion_queries;
- /* Keep track of non-timer queries, because they should be suspended
- * during context flushing.
- * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
- * but they should be suspended between IBs. */
- struct list_head active_nontimer_queries;
- struct list_head active_timer_queries;
- unsigned num_cs_dw_nontimer_queries_suspend;
- bool nontimer_queries_suspended_by_flush;
- unsigned num_cs_dw_timer_queries_suspend;
+ struct list_head active_queries;
+ unsigned num_cs_dw_queries_suspend;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -569,10 +562,8 @@ void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
/* r600_query.c */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
-void r600_resume_nontimer_queries(struct r600_common_context *ctx);
-void r600_suspend_timer_queries(struct r600_common_context *ctx);
-void r600_resume_timer_queries(struct r600_common_context *ctx);
+void r600_suspend_queries(struct r600_common_context *ctx);
+void r600_resume_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
/* r600_streamout.c */
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 7a2d2ee7f31..aa86560bd9d 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -516,10 +516,7 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
query->ops->emit_start(ctx, query, query->buffer.buf, va);
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
- else
- ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
+ ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
}
static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -590,12 +587,8 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
query->buffer.results_end += query->result_size;
- if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
- else
- ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
- }
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+ ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
r600_update_occlusion_query_state(ctx, query->b.type, -1);
r600_update_prims_generated_query_state(ctx, query->b.type, -1);
@@ -730,11 +723,8 @@ boolean r600_query_hw_begin(struct r600_common_context *rctx,
r600_query_hw_emit_start(rctx, query);
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
- else
- LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
- return true;
+ LIST_ADDTAIL(&query->list, &rctx->active_queries);
+ return true;
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
@@ -973,28 +963,14 @@ static void r600_render_condition(struct pipe_context *ctx,
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
-static void r600_suspend_queries(struct r600_common_context *ctx,
- struct list_head *query_list,
- unsigned *num_cs_dw_queries_suspend)
+void r600_suspend_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
- LIST_FOR_EACH_ENTRY(query, query_list, list) {
+ LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
r600_query_hw_emit_stop(ctx, query);
}
- assert(*num_cs_dw_queries_suspend == 0);
-}
-
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
-{
- r600_suspend_queries(ctx, &ctx->active_nontimer_queries,
- &ctx->num_cs_dw_nontimer_queries_suspend);
-}
-
-void r600_suspend_timer_queries(struct r600_common_context *ctx)
-{
- r600_suspend_queries(ctx, &ctx->active_timer_queries,
- &ctx->num_cs_dw_timer_queries_suspend);
+ assert(ctx->num_cs_dw_queries_suspend == 0);
}
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
@@ -1022,35 +998,21 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
return num_dw;
}
-static void r600_resume_queries(struct r600_common_context *ctx,
- struct list_head *query_list,
- unsigned *num_cs_dw_queries_suspend)
+void r600_resume_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
- unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
+ unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
- assert(*num_cs_dw_queries_suspend == 0);
+ assert(ctx->num_cs_dw_queries_suspend == 0);
/* Check CS space here. Resuming must not be interrupted by flushes. */
ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
- LIST_FOR_EACH_ENTRY(query, query_list, list) {
+ LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
r600_query_hw_emit_start(ctx, query);
}
}
-void r600_resume_nontimer_queries(struct r600_common_context *ctx)
-{
- r600_resume_queries(ctx, &ctx->active_nontimer_queries,
- &ctx->num_cs_dw_nontimer_queries_suspend);
-}
-
-void r600_resume_timer_queries(struct r600_common_context *ctx)
-{
- r600_resume_queries(ctx, &ctx->active_timer_queries,
- &ctx->num_cs_dw_timer_queries_suspend);
-}
-
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
@@ -1274,8 +1236,7 @@ void r600_query_init(struct r600_common_context *rctx)
if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
- LIST_INITHEAD(&rctx->active_nontimer_queries);
- LIST_INITHEAD(&rctx->active_timer_queries);
+ LIST_INITHEAD(&rctx->active_queries);
}
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
From e241a63512d87ee5585db9768869133f5665a1ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 21:26:23 +0200
Subject: [PATCH 102/224] gallium/radeon: remove R600_QUERY_HW_FLAG_TIMER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
not used anymore
Reviewed-by: Edward O'Callaghan
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/r600_hw_context.c | 2 +-
src/gallium/drivers/radeon/r600_perfcounter.c | 1 -
src/gallium/drivers/radeon/r600_query.c | 4 +---
src/gallium/drivers/radeon/r600_query.h | 3 +--
4 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 3ef2ac5207e..0c3b58030b6 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -64,7 +64,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
}
- /* Count in queries_suspend. */
+ /* Count in r600_suspend_queries. */
num_dw += ctx->b.num_cs_dw_queries_suspend;
/* Count in streamout_end at the end of CS. */
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index f3529a1fe0f..9ab17d9e04c 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -310,7 +310,6 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
query->b.b.ops = &batch_query_ops;
query->b.ops = &batch_query_hw_ops;
- query->b.flags = R600_QUERY_HW_FLAG_TIMER;
query->num_counters = num_queries;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index aa86560bd9d..de6e37b9f62 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -369,13 +369,11 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
query->result_size = 16;
query->num_cs_dw_begin = 8;
query->num_cs_dw_end = 8;
- query->flags = R600_QUERY_HW_FLAG_TIMER;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
query->num_cs_dw_end = 8;
- query->flags = R600_QUERY_HW_FLAG_TIMER |
- R600_QUERY_HW_FLAG_NO_START;
+ query->flags = R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 8b2c4e3fe93..9f3a917d727 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -84,8 +84,7 @@ struct r600_query {
enum {
R600_QUERY_HW_FLAG_NO_START = (1 << 0),
- R600_QUERY_HW_FLAG_TIMER = (1 << 1),
- R600_QUERY_HW_FLAG_PREDICATE = (1 << 2),
+ R600_QUERY_HW_FLAG_PREDICATE = (1 << 1),
};
struct r600_query_hw_ops {
From 7e78b5ed38e289ddb6397a211361b6e3be4bf9ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 16:37:33 +0200
Subject: [PATCH 103/224] pb_buffer: switch pb_buffer::size to 64 bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
being able to allocate more than 4 GB may be useful
Reviewed-by: Nicolai Hähnle
---
src/gallium/auxiliary/pipebuffer/pb_buffer.h | 6 +++---
src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 5 +++--
src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 7 ++++---
src/gallium/drivers/r300/r300_texture_desc.c | 3 ++-
src/gallium/drivers/radeon/r600_buffer_common.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 5 +++--
src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c | 5 +++--
src/gallium/winsys/svga/drm/vmw_buffer.c | 4 ++--
8 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 803c1d39192..33c23068c27 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -87,9 +87,9 @@ struct pb_desc
/**
- * Size. Regular (32bit) unsigned for now.
+ * 64-bit type for GPU buffer sizes and offsets.
*/
-typedef unsigned pb_size;
+typedef uint64_t pb_size;
/**
@@ -98,8 +98,8 @@ typedef unsigned pb_size;
struct pb_buffer
{
struct pipe_reference reference;
- unsigned size;
unsigned alignment;
+ pb_size size;
unsigned usage;
/**
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index fbbe8d11eb0..64af321558e 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -40,6 +40,7 @@
#include
#include
#endif
+#include <inttypes.h>
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
@@ -208,7 +209,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
while (curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
- debug_printf("%10p %7u %8u %7s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
@@ -224,7 +225,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- debug_printf("%10p %7u %8u %7s %10p %s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s %10p %s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 3d3a7aba7fb..4e36866e08c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -41,6 +41,7 @@
#include "util/list.h"
#include "util/u_time.h"
#include "util/u_debug_stack.h"
+#include <inttypes.h>
#include "pb_buffer.h"
#include "pb_bufmgr.h"
@@ -190,7 +191,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
underflow = !check_random_pattern(map, buf->underflow_size,
&min_ofs, &max_ofs);
if(underflow) {
- debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
+ debug_printf("buffer underflow (offset -%"PRIu64"%s to -%"PRIu64" bytes) detected\n",
buf->underflow_size - min_ofs,
min_ofs == 0 ? "+" : "",
buf->underflow_size - max_ofs);
@@ -200,7 +201,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
buf->overflow_size,
&min_ofs, &max_ofs);
if(overflow) {
- debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
+ debug_printf("buffer overflow (size %"PRIu64" plus offset %"PRIu64" to %"PRIu64"%s bytes) detected\n",
buf->base.size,
min_ofs,
max_ofs,
@@ -349,7 +350,7 @@ pb_debug_manager_dump_locked(struct pb_debug_manager *mgr)
buf = LIST_ENTRY(struct pb_debug_buffer, curr, head);
debug_printf("buffer = %p\n", (void *) buf);
- debug_printf(" .size = 0x%x\n", buf->base.size);
+ debug_printf(" .size = 0x%"PRIx64"\n", buf->base.size);
debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);
curr = next;
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 8fa98c5804e..2442d726cd1 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -25,6 +25,7 @@
#include "r300_context.h"
#include "util/u_format.h"
+#include <inttypes.h>
/* Returns the number of pixels that the texture should be aligned to
* in the given dimension. */
@@ -614,7 +615,7 @@ void r300_texture_desc_init(struct r300_screen *rscreen,
"r300: I got a pre-allocated buffer to use it as a texture "
"storage, but the buffer is too small. I'll use the buffer "
"anyway, because I can't crash here, but it's dangerous. "
- "This can be a DDX bug. Got: %iB, Need: %iB, Info:\n",
+ "This can be a DDX bug. Got: %"PRIu64"B, Need: %uB, Info:\n",
tex->buf->size, tex->tex.size_in_bytes);
r300_tex_print_info(tex, "texture_desc_init");
/* Ooops, what now. Apps will break if we fail this,
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 33ba0fbca9b..606b1fcc19f 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -192,7 +192,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
res->TC_L2_dirty = false;
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
- fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n",
+ fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 08856dff430..c92a66ec5b1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -40,6 +40,7 @@
#include
#include
#include
+#include <inttypes.h>
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
@@ -297,8 +298,8 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
sizeof(va)) != 0 &&
va.operation == RADEON_VA_RESULT_ERROR) {
fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", bo->base.size);
- fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+ fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
+ fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
}
}
diff --git a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
index c1b9eb95c52..d049d1dbc46 100644
--- a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
+++ b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
@@ -40,6 +40,7 @@
#include
#include
#endif
+#include <inttypes.h>
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
@@ -172,7 +173,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
while(curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
- debug_printf("%10p %7u %8u %7s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
@@ -188,7 +189,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- debug_printf("%10p %7u %8u %7s %10p %s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s %10p %s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c
index c082dcc34e9..3ac80c7caf5 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -154,7 +154,7 @@ vmw_gmr_buffer_unmap(struct pb_buffer *_buf)
static void
vmw_gmr_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
*base_buf = buf;
*offset = 0;
@@ -266,7 +266,7 @@ vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf,
struct SVGAGuestPtr *ptr)
{
struct pb_buffer *base_buf;
- unsigned offset = 0;
+ pb_size offset = 0;
struct vmw_gmr_buffer *gmr_buf;
pb_get_base_buffer( buf, &base_buf, &offset );
From 0ba0933f488cbb22ad1a221b0057ac9753130916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 16:48:55 +0200
Subject: [PATCH 104/224] winsys/amdgpu: add support for 64-bit buffer sizes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2: fail in radeon_winsys_bo_create if size > 32 bits
Reviewed-by: Nicolai Hähnle
---
src/gallium/auxiliary/util/u_math.h | 6 ++++
src/gallium/drivers/radeon/radeon_winsys.h | 4 +--
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 29 ++++++++++---------
.../winsys/amdgpu/drm/amdgpu_surface.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 8 +++--
5 files changed, 30 insertions(+), 19 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index e92f83a8109..b4ac0db3c50 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -792,6 +792,12 @@ align(int value, int alignment)
return (value + alignment - 1) & ~(alignment - 1);
}
+static inline uint64_t /* 64-bit align(): round value up to a power-of-two alignment */
+align64(uint64_t value, unsigned alignment)
+{
+ return (value + alignment - 1) & ~((uint64_t)alignment - 1); /* widen before ~ so bits 32-63 survive the mask */
+}
+
/**
* Works like align but on npot alignments.
*/
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index baecca72383..743dbd1fb46 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -449,7 +449,7 @@ struct radeon_winsys {
* \return The created buffer object.
*/
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -528,7 +528,7 @@ struct radeon_winsys {
* \param Size Size in bytes for the new buffer.
*/
struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
- void *pointer, unsigned size);
+ void *pointer, uint64_t size);
/**
* Whether the buffer was created from a user pointer.
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index c79bed45753..04ef17da7bf 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -36,6 +36,7 @@
#include
#include
#include
+#include <inttypes.h>
static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
{
@@ -141,9 +142,9 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
amdgpu_fence_reference(&bo->fence[i], NULL);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->ws->allocated_vram -= align(bo->base.size, bo->ws->gart_page_size);
+ bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- bo->ws->allocated_gtt -= align(bo->base.size, bo->ws->gart_page_size);
+ bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->gart_page_size);
FREE(bo);
}
@@ -265,7 +266,7 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
}
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
unsigned usage,
enum radeon_bo_domain initial_domain,
@@ -303,9 +304,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
- fprintf(stderr, "amdgpu: size : %d bytes\n", size);
- fprintf(stderr, "amdgpu: alignment : %d bytes\n", alignment);
- fprintf(stderr, "amdgpu: domains : %d\n", initial_domain);
+ fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
+ fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
goto error_bo_alloc;
}
@@ -331,9 +332,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
if (initial_domain & RADEON_DOMAIN_VRAM)
- ws->allocated_vram += align(size, ws->gart_page_size);
+ ws->allocated_vram += align64(size, ws->gart_page_size);
else if (initial_domain & RADEON_DOMAIN_GTT)
- ws->allocated_gtt += align(size, ws->gart_page_size);
+ ws->allocated_gtt += align64(size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
@@ -458,7 +459,7 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -482,7 +483,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
*/
- size = align(size, ws->gart_page_size);
+ size = align64(size, ws->gart_page_size);
/* Only set one usage bit each for domains and flags, or the cache manager
* might consider different sets of domains / flags compatible
@@ -592,9 +593,9 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
*offset = whandle->offset;
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- ws->allocated_vram += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_vram += align64(bo->base.size, ws->gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_gtt += align64(bo->base.size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
@@ -648,7 +649,7 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
}
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
- void *pointer, unsigned size)
+ void *pointer, uint64_t size)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
amdgpu_bo_handle buf_handle;
@@ -684,7 +685,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
bo->initial_domain = RADEON_DOMAIN_GTT;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
- ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_gtt += align64(bo->base.size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 4c837a8e20f..1164a3058c5 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -212,7 +212,7 @@ static int compute_level(struct amdgpu_winsys *ws,
}
surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
- surf_level->offset = align(surf->bo_size, AddrSurfInfoOut->baseAlign);
+ surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign);
surf_level->slice_size = AddrSurfInfoOut->sliceSize;
surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
surf_level->npix_x = u_minify(surf->npix_x, level);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index c92a66ec5b1..9cc3d0393e3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -718,7 +718,7 @@ static void radeon_bo_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -728,6 +728,10 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
struct radeon_bo *bo;
unsigned usage = 0;
+ /* Only 32-bit sizes are supported. */
+ if (size > UINT_MAX)
+ return NULL;
+
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
@@ -769,7 +773,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
}
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
- void *pointer, unsigned size)
+ void *pointer, uint64_t size)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct drm_radeon_gem_userptr args;
From 0689741e51ba38de9c865592a16da97e2dfc350a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 16:50:17 +0200
Subject: [PATCH 105/224] winsys/radeon: fix printing allocation failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Print the size, alignment, domains and flags of a failed allocation as unsigned instead of signed.
Reviewed-by: Nicolai Hähnle
---
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 9cc3d0393e3..dd6555c9502 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -530,10 +530,10 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
&args, sizeof(args))) {
fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", size);
- fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
- fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
- fprintf(stderr, "radeon: flags : %d\n", args.flags);
+ fprintf(stderr, "radeon: size : %u bytes\n", size);
+ fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
+ fprintf(stderr, "radeon: flags : %u\n", args.flags);
return NULL;
}
From 1dd8832e046ddef6d9ee69210127bebc9ddb35eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 17:14:49 +0200
Subject: [PATCH 106/224] gallium/radeon: allow allocating textures >= 4 GB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_buffer_common.c | 2 +-
src/gallium/drivers/radeon/r600_pipe_common.h | 17 ++++++++++-------
src/gallium/drivers/radeon/r600_texture.c | 12 ++++++------
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 606b1fcc19f..0f892c0e6e4 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -102,7 +102,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
- unsigned size, unsigned alignment,
+ uint64_t size, unsigned alignment,
bool use_reusable_pool)
{
struct r600_texture *rtex = (struct r600_texture*)res;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index c387922c671..9b28758d9f1 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -140,6 +140,9 @@ struct radeon_shader_binary {
void radeon_shader_binary_init(struct radeon_shader_binary *b);
void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+/* Only 32-bit buffer allocations are supported, gallium doesn't support more
+ * at the moment.
+ */
struct r600_resource {
struct u_resource b;
@@ -184,8 +187,8 @@ struct r600_transfer {
};
struct r600_fmask_info {
- unsigned offset;
- unsigned size;
+ uint64_t offset;
+ uint64_t size;
unsigned alignment;
unsigned pitch_in_pixels;
unsigned bank_height;
@@ -194,8 +197,8 @@ struct r600_fmask_info {
};
struct r600_cmask_info {
- unsigned offset;
- unsigned size;
+ uint64_t offset;
+ uint64_t size;
unsigned alignment;
unsigned pitch;
unsigned height;
@@ -215,7 +218,7 @@ struct r600_htile_info {
struct r600_texture {
struct r600_resource resource;
- unsigned size;
+ uint64_t size;
bool is_depth;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
@@ -227,7 +230,7 @@ struct r600_texture {
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
- unsigned dcc_offset; /* 0 = disabled */
+ uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
@@ -509,7 +512,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
unsigned usage);
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
- unsigned size, unsigned alignment,
+ uint64_t size, unsigned alignment,
bool use_reusable_pool);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 4850b73f291..93916855683 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -482,7 +482,7 @@ static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
r600_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
- rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
+ rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
@@ -585,7 +585,7 @@ static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
- rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
+ rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
if (rscreen->chip_class >= SI)
@@ -747,14 +747,14 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
if (rtex->fmask.size)
- fprintf(f, " FMask: offset=%u, size=%u, alignment=%u, pitch_in_pixels=%u, "
+ fprintf(f, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
- fprintf(f, " CMask: offset=%u, size=%u, alignment=%u, pitch=%u, "
+ fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, "
"height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
@@ -768,7 +768,7 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
if (rtex->dcc_offset) {
- fprintf(f, " DCC: offset=%u, size=%"PRIu64", alignment=%"PRIu64"\n",
+ fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
for (i = 0; i <= rtex->surface.last_level; i++)
@@ -873,7 +873,7 @@ r600_texture_create_object(struct pipe_screen *screen,
if (!buf && rtex->surface.dcc_size &&
!(rscreen->debug_flags & DBG_NO_DCC)) {
/* Reserve space for the DCC buffer. */
- rtex->dcc_offset = align(rtex->size, rtex->surface.dcc_alignment);
+ rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
}
From 73aeebd772cfb840dee05d5815239b365d68f09e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 17:21:19 +0200
Subject: [PATCH 107/224] radeonsi: allow clearing buffers >= 4 GB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only CMASK and DCC clears can use this, because only textures can be so
large.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/r600_blit.c | 2 +-
src/gallium/drivers/radeon/r600_pipe_common.c | 2 +-
src/gallium/drivers/radeon/r600_pipe_common.h | 4 ++--
src/gallium/drivers/radeonsi/si_cp_dma.c | 6 +++---
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 7ddd4fa063f..96643912243 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -581,7 +581,7 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
}
static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct r600_context *rctx = (struct r600_context*)ctx;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index f58733278c8..a64a091a4fd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -963,7 +963,7 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 9b28758d9f1..cb8a34bf4ec 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -475,7 +475,7 @@ struct r600_common_context {
const struct pipe_box *src_box);
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer);
void (*blit_decompress_depth)(struct pipe_context *ctx,
@@ -547,7 +547,7 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index dc62415823e..001ddd4bfae 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -124,7 +124,7 @@ static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
struct pipe_resource *src, unsigned byte_count,
- unsigned remaining_size, unsigned *flags)
+ uint64_t remaining_size, unsigned *flags)
{
si_need_cs_space(sctx);
@@ -158,7 +158,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - CP_DMA_ALIGNMENT)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
@@ -180,7 +180,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
sctx->b.gfx.cs,
PIPE_TRANSFER_WRITE);
map += offset;
- for (unsigned i = 0; i < size; i++) {
+ for (uint64_t i = 0; i < size; i++) {
unsigned byte_within_dword = (offset + i) % 4;
*map++ = (value >> (byte_within_dword * 8)) & 0xff;
}
From b7878146c444628e8f579d57f4c3af03ad1fc201 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 22:39:54 +0200
Subject: [PATCH 108/224] gallium/radeon: removing dead code for sharing
stencil buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is a remnant of the times when the DDX was allocating depth-stencil
buffers for windows. Now, st/dri allocates them and doesn't share them.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_texture.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 93916855683..619c085fb9e 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -222,10 +222,6 @@ static int r600_setup_surface(struct pipe_screen *screen,
rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
- rtex->surface.stencil_offset =
- rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
- }
}
if (offset) {
From 2d7be5d37e70d19df88be53222bf02def40e93e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 10 Apr 2016 22:48:48 +0200
Subject: [PATCH 109/224] gallium/radeon: never choose a linear tiling for DB
surfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Just for consistency. This is actually not a problem, because both addrlib
and radeon check and fix this.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_texture.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 619c085fb9e..72af5344b70 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -943,13 +943,12 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
force_tiling = true;
/* Handle common candidates for the linear mode.
- * Compressed textures must always be tiled. */
- if (!force_tiling && !util_format_is_compressed(templ->format)) {
- /* Not everything can be linear, so we cannot enforce it
- * for all textures. */
- if ((rscreen->debug_flags & DBG_NO_TILING) &&
- (!util_format_is_depth_or_stencil(templ->format) ||
- !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
+ * Compressed textures and DB surfaces must always be tiled.
+ */
+ if (!force_tiling && !util_format_is_compressed(templ->format) &&
+ (!util_format_is_depth_or_stencil(templ->format) ||
+ templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)) {
+ if (rscreen->debug_flags & DBG_NO_TILING)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
From 1a98be001f06ae2d50d444d1103cc15b67502a14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 15:34:45 +0200
Subject: [PATCH 110/224] gallium/radeon: fix maximum texture anisotropy setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We were overdoing it for non-power-of-two values.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_pipe_common.h | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index cb8a34bf4ec..85c4ec0d6e6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -644,11 +644,15 @@ static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
- if (filter <= 1) return 0;
- if (filter <= 2) return 1;
- if (filter <= 4) return 2;
- if (filter <= 8) return 3;
- /* else */ return 4;
+ if (filter < 2)
+ return 0;
+ if (filter < 4)
+ return 1;
+ if (filter < 8)
+ return 2;
+ if (filter < 16)
+ return 3;
+ return 4;
}
static inline unsigned r600_wavefront_size(enum radeon_family family)
From f7420ef5b4640a92a5aaa57341c59e0d4185a4a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 17:02:51 +0200
Subject: [PATCH 111/224] radeonsi: enable some sampler fields to match the
closed driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
copied from the Vulkan driver
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeonsi/si_state.c | 9 +++++++--
src/gallium/drivers/radeonsi/sid.h | 3 +++
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 94130a99b1b..9c8a9044d30 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3406,13 +3406,18 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
r600_tex_aniso_filter(state->max_anisotropy) << 9 |
S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
- S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
+ S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
+ S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
- S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
+ S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
+ S_008F38_MIP_POINT_PRECLAMP(1) |
+ S_008F38_DISABLE_LSB_CEIL(1) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
S_008F3C_BORDER_COLOR_TYPE(border_color_type);
return rstate;
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 892084707d2..12b616e96a9 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2392,6 +2392,9 @@
#define S_008F38_FILTER_PREC_FIX(x) (((x) & 0x1) << 30)
#define G_008F38_FILTER_PREC_FIX(x) (((x) >> 30) & 0x1)
#define C_008F38_FILTER_PREC_FIX 0xBFFFFFFF
+#define S_008F38_ANISO_OVERRIDE(x) (((x) & 0x1) << 31)
+#define G_008F38_ANISO_OVERRIDE(x) (((x) >> 31) & 0x1)
+#define C_008F38_ANISO_OVERRIDE 0x7FFFFFFF
#define R_008F3C_SQ_IMG_SAMP_WORD3 0x008F3C
#define S_008F3C_BORDER_COLOR_PTR(x) (((x) & 0xFFF) << 0)
#define G_008F3C_BORDER_COLOR_PTR(x) (((x) >> 0) & 0xFFF)
From ddd33431c54379ecf0dce71078e34a07be82e2fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 02:08:23 +0200
Subject: [PATCH 112/224] radeonsi: clean up aniso state translation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_pipe_common.h | 15 +++++++++++++++
src/gallium/drivers/radeonsi/si_state.c | 18 +++---------------
src/gallium/drivers/radeonsi/sid.h | 5 +++++
3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 85c4ec0d6e6..e227e48addd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -642,6 +642,21 @@ static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
rctx->streamout.prims_gen_query_enabled;
}
+#define SQ_TEX_XY_FILTER_POINT 0x00
+#define SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
+
+static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
+{
+ if (filter == PIPE_TEX_FILTER_LINEAR)
+ return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : SQ_TEX_XY_FILTER_BILINEAR;
+ else
+ return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
+ : SQ_TEX_XY_FILTER_POINT;
+}
+
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
if (filter < 2)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 9c8a9044d30..fe27ca5ac16 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1863,17 +1863,6 @@ static unsigned si_tex_wrap(unsigned wrap)
}
}
-static unsigned si_tex_filter(unsigned filter)
-{
- switch (filter) {
- default:
- case PIPE_TEX_FILTER_NEAREST:
- return V_008F38_SQ_TEX_XY_FILTER_POINT;
- case PIPE_TEX_FILTER_LINEAR:
- return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
- }
-}
-
static unsigned si_tex_mipfilter(unsigned filter)
{
switch (filter) {
@@ -3344,7 +3333,6 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
unsigned border_color_type, border_color_index = 0;
if (!rstate) {
@@ -3403,7 +3391,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
- r600_tex_aniso_filter(state->max_anisotropy) << 9 |
+ S_008F30_MAX_ANISO_RATIO(r600_tex_aniso_filter(state->max_anisotropy)) |
S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
@@ -3411,8 +3399,8 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
- S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, state->max_anisotropy)) |
+ S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, state->max_anisotropy)) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
S_008F38_MIP_POINT_PRECLAMP(1) |
S_008F38_DISABLE_LSB_CEIL(1) |
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 12b616e96a9..f0aa605c2d9 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2307,6 +2307,9 @@
#define V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER 0x05
#define V_008F30_SQ_TEX_CLAMP_BORDER 0x06
#define V_008F30_SQ_TEX_MIRROR_ONCE_BORDER 0x07
+#define S_008F30_MAX_ANISO_RATIO(x) (((x) & 0x07) << 9)
+#define G_008F30_MAX_ANISO_RATIO(x) (((x) >> 9) & 0x07)
+#define C_008F30_MAX_ANISO_RATIO 0xFFFFF1FF
#define S_008F30_DEPTH_COMPARE_FUNC(x) (((x) & 0x07) << 12)
#define G_008F30_DEPTH_COMPARE_FUNC(x) (((x) >> 12) & 0x07)
#define C_008F30_DEPTH_COMPARE_FUNC 0xFFFF8FFF
@@ -2371,6 +2374,8 @@
#define C_008F38_XY_MIN_FILTER 0xFF3FFFFF
#define V_008F38_SQ_TEX_XY_FILTER_POINT 0x00
#define V_008F38_SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
#define S_008F38_Z_FILTER(x) (((x) & 0x03) << 24)
#define G_008F38_Z_FILTER(x) (((x) >> 24) & 0x03)
#define C_008F38_Z_FILTER 0xFCFFFFFF
From b0d4469519bf07c4051af8eb86ab71647fb1eb61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 21:37:43 +0200
Subject: [PATCH 113/224] radeonsi: disable aniso filtering for non-mipmap
textures on SI-CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The closed driver does this, but it looks at base_level and last_level
and uses a conditional assignment, which LLVM can't generate on SGPRs.
That led me to invent this solution that abuses the image descriptor.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeonsi/si_shader.c | 36 +++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_state.c | 10 +++++++
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 08da3e37550..59c6f41f803 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3339,6 +3339,35 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
return get_sampler_desc_custom(ctx, list, index, type);
}
+/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
+ *
+ * SI-CI:
+ * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
+ * filtering manually. The driver sets img7 to a mask clearing
+ * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
+ * s_and_b32 samp0, samp0, img7
+ *
+ * VI:
+ * The ANISO_OVERRIDE sampler field enables this fix in TA.
+ */
+static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
+ LLVMValueRef res, LLVMValueRef samp)
+{
+ LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+ LLVMValueRef img7, samp0;
+
+ if (ctx->screen->b.chip_class >= VI)
+ return samp;
+
+ img7 = LLVMBuildExtractElement(builder, res,
+ LLVMConstInt(ctx->i32, 7, 0), "");
+ samp0 = LLVMBuildExtractElement(builder, samp,
+ LLVMConstInt(ctx->i32, 0, 0), "");
+ samp0 = LLVMBuildAnd(builder, samp0, img7, "");
+ return LLVMBuildInsertElement(builder, samp, samp0,
+ LLVMConstInt(ctx->i32, 0, 0), "");
+}
+
static void tex_fetch_ptrs(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data,
@@ -3370,6 +3399,7 @@ static void tex_fetch_ptrs(
*fmask_ptr = get_sampler_desc(ctx, ind_index, DESC_FMASK);
} else {
*samp_ptr = get_sampler_desc(ctx, ind_index, DESC_SAMPLER);
+ *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
*fmask_ptr = NULL;
}
} else {
@@ -4701,9 +4731,13 @@ static void preload_samplers(struct si_shader_context *ctx)
if (info->is_msaa_sampler[i])
ctx->fmasks[i] =
get_sampler_desc(ctx, offset, DESC_FMASK);
- else
+ else {
ctx->sampler_states[i] =
get_sampler_desc(ctx, offset, DESC_SAMPLER);
+ ctx->sampler_states[i] =
+ sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
+ ctx->sampler_states[i]);
+ }
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fe27ca5ac16..d75565a5c24 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3136,6 +3136,16 @@ si_make_texture_descriptor(struct si_screen *screen,
} else {
state[6] = 0;
state[7] = 0;
+
+ /* The last dword is unused by hw. The shader uses it to clear
+ * bits in the first dword of sampler state.
+ */
+ if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
+ if (first_level == last_level)
+ state[7] = C_008F30_MAX_ANISO_RATIO;
+ else
+ state[7] = 0xffffffff;
+ }
}
/* Initialize the sampler view for FMASK. */
From 3bc2d967c4c626f0efadfca8771a90797a12c22b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 8 Apr 2016 02:09:59 +0200
Subject: [PATCH 114/224] r600g: clean up aniso state translation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/evergreen_state.c | 9 ++++-----
src/gallium/drivers/r600/evergreend.h | 14 +++++++-------
src/gallium/drivers/r600/r600_pipe.h | 1 -
src/gallium/drivers/r600/r600_state.c | 17 +++++++++++++----
src/gallium/drivers/r600/r600_state_common.c | 11 -----------
src/gallium/drivers/r600/r600d.h | 8 +++++---
6 files changed, 29 insertions(+), 31 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index f76d7a90595..2de813b9afe 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -561,7 +561,6 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
if (!ss) {
return NULL;
@@ -574,10 +573,10 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, state->max_anisotropy)) |
+ S_03C000_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, state->max_anisotropy)) |
S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
+ S_03C000_MAX_ANISO_RATIO(r600_tex_aniso_filter(state->max_anisotropy)) |
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
/* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
@@ -852,7 +851,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
view->tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
/* aniso max 16 samples */
- view->tex_resource_words[6] |= S_030018_MAX_ANISO(4);
+ view->tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(4);
}
view->tex_resource_words[7] = S_03001C_DATA_FORMAT(format) |
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index a900458f588..ece421e3d33 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1202,11 +1202,11 @@
#define G_030014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF)
#define C_030014_LAST_ARRAY 0xC001FFFF
#define R_030018_SQ_TEX_RESOURCE_WORD6_0 0x030018
-/* FMASK_BANK_HEIGHT and MAX_ANISO share the first two bits.
+/* FMASK_BANK_HEIGHT and MAX_ANISO_RATIO share the first two bits.
* The former is only used with MSAA textures. */
-#define S_030018_MAX_ANISO(x) (((x) & 0x7) << 0)
-#define G_030018_MAX_ANISO(x) (((x) >> 0) & 0x7)
-#define C_030018_MAX_ANISO 0xFFFFFFF8
+#define S_030018_MAX_ANISO_RATIO(x) (((x) & 0x7) << 0)
+#define G_030018_MAX_ANISO_RATIO(x) (((x) >> 0) & 0x7)
+#define C_030018_MAX_ANISO_RATIO 0xFFFFFFF8
#define S_030018_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 0)
#define S_030018_PERF_MODULATION(x) (((x) & 0x7) << 3)
#define G_030018_PERF_MODULATION(x) (((x) >> 3) & 0x7)
@@ -1344,9 +1344,9 @@
#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 15)
#define G_03C000_MIP_FILTER(x) (((x) >> 15) & 0x3)
#define C_03C000_MIP_FILTER 0xFFFE7FFF
-#define S_03C000_MAX_ANISO(x) (((x) & 0x7) << 17)
-#define G_03C000_MAX_ANISO(x) (((x) >> 17) & 0x7)
-#define C_03C000_MAX_ANISO 0xFFF1FFFF
+#define S_03C000_MAX_ANISO_RATIO(x) (((x) & 0x7) << 17)
+#define G_03C000_MAX_ANISO_RATIO(x) (((x) >> 17) & 0x7)
+#define C_03C000_MAX_ANISO_RATIO 0xFFF1FFFF
#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 20)
#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 20) & 0x3)
#define C_03C000_BORDER_COLOR_TYPE 0xFFCFFFFF
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 86dd3c8e4c6..52f04b2fc4a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -746,7 +746,6 @@ void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
uint32_t r600_translate_stencil_op(int s_op);
uint32_t r600_translate_fill(uint32_t func);
unsigned r600_tex_wrap(unsigned wrap);
-unsigned r600_tex_filter(unsigned filter);
unsigned r600_tex_mipfilter(unsigned filter);
unsigned r600_tex_compare(unsigned compare);
bool sampler_state_needs_border_color(const struct pipe_sampler_state *state);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 02702ae7304..140f946feb7 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -558,11 +558,20 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
return rs;
}
+static unsigned r600_tex_filter(unsigned filter, unsigned max_aniso)
+{
+ if (filter == PIPE_TEX_FILTER_LINEAR)
+ return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : V_03C000_SQ_TEX_XY_FILTER_BILINEAR;
+ else
+ return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_POINT
+ : V_03C000_SQ_TEX_XY_FILTER_POINT;
+}
+
static void *r600_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0;
if (!ss) {
return NULL;
@@ -576,10 +585,10 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter, state->max_anisotropy)) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter, state->max_anisotropy)) |
S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
+ S_03C000_MAX_ANISO_RATIO(r600_tex_aniso_filter(state->max_anisotropy)) |
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
/* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c03b75aea52..95666a34ec4 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2122,17 +2122,6 @@ unsigned r600_tex_wrap(unsigned wrap)
}
}
-unsigned r600_tex_filter(unsigned filter)
-{
- switch (filter) {
- default:
- case PIPE_TEX_FILTER_NEAREST:
- return V_03C000_SQ_TEX_XY_FILTER_POINT;
- case PIPE_TEX_FILTER_LINEAR:
- return V_03C000_SQ_TEX_XY_FILTER_BILINEAR;
- }
-}
-
unsigned r600_tex_mipfilter(unsigned filter)
{
switch (filter) {
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 0c18445f015..ecabb340a9c 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -1267,6 +1267,8 @@
#define V_03C000_SQ_TEX_XY_FILTER_POINT 0x00000000
#define V_03C000_SQ_TEX_XY_FILTER_BILINEAR 0x00000001
#define V_03C000_SQ_TEX_XY_FILTER_BICUBIC 0x00000002
+#define V_03C000_SQ_TEX_XY_FILTER_ANISO_POINT 0x00000004
+#define V_03C000_SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x00000005
#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12)
#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7)
#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF
@@ -1279,9 +1281,9 @@
#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17)
#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3)
#define C_03C000_MIP_FILTER 0xFFF9FFFF
-#define S_03C000_MAX_ANISO(x) (((x) & 0x7) << 19)
-#define G_03C000_MAX_ANISO(x) (((x) >> 19) & 0x7)
-#define C_03C000_MAX_ANISO 0xFFB7FFFF
+#define S_03C000_MAX_ANISO_RATIO(x) (((x) & 0x7) << 19)
+#define G_03C000_MAX_ANISO_RATIO(x) (((x) >> 19) & 0x7)
+#define C_03C000_MAX_ANISO_RATIO 0xFFB7FFFF
#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22)
#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3)
#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF
From b36f19bf98e206264b4de6ce5ca510c2d305ffe4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 7 Apr 2016 21:18:14 +0200
Subject: [PATCH 115/224] r600g: disable aniso filtering for non-mipmap
textures on EG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is the default behavior of the closed driver when running on VI.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/r600/evergreen_state.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 2de813b9afe..0e055877761 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -848,10 +848,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples);
view->tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh);
} else {
+ bool no_mip = first_level == last_level;
+
view->tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
view->tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
/* aniso max 16 samples */
- view->tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(4);
+ view->tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(no_mip ? 0 : 4);
}
view->tex_resource_words[7] = S_03001C_DATA_FORMAT(format) |
From a57309f807dc1e4450cd8c5ac132de0de4e17f89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 11 Apr 2016 19:26:03 +0200
Subject: [PATCH 116/224] winsys/amdgpu: remove hack for low VRAM configuration
A better solution will be used.
Reviewed-by: Alex Deucher
Reviewed-by: Edward O'Callaghan
---
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 04ef17da7bf..1b2793a5d6b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -469,16 +469,6 @@ amdgpu_bo_create(struct radeon_winsys *rws,
struct amdgpu_winsys_bo *bo;
unsigned usage = 0;
- /* Don't use VRAM if the GPU doesn't have much. This is only the initial
- * domain. The kernel is free to move the buffer if it wants to.
- *
- * 64MB means no VRAM by todays standards.
- */
- if (domain & RADEON_DOMAIN_VRAM && ws->info.vram_size <= 64*1024*1024) {
- domain = RADEON_DOMAIN_GTT;
- flags = RADEON_FLAG_GTT_WC;
- }
-
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
From 5a4b74d1ba2c156766a7a5dbfef099c7db5d6694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 11 Apr 2016 19:56:07 +0200
Subject: [PATCH 117/224] gallium/radeon: relax requirements on VRAM placements
on APUs
This makes Tonga with vramlimit=128 2x faster in Heaven.
Reviewed-by: Alex Deucher
Reviewed-by: Edward O'Callaghan