Merge ../mesa into vulkan
commit fc2a66cfcd
230 changed files with 3091 additions and 2649 deletions
@@ -178,7 +178,13 @@ GL 4.4, GLSL 4.40:
GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware)
GL_ARB_enhanced_layouts not started
GL_ARB_enhanced_layouts in progress (Timothy)
- compile-time constant expressions in progress
- explicit byte offsets for blocks in progress
- forced alignment within blocks in progress
- specified vec4-slot component numbers in progress
- specified transform/feedback layout in progress
- input/output block locations in progress
GL_ARB_multi_bind DONE (all drivers)
GL_ARB_query_buffer_object not started
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -45,11 +45,13 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
<li>GL_ARB_shader_storage_buffer_object on i965</li>
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
<li>GL_ARB_texture_query_lod on softpipe</li>
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
</ul>

<h2>Bug fixes</h2>
@@ -136,8 +136,14 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
{
    struct timespec abs_time;
    int rt;

    assert(mtx != NULL);
    assert(cond != NULL);
    assert(xt != NULL);

    abs_time.tv_sec = xt->sec;
    abs_time.tv_nsec = xt->nsec;

    rt = pthread_cond_timedwait(cond, mtx, &abs_time);
    if (rt == ETIMEDOUT)
        return thrd_busy;
@@ -131,12 +131,10 @@ const __DRIconfig *
dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
                    EGLenum colorspace)
{
   if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
      return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
                                              conf->dri_srgb_single_config;
   else
      return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
                                              conf->dri_single_config;
   const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR;

   return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config[srgb] :
                                           conf->dri_single_config[srgb];
}

static EGLBoolean
@ -284,14 +282,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
|
|||
if (num_configs == 1) {
|
||||
conf = (struct dri2_egl_config *) matching_config;
|
||||
|
||||
if (double_buffer && srgb && !conf->dri_srgb_double_config)
|
||||
conf->dri_srgb_double_config = dri_config;
|
||||
else if (double_buffer && !srgb && !conf->dri_double_config)
|
||||
conf->dri_double_config = dri_config;
|
||||
else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
|
||||
conf->dri_srgb_single_config = dri_config;
|
||||
else if (!double_buffer && !srgb && !conf->dri_single_config)
|
||||
conf->dri_single_config = dri_config;
|
||||
if (double_buffer && !conf->dri_double_config[srgb])
|
||||
conf->dri_double_config[srgb] = dri_config;
|
||||
else if (!double_buffer && !conf->dri_single_config[srgb])
|
||||
conf->dri_single_config[srgb] = dri_config;
|
||||
else
|
||||
/* a similar config type is already added (unlikely) => discard */
|
||||
return NULL;
|
||||
|
|
@ -301,18 +295,13 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
|
|||
if (conf == NULL)
|
||||
return NULL;
|
||||
|
||||
if (double_buffer)
|
||||
conf->dri_double_config[srgb] = dri_config;
|
||||
else
|
||||
conf->dri_single_config[srgb] = dri_config;
|
||||
|
||||
memcpy(&conf->base, &base, sizeof base);
|
||||
if (double_buffer) {
|
||||
if (srgb)
|
||||
conf->dri_srgb_double_config = dri_config;
|
||||
else
|
||||
conf->dri_double_config = dri_config;
|
||||
} else {
|
||||
if (srgb)
|
||||
conf->dri_srgb_single_config = dri_config;
|
||||
else
|
||||
conf->dri_single_config = dri_config;
|
||||
}
|
||||
conf->base.SurfaceType = 0;
|
||||
conf->base.ConfigID = config_id;
|
||||
|
||||
_eglLinkConfig(&conf->base);
|
||||
|
|
@ -1021,10 +1010,10 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
|
|||
* doubleBufferMode check in
|
||||
* src/mesa/main/context.c:check_compatible()
|
||||
*/
|
||||
if (dri2_config->dri_double_config)
|
||||
dri_config = dri2_config->dri_double_config;
|
||||
if (dri2_config->dri_double_config[0])
|
||||
dri_config = dri2_config->dri_double_config[0];
|
||||
else
|
||||
dri_config = dri2_config->dri_single_config;
|
||||
dri_config = dri2_config->dri_single_config[0];
|
||||
|
||||
/* EGL_WINDOW_BIT is set only when there is a dri_double_config. This
|
||||
* makes sure the back buffer will always be used.
|
||||
|
|
@@ -2424,13 +2413,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   unsigned wait_flags = 0;
   EGLint ret = EGL_CONDITION_SATISFIED_KHR;

   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
   /* The EGL_KHR_fence_sync spec states:
    *
    *    "If no context is current for the bound API,
    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored.
    */
   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
      wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;

   /* the sync object should take a reference while waiting */
   dri2_egl_ref_sync(dri2_sync);

   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                         dri2_sync->fence, wait_flags,
                                         timeout))
      dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
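
For reference, a caller-side sketch of what the NULL-context handling above enables. This is illustrative application code, not part of the commit, and it assumes the KHR fence-sync entry point has already been loaded with eglGetProcAddress.

/* Waiting on a fence from a thread with no current context: with the fix
 * above the flush bit is simply ignored, as the EGL_KHR_fence_sync spec
 * requires, instead of dereferencing a NULL context. Illustrative only. */
#include <EGL/egl.h>
#include <EGL/eglext.h>

static EGLint
wait_on_fence(EGLDisplay dpy, EGLSyncKHR sync,
              PFNEGLCLIENTWAITSYNCKHRPROC client_wait_sync)
{
   return client_wait_sync(dpy, sync,
                           EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
                           EGL_FOREVER_KHR);
}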
@@ -284,10 +284,8 @@ struct dri2_egl_surface
struct dri2_egl_config
{
   _EGLConfig base;
   const __DRIconfig *dri_single_config;
   const __DRIconfig *dri_double_config;
   const __DRIconfig *dri_srgb_single_config;
   const __DRIconfig *dri_srgb_double_config;
   const __DRIconfig *dri_single_config[2];
   const __DRIconfig *dri_double_config[2];
};

struct dri2_egl_image
@ -101,6 +101,7 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
|
|||
struct dri2_egl_surface *dri2_surf;
|
||||
struct gbm_surface *window = native_window;
|
||||
struct gbm_dri_surface *surf;
|
||||
const __DRIconfig *config;
|
||||
|
||||
(void) drv;
|
||||
|
||||
|
|
@ -130,21 +131,20 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
|
|||
goto cleanup_surf;
|
||||
}
|
||||
|
||||
if (dri2_dpy->dri2) {
|
||||
const __DRIconfig *config =
|
||||
dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
|
||||
dri2_surf->base.GLColorspace);
|
||||
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
|
||||
dri2_surf->base.GLColorspace);
|
||||
|
||||
if (dri2_dpy->dri2) {
|
||||
dri2_surf->dri_drawable =
|
||||
(*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
|
||||
dri2_surf->gbm_surf);
|
||||
|
||||
} else {
|
||||
assert(dri2_dpy->swrast != NULL);
|
||||
|
||||
dri2_surf->dri_drawable =
|
||||
(*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
|
||||
dri2_conf->dri_double_config,
|
||||
dri2_surf->gbm_surf);
|
||||
(*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
|
||||
dri2_surf->gbm_surf);
|
||||
|
||||
}
|
||||
if (dri2_surf->dri_drawable == NULL) {
@ -1645,6 +1645,7 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
|||
struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
|
||||
struct wl_egl_window *window = native_window;
|
||||
struct dri2_egl_surface *dri2_surf;
|
||||
const __DRIconfig *config;
|
||||
|
||||
(void) drv;
|
||||
|
||||
|
|
@ -1669,10 +1670,12 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
|||
dri2_surf->base.Width = -1;
|
||||
dri2_surf->base.Height = -1;
|
||||
|
||||
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
|
||||
dri2_surf->base.GLColorspace);
|
||||
|
||||
dri2_surf->dri_drawable =
|
||||
(*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
|
||||
dri2_conf->dri_double_config,
|
||||
dri2_surf);
|
||||
(*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen,
|
||||
config, dri2_surf);
|
||||
if (dri2_surf->dri_drawable == NULL) {
|
||||
_eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable");
|
||||
goto cleanup_dri_drawable;
@ -206,6 +206,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
|
|||
xcb_generic_error_t *error;
|
||||
xcb_drawable_t drawable;
|
||||
xcb_screen_t *screen;
|
||||
const __DRIconfig *config;
|
||||
|
||||
STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
|
||||
drawable = (uintptr_t) native_surface;
|
||||
|
|
@ -245,19 +246,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
|
|||
dri2_surf->drawable = drawable;
|
||||
}
|
||||
|
||||
if (dri2_dpy->dri2) {
|
||||
const __DRIconfig *config =
|
||||
dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
|
||||
config = dri2_get_dri_config(dri2_conf, type,
|
||||
dri2_surf->base.GLColorspace);
|
||||
|
||||
if (dri2_dpy->dri2) {
|
||||
dri2_surf->dri_drawable =
|
||||
(*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
|
||||
dri2_surf);
|
||||
} else {
|
||||
assert(dri2_dpy->swrast);
|
||||
dri2_surf->dri_drawable =
|
||||
(*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
|
||||
dri2_conf->dri_double_config,
|
||||
dri2_surf);
|
||||
(*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
|
||||
dri2_surf);
|
||||
}
|
||||
|
||||
if (dri2_surf->dri_drawable == NULL) {
@ -116,6 +116,53 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
break;
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
|
||||
fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
|
||||
const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
|
||||
unsigned input;
|
||||
|
||||
if (src0->Register.Indirect && src0->Indirect.ArrayID)
|
||||
input = info->input_array_first[src0->Indirect.ArrayID];
|
||||
else
|
||||
input = src0->Register.Index;
|
||||
|
||||
/* For the INTERP opcodes, the interpolation is always
|
||||
* PERSPECTIVE unless LINEAR is specified.
|
||||
*/
|
||||
switch (info->input_interpolate[input]) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_CONSTANT:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_persp_opcode_interp_centroid = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_persp_opcode_interp_offset = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_persp_opcode_interp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fullinst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_INTERP_CENTROID:
|
||||
info->uses_linear_opcode_interp_centroid = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
info->uses_linear_opcode_interp_offset = true;
|
||||
break;
|
||||
case TGSI_OPCODE_INTERP_SAMPLE:
|
||||
info->uses_linear_opcode_interp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
|
||||
fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
|
||||
info->uses_doubles = true;
|
||||
|
|
@ -236,8 +283,48 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
|
||||
info->num_inputs++;
|
||||
|
||||
if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
|
||||
info->uses_centroid = TRUE;
|
||||
/* Only interpolated varyings. Don't include POSITION.
|
||||
* Don't include integer varyings, because they are not
|
||||
* interpolated.
|
||||
*/
|
||||
if (semName == TGSI_SEMANTIC_GENERIC ||
|
||||
semName == TGSI_SEMANTIC_TEXCOORD ||
|
||||
semName == TGSI_SEMANTIC_COLOR ||
|
||||
semName == TGSI_SEMANTIC_BCOLOR ||
|
||||
semName == TGSI_SEMANTIC_FOG ||
|
||||
semName == TGSI_SEMANTIC_CLIPDIST ||
|
||||
semName == TGSI_SEMANTIC_CULLDIST) {
|
||||
switch (fulldecl->Interp.Interpolate) {
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_persp_center = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_persp_centroid = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_persp_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
switch (fulldecl->Interp.Location) {
|
||||
case TGSI_INTERPOLATE_LOC_CENTER:
|
||||
info->uses_linear_center = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_CENTROID:
|
||||
info->uses_linear_centroid = true;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LOC_SAMPLE:
|
||||
info->uses_linear_sample = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
|
||||
}
|
||||
}
|
||||
|
||||
if (semName == TGSI_SEMANTIC_PRIMID)
|
||||
info->uses_primid = TRUE;
@ -82,7 +82,18 @@ struct tgsi_shader_info
|
|||
boolean writes_stencil; /**< does fragment shader write stencil value? */
|
||||
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
|
||||
boolean uses_kill; /**< KILL or KILL_IF instruction used? */
|
||||
boolean uses_centroid;
|
||||
boolean uses_persp_center;
|
||||
boolean uses_persp_centroid;
|
||||
boolean uses_persp_sample;
|
||||
boolean uses_linear_center;
|
||||
boolean uses_linear_centroid;
|
||||
boolean uses_linear_sample;
|
||||
boolean uses_persp_opcode_interp_centroid;
|
||||
boolean uses_persp_opcode_interp_offset;
|
||||
boolean uses_persp_opcode_interp_sample;
|
||||
boolean uses_linear_opcode_interp_centroid;
|
||||
boolean uses_linear_opcode_interp_offset;
|
||||
boolean uses_linear_opcode_interp_sample;
|
||||
boolean uses_instanceid;
|
||||
boolean uses_vertexid;
|
||||
boolean uses_vertexid_nobase;
@@ -268,6 +268,15 @@ The integer capabilities:
  bounds_max states of pipe_depth_stencil_alpha_state behave according
  to the GL_EXT_depth_bounds_test specification.
* ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: If the driver can force per-sample
  interpolation for all fragment shader inputs if
  pipe_rasterizer_state::force_persample_interp is set. This is only used
  by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading
  (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it,
  GL4 hardware will likely need to emulate it with a shader variant, or by
  selecting the interpolation weights with a conditional assignment
  in the shader.

.. _pipe_capf:
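
To make the new capability concrete, here is a minimal sketch of how a state tracker could consume PIPE_CAP_FORCE_PERSAMPLE_INTERP. The helper name and surrounding logic are assumptions for illustration; only the cap itself and pipe_rasterizer_state::force_persample_interp come from the documentation above.

/* Illustrative only: helper name and call site are assumed, not from the commit. */
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"

static void
update_force_persample_interp(struct pipe_screen *screen,
                              struct pipe_rasterizer_state *rast,
                              boolean gl3_sample_shading_active)
{
   /* Ask the driver whether it can force per-sample interpolation itself. */
   if (screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP))
      rast->force_persample_interp = gl3_sample_shading_active;
   else
      rast->force_persample_interp = FALSE; /* would need a shader variant instead */
}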
@ -1,6 +1,7 @@
|
|||
C_SOURCES := \
|
||||
dd_pipe.h \
|
||||
dd_public.h \
|
||||
dd_context.c \
|
||||
dd_draw.c \
|
||||
dd_screen.c
|
||||
dd_pipe.h \
|
||||
dd_public.h \
|
||||
dd_screen.c \
|
||||
dd_util.h
@ -30,9 +30,6 @@
|
|||
#include "util/u_dump.h"
|
||||
#include "util/u_format.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
#include "os/os_process.h"
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
enum call_type
|
||||
|
|
@ -88,33 +85,13 @@ struct dd_call
|
|||
} info;
|
||||
};
|
||||
|
||||
|
||||
static FILE *
|
||||
dd_get_file_stream(struct dd_context *dctx)
|
||||
{
|
||||
struct pipe_screen *screen = dctx->pipe->screen;
|
||||
static unsigned index;
|
||||
char proc_name[128], dir[256], name[512];
|
||||
FILE *f;
|
||||
|
||||
if (!os_get_process_name(proc_name, sizeof(proc_name))) {
|
||||
fprintf(stderr, "dd: can't get the process name\n");
|
||||
FILE *f = dd_get_debug_file();
|
||||
if (!f)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
|
||||
|
||||
if (mkdir(dir, 0774) && errno != EEXIST) {
|
||||
fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
|
||||
f = fopen(name, "w");
|
||||
if (!f) {
|
||||
fprintf(stderr, "dd: can't open file %s\n", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
|
||||
fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
@ -31,9 +31,7 @@
|
|||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_screen.h"
|
||||
|
||||
/* name of the directory in home */
|
||||
#define DD_DIR "ddebug_dumps"
|
||||
#include "dd_util.h"
|
||||
|
||||
enum dd_mode {
|
||||
DD_DETECT_HANGS,
|
||||
|
|
|
|||
src/gallium/drivers/ddebug/dd_util.h (new file, 71 lines)
@ -0,0 +1,71 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||
* Copyright 2008 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef DD_UTIL_H
|
||||
#define DD_UTIL_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "os/os_process.h"
|
||||
#include "util/u_debug.h"
|
||||
|
||||
/* name of the directory in home */
|
||||
#define DD_DIR "ddebug_dumps"
|
||||
|
||||
static inline FILE *
|
||||
dd_get_debug_file()
|
||||
{
|
||||
static unsigned index;
|
||||
char proc_name[128], dir[256], name[512];
|
||||
FILE *f;
|
||||
|
||||
if (!os_get_process_name(proc_name, sizeof(proc_name))) {
|
||||
fprintf(stderr, "dd: can't get the process name\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
|
||||
|
||||
if (mkdir(dir, 0774) && errno != EEXIST) {
|
||||
fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
|
||||
f = fopen(name, "w");
|
||||
if (!f) {
|
||||
fprintf(stderr, "dd: can't open file %s\n", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
#endif /* DD_UTIL_H */
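
A hypothetical caller, sketched only to show how the new helper is meant to be used; the function below is not part of the commit.

/* Hypothetical caller of dd_get_debug_file(); not from the commit. */
#include <stdio.h>
#include "dd_util.h"

static void
dd_dump_example(void)
{
   FILE *f = dd_get_debug_file();   /* opens $HOME/ddebug_dumps/<proc>_<pid>_<index> */
   if (!f)
      return;

   fprintf(f, "example dump entry\n");
   fclose(f);
}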
@ -235,6 +235,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
|
|||
|
|
@ -248,6 +248,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
|||
|
|
@ -470,6 +470,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -297,6 +297,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
|
|||
static inline bool
|
||||
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
|
||||
{
|
||||
/* Provide a buffer so that fences always have room to be emitted */
|
||||
size += 8;
|
||||
if (PUSH_AVAIL(push) < size)
|
||||
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = {
|
|||
_(B4G4R4X4_UNORM , S___),
|
||||
_(B4G4R4A4_UNORM , S___),
|
||||
_(B5G6R5_UNORM , SB__),
|
||||
_(B8G8R8X8_UNORM , SB__),
|
||||
_(B8G8R8X8_SRGB , S___),
|
||||
_(B8G8R8A8_UNORM , SB__),
|
||||
_(B8G8R8A8_SRGB , S___),
|
||||
_(BGRX8888_UNORM , SB__),
|
||||
_(BGRX8888_SRGB , S___),
|
||||
_(BGRA8888_UNORM , SB__),
|
||||
_(BGRA8888_SRGB , S___),
|
||||
_(R8G8B8A8_UNORM , __V_),
|
||||
_(R8G8B8A8_SNORM , S___),
|
||||
_(RGBA8888_SNORM , S___),
|
||||
_(DXT1_RGB , S___),
|
||||
_(DXT1_SRGB , S___),
|
||||
_(DXT1_RGBA , S___),
|
||||
|
|
@ -138,8 +138,8 @@ const struct nv30_format
|
|||
nv30_format_table[PIPE_FORMAT_COUNT] = {
|
||||
R_(B5G5R5X1_UNORM , X1R5G5B5 ),
|
||||
R_(B5G6R5_UNORM , R5G6B5 ),
|
||||
R_(B8G8R8X8_UNORM , X8R8G8B8 ),
|
||||
R_(B8G8R8A8_UNORM , A8R8G8B8 ),
|
||||
R_(BGRX8888_UNORM , X8R8G8B8 ),
|
||||
R_(BGRA8888_UNORM , A8R8G8B8 ),
|
||||
Z_(Z16_UNORM , Z16 ),
|
||||
Z_(X8Z24_UNORM , Z24S8 ),
|
||||
Z_(S8_UINT_Z24_UNORM , Z24S8 ),
|
||||
|
|
@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = {
|
|||
_(B4G4R4X4_UNORM , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B4G4R4A4_UNORM , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(B5G6R5_UNORM , R5G6B5 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B8G8R8X8_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B8G8R8X8_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(B8G8R8A8_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(B8G8R8A8_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
|
||||
_(R8G8B8A8_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
|
||||
_(BGRX8888_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(BGRX8888_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(BGRA8888_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(BGRA8888_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
|
||||
_(RGBA8888_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
|
||||
_(DXT1_RGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(DXT1_SRGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(DXT1_RGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
|
||||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -346,7 +347,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
|
|||
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
|
||||
assert(PUSH_AVAIL(push) >= 3);
|
||||
PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
|
||||
(2 /* size */ << 18) | (7 /* subchan */ << 13));
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, *sequence);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30)
|
|||
if (!nv30->vertex || nv30->draw_flags)
|
||||
return;
|
||||
|
||||
#ifdef PIPE_ARCH_BIG_ENDIAN
|
||||
if (1) { /* Figure out where the buffers are getting messed up */
|
||||
#else
|
||||
if (unlikely(vertex->need_conversion)) {
|
||||
#endif
|
||||
nv30->vbo_fifo = ~0;
|
||||
nv30->vbo_user = 0;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -215,6 +215,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -387,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
|||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
|
|
|
|||
|
|
@ -201,6 +201,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_VERTEXID_NOBASE:
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -536,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
|||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, *sequence);
@ -103,14 +103,14 @@
|
|||
|
||||
|
||||
/**
|
||||
* Writing relocations.
|
||||
* Writing buffers.
|
||||
*/
|
||||
|
||||
#define OUT_CS_RELOC(r) do { \
|
||||
assert((r)); \
|
||||
assert((r)->cs_buf); \
|
||||
OUT_CS(0xc0001000); /* PKT3_NOP */ \
|
||||
OUT_CS(cs_winsys->cs_get_reloc(cs_copy, (r)->cs_buf) * 4); \
|
||||
OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1049,7 +1049,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
|
|||
|
||||
assert(r300->vbo_cs);
|
||||
OUT_CS(0xc0001000); /* PKT3_NOP */
|
||||
OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4);
|
||||
OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4);
|
||||
END_CS;
|
||||
}
|
||||
|
||||
|
|
@ -1320,7 +1320,7 @@ validate:
|
|||
continue;
|
||||
tex = r300_resource(fb->cbufs[i]->texture);
|
||||
assert(tex && tex->buf && "cbuf is marked, but NULL!");
|
||||
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
|
||||
RADEON_USAGE_READWRITE,
|
||||
r300_surface(fb->cbufs[i])->domain,
|
||||
tex->b.b.nr_samples > 1 ?
|
||||
|
|
@ -1331,7 +1331,7 @@ validate:
|
|||
if (fb->zsbuf) {
|
||||
tex = r300_resource(fb->zsbuf->texture);
|
||||
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
|
||||
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
|
||||
RADEON_USAGE_READWRITE,
|
||||
r300_surface(fb->zsbuf)->domain,
|
||||
tex->b.b.nr_samples > 1 ?
|
||||
|
|
@ -1342,7 +1342,7 @@ validate:
|
|||
/* The AA resolve buffer. */
|
||||
if (r300->aa_state.dirty) {
|
||||
if (aa->dest) {
|
||||
r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf,
|
||||
RADEON_USAGE_WRITE,
|
||||
aa->dest->domain,
|
||||
RADEON_PRIO_COLOR_BUFFER);
|
||||
|
|
@ -1356,20 +1356,20 @@ validate:
|
|||
}
|
||||
|
||||
tex = r300_resource(texstate->sampler_views[i]->base.texture);
|
||||
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
|
||||
tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO);
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
|
||||
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
|
||||
}
|
||||
}
|
||||
/* ...occlusion query buffer... */
|
||||
if (r300->query_current)
|
||||
r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf,
|
||||
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_QUERY);
|
||||
/* ...vertex buffer for SWTCL path... */
|
||||
if (r300->vbo_cs)
|
||||
r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs,
|
||||
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_VERTEX_BUFFER);
|
||||
/* ...vertex buffers for HWTCL path... */
|
||||
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
|
||||
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
|
||||
|
|
@ -1382,18 +1382,18 @@ validate:
|
|||
if (!buf)
|
||||
continue;
|
||||
|
||||
r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf,
|
||||
RADEON_USAGE_READ,
|
||||
r300_resource(buf)->domain,
|
||||
RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
RADEON_PRIO_SAMPLER_BUFFER);
|
||||
}
|
||||
}
|
||||
/* ...and index buffer for HWTCL path. */
|
||||
if (index_buffer)
|
||||
r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf,
|
||||
RADEON_USAGE_READ,
|
||||
r300_resource(index_buffer)->domain,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_INDEX_BUFFER);
|
||||
|
||||
/* Now do the validation (flush is called inside cs_validate on failure). */
|
||||
if (!r300->rws->cs_validate(r300->cs)) {
|
||||
|
|
|
|||
|
|
@ -196,6 +196,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
/* SWTCL-only features. */
|
||||
|
|
|
|||
|
|
@ -442,7 +442,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
|||
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
|
||||
(struct r600_resource*)cb->base.texture,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_PRIO_SHADER_RW_BUFFER);
|
||||
|
||||
radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
|
||||
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
|
||||
|
|
@ -566,7 +566,7 @@ void evergreen_emit_cs_shader(
|
|||
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
code_bo, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA));
|
||||
RADEON_PRIO_USER_SHADER));
|
||||
}
|
||||
|
||||
static void evergreen_launch_grid(
|
||||
|
|
|
|||
|
|
@ -65,9 +65,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
|
|||
csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
|
||||
/* emit reloc before writing cs so that cs is always in consistent state */
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
|
||||
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
|
||||
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
|
||||
|
|
@ -131,7 +131,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
|
|||
/* This must be done after r600_need_cs_space. */
|
||||
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_CP_DMA);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
|
||||
radeon_emit(cs, clear_value); /* DATA [31:0] */
|
||||
|
|
|
|||
|
|
@ -783,6 +783,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
|
||||
va = tmp->resource.gpu_address;
|
||||
|
||||
if (state->format == PIPE_FORMAT_X24S8_UINT ||
|
||||
state->format == PIPE_FORMAT_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_X32_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_S8_UINT)
|
||||
view->is_stencil_sampler = true;
|
||||
|
||||
view->tex_resource = &tmp->resource;
|
||||
view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
|
||||
S_030000_PITCH((pitch / 8) - 1) |
|
||||
|
|
@ -1584,7 +1590,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
|
|||
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
|
||||
cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
tex->cmask_buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_COLOR_META);
|
||||
RADEON_PRIO_CMASK);
|
||||
} else {
|
||||
cmask_reloc = reloc;
|
||||
}
|
||||
|
|
@ -1767,7 +1773,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
|
|||
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
|
||||
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
|
||||
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] = reloc_idx;
|
||||
} else {
|
||||
|
|
@ -1823,9 +1829,9 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
|
|||
S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
|
||||
S_028000_COPY_CENTROID(1) |
|
||||
S_028000_COPY_SAMPLE(a->copy_sample);
|
||||
} else if (a->flush_depthstencil_in_place) {
|
||||
db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
|
||||
S_028000_STENCIL_COMPRESS_DISABLE(1);
|
||||
} else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
|
||||
db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
|
||||
S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
|
||||
db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
|
||||
}
|
||||
if (a->htile_clear) {
|
||||
|
|
@ -1881,7 +1887,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
|
||||
}
|
||||
state->dirty_mask = 0;
|
||||
}
|
||||
|
|
@ -1929,7 +1935,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
|
||||
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
|
||||
|
|
@ -1954,7 +1960,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
|
||||
|
||||
dirty_mask &= ~(1 << buffer_index);
|
||||
}
|
||||
|
|
@ -2018,9 +2024,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
|
|||
|
||||
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
|
||||
RADEON_USAGE_READ,
|
||||
rview->tex_resource->b.b.nr_samples > 1 ?
|
||||
RADEON_PRIO_SHADER_TEXTURE_MSAA :
|
||||
RADEON_PRIO_SHADER_TEXTURE_RO);
|
||||
r600_get_sampler_view_priority(rview->tex_resource));
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
radeon_emit(cs, reloc);
|
||||
|
||||
|
|
@ -2140,7 +2144,8 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
|
|||
(shader->buffer->gpu_address + shader->offset) >> 8);
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_INTERNAL_SHADER));
|
||||
}
|
||||
|
||||
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
|
||||
|
|
@ -2199,7 +2204,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
|
|||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW));
|
||||
RADEON_PRIO_RINGS_STREAMOUT));
|
||||
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
|
||||
state->esgs_ring.buffer_size >> 8);
|
||||
|
||||
|
|
@ -2209,7 +2214,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
|
|||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW));
|
||||
RADEON_PRIO_RINGS_STREAMOUT));
|
||||
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
|
||||
state->gsvs_ring.buffer_size >> 8);
|
||||
} else {
|
||||
|
|
@ -3330,9 +3335,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
|
|||
size = (cheight * pitch) / 4;
|
||||
/* emit reloc before writing cs so that cs is always in consistent state */
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
|
||||
cs->buf[cs->cdw++] = base >> 8;
|
||||
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
|
||||
|
|
@ -3376,11 +3381,11 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
|
|||
}
|
||||
|
||||
if (src->format != dst->format || src_box->depth > 1 ||
|
||||
rdst->dirty_level_mask != 0) {
|
||||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
if (rsrc->dirty_level_mask) {
|
||||
if (rsrc->dirty_level_mask & (1 << src_level)) {
|
||||
ctx->flush_resource(ctx, src);
|
||||
}
@ -202,20 +202,28 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
|
|||
|
||||
static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
|
||||
struct r600_texture *texture,
|
||||
bool is_stencil_sampler,
|
||||
unsigned first_level, unsigned last_level,
|
||||
unsigned first_layer, unsigned last_layer)
|
||||
{
|
||||
struct pipe_surface *zsurf, surf_tmpl = {{0}};
|
||||
unsigned layer, max_layer, checked_last_layer, level;
|
||||
unsigned *dirty_level_mask;
|
||||
|
||||
/* Enable decompression in DB_RENDER_CONTROL */
|
||||
rctx->db_misc_state.flush_depthstencil_in_place = true;
|
||||
if (is_stencil_sampler) {
|
||||
rctx->db_misc_state.flush_stencil_inplace = true;
|
||||
dirty_level_mask = &texture->stencil_dirty_level_mask;
|
||||
} else {
|
||||
rctx->db_misc_state.flush_depth_inplace = true;
|
||||
dirty_level_mask = &texture->dirty_level_mask;
|
||||
}
|
||||
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
|
||||
|
||||
surf_tmpl.format = texture->resource.b.b.format;
|
||||
|
||||
for (level = first_level; level <= last_level; level++) {
|
||||
if (!(texture->dirty_level_mask & (1 << level)))
|
||||
if (!(*dirty_level_mask & (1 << level)))
|
||||
continue;
|
||||
|
||||
surf_tmpl.u.tex.level = level;
|
||||
|
|
@ -242,12 +250,13 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
|
|||
/* The texture will always be dirty if some layers or samples aren't flushed.
|
||||
* I don't think this case occurs often though. */
|
||||
if (first_layer == 0 && last_layer == max_layer) {
|
||||
texture->dirty_level_mask &= ~(1 << level);
|
||||
*dirty_level_mask &= ~(1 << level);
|
||||
}
|
||||
}
|
||||
|
||||
/* Disable decompression in DB_RENDER_CONTROL */
|
||||
rctx->db_misc_state.flush_depthstencil_in_place = false;
|
||||
rctx->db_misc_state.flush_depth_inplace = false;
|
||||
rctx->db_misc_state.flush_stencil_inplace = false;
|
||||
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
|
||||
}
|
||||
|
||||
|
|
@ -259,12 +268,14 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
|
|||
|
||||
while (depth_texture_mask) {
|
||||
struct pipe_sampler_view *view;
|
||||
struct r600_pipe_sampler_view *rview;
|
||||
struct r600_texture *tex;
|
||||
|
||||
i = u_bit_scan(&depth_texture_mask);
|
||||
|
||||
view = &textures->views[i]->base;
|
||||
assert(view);
|
||||
rview = (struct r600_pipe_sampler_view*)view;
|
||||
|
||||
tex = (struct r600_texture *)view->texture;
|
||||
assert(tex->is_depth && !tex->is_flushing_texture);
|
||||
|
|
@ -272,6 +283,7 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
|
|||
if (rctx->b.chip_class >= EVERGREEN ||
|
||||
r600_can_read_depth(tex)) {
|
||||
r600_blit_decompress_depth_in_place(rctx, tex,
|
||||
rview->is_stencil_sampler,
|
||||
view->u.tex.first_level, view->u.tex.last_level,
|
||||
0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
|
||||
} else {
|
||||
|
|
@ -367,9 +379,14 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
|
|||
if (rtex->is_depth && !rtex->is_flushing_texture) {
|
||||
if (rctx->b.chip_class >= EVERGREEN ||
|
||||
r600_can_read_depth(rtex)) {
|
||||
r600_blit_decompress_depth_in_place(rctx, rtex,
|
||||
r600_blit_decompress_depth_in_place(rctx, rtex, false,
|
||||
level, level,
|
||||
first_layer, last_layer);
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
|
||||
r600_blit_decompress_depth_in_place(rctx, rtex, true,
|
||||
level, level,
|
||||
first_layer, last_layer);
|
||||
}
|
||||
} else {
|
||||
if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
|
||||
return false; /* error */
|
||||
|
|
|
|||
|
|
@ -419,9 +419,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
|||
|
||||
/* This must be done after r600_need_cs_space. */
|
||||
src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
|
||||
dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
|
||||
radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */
|
||||
|
|
@ -472,9 +472,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
|
|||
csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
|
||||
/* emit reloc before writing cs so that cs is always in consistent state */
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
|
||||
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
|
||||
cs->buf[cs->cdw++] = src_offset & 0xfffffffc;
|
||||
|
|
|
|||
|
|
@ -342,6 +342,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_VERTEXID_NOBASE:
|
||||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
|
|||
|
|
@ -109,7 +109,8 @@ struct r600_db_misc_state {
|
|||
struct r600_atom atom;
|
||||
bool occlusion_query_enabled;
|
||||
bool flush_depthstencil_through_cb;
|
||||
bool flush_depthstencil_in_place;
|
||||
bool flush_depth_inplace;
|
||||
bool flush_stencil_inplace;
|
||||
bool copy_depth, copy_stencil;
|
||||
unsigned copy_sample;
|
||||
unsigned log_samples;
|
||||
|
|
@ -253,6 +254,7 @@ struct r600_pipe_sampler_view {
|
|||
struct r600_resource *tex_resource;
|
||||
uint32_t tex_resource_words[8];
|
||||
bool skip_mip_address_reloc;
|
||||
bool is_stencil_sampler;
|
||||
};
|
||||
|
||||
struct r600_rasterizer_state {
|
||||
|
|
|
|||
|
|
@ -710,6 +710,12 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
break;
|
||||
}
|
||||
|
||||
if (state->format == PIPE_FORMAT_X24S8_UINT ||
|
||||
state->format == PIPE_FORMAT_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_X32_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_S8_UINT)
|
||||
view->is_stencil_sampler = true;
|
||||
|
||||
view->tex_resource = &tmp->resource;
|
||||
view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
|
||||
S_038000_TILE_MODE(array_mode) |
|
||||
|
|
@ -1605,7 +1611,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
|
|||
radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
|
||||
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
|
||||
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] = reloc_idx;
|
||||
} else {
|
||||
|
|
@ -1659,9 +1665,9 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
|
|||
if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 ||
|
||||
rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635)
|
||||
db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
|
||||
} else if (a->flush_depthstencil_in_place) {
|
||||
db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
|
||||
S_028D0C_STENCIL_COMPRESS_DISABLE(1);
|
||||
} else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
|
||||
db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
|
||||
S_028D0C_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
|
||||
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
|
||||
}
|
||||
if (a->htile_clear) {
|
||||
|
|
@ -1720,7 +1726,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1753,7 +1759,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
|
||||
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
|
||||
|
|
@ -1769,7 +1775,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
|
||||
|
||||
dirty_mask &= ~(1 << buffer_index);
|
||||
}
|
||||
|
|
@ -1821,9 +1827,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
|
|||
|
||||
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
|
||||
RADEON_USAGE_READ,
|
||||
rview->tex_resource->b.b.nr_samples > 1 ?
|
||||
RADEON_PRIO_SHADER_TEXTURE_MSAA :
|
||||
RADEON_PRIO_SHADER_TEXTURE_RO);
|
||||
r600_get_sampler_view_priority(rview->tex_resource));
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, reloc);
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
|
|
@ -1945,7 +1949,8 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
|
|||
radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_INTERNAL_SHADER));
|
||||
}
|
||||
|
||||
static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
|
||||
|
|
@ -1999,7 +2004,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
|
|||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW));
|
||||
RADEON_PRIO_RINGS_STREAMOUT));
|
||||
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
|
||||
state->esgs_ring.buffer_size >> 8);
|
||||
|
||||
|
|
@ -2008,7 +2013,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
|
|||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW));
|
||||
RADEON_PRIO_RINGS_STREAMOUT));
|
||||
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
|
||||
state->gsvs_ring.buffer_size >> 8);
|
||||
} else {
|
||||
|
|
@ -2914,9 +2919,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
|
|||
size = (cheight * pitch) / 4;
|
||||
/* emit reloc before writing cs so that cs is always in consistent state */
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_TEXTURE);
|
||||
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_TEXTURE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
|
||||
cs->buf[cs->cdw++] = base >> 8;
|
||||
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
|
||||
|
|
|
|||
|
|
@ -1683,7 +1683,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
|
||||
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
(struct r600_resource*)info.indirect,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_DRAW_INDIRECT);
|
||||
}
|
||||
|
||||
if (info.indexed) {
|
||||
|
|
@ -1712,7 +1713,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
|
||||
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
(struct r600_resource*)ib.buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_INDEX_BUFFER);
|
||||
}
|
||||
else {
|
||||
uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
|
||||
|
|
@ -1724,7 +1726,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
|
||||
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
(struct r600_resource*)ib.buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_INDEX_BUFFER);
|
||||
|
||||
cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
|
||||
cs->buf[cs->cdw++] = max_size;
|
||||
|
|
@ -1751,7 +1754,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
t->buf_filled_size, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SO_FILLED_SIZE);
|
||||
}
|
||||
|
||||
if (likely(!info.indirect)) {
|
||||
|
|
@ -1776,6 +1779,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
}
|
||||
if (rctx->framebuffer.compressed_cb_mask) {
|
||||
struct pipe_surface *surf;
|
||||
|
|
@ -1941,7 +1947,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
|
|||
r600_emit_command_buffer(cs, &shader->command_buffer);
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
|
||||
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
|
||||
}
|
||||
|
||||
unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
|
||||
|
|
@ -2669,7 +2675,7 @@ void r600_trace_emit(struct r600_context *rctx)
|
|||
|
||||
va = rscreen->b.trace_bo->gpu_address;
|
||||
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
|
||||
radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
|
||||
radeon_emit(cs, va & 0xFFFFFFFFUL);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFFUL);
@ -65,7 +65,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
|
|||
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
}
|
||||
}
|
||||
return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
|
||||
return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
|
||||
rbo->domains, priority) * 4;
|
||||
}
|
||||
|
||||
|
|
@@ -359,6 +359,7 @@ static const struct debug_named_value common_debug_options[] = {
 	{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
 	{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
 	{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
+	{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };

@@ -98,6 +98,7 @@
 #define DBG_PRECOMPILE		(1llu << 39)
 #define DBG_INFO		(1llu << 40)
 #define DBG_NO_WC		(1llu << 41)
+#define DBG_CHECK_VM		(1llu << 42)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
@@ -204,6 +205,7 @@ struct r600_texture {
 	unsigned			pitch_override;
 	bool				is_depth;
 	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
+	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
 	struct r600_texture		*flushed_depth_texture;
 	boolean				is_flushing_texture;
 	struct radeon_surf		surface;
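Usage note (not part of the diff): the named flags in common_debug_options are the ones accepted through the R600_DEBUG environment variable, so the VM-fault checking added here can be turned on at run time, for example:

    R600_DEBUG=check_vm <GL application>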
@@ -622,6 +624,18 @@ static inline unsigned r600_wavefront_size(enum radeon_family family)
 	}
 }
 
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+	if (res->b.b.target == PIPE_BUFFER)
+		return RADEON_PRIO_SAMPLER_BUFFER;
+
+	if (res->b.b.nr_samples > 1)
+		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
+
+	return RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
 #define COMPUTE_DBG(rscreen, fmt, args...) \
 	do { \
 		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
@ -226,7 +226,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
|||
assert(0);
|
||||
}
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_QUERY);
|
||||
|
||||
if (r600_is_timer_query(query->type))
|
||||
ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
|
||||
|
|
@ -288,7 +288,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
|||
assert(0);
|
||||
}
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_QUERY);
|
||||
|
||||
query->buffer.results_end += query->result_size;
|
||||
|
||||
|
|
@ -344,7 +344,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
|
|||
radeon_emit(cs, va + results_base);
|
||||
radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_QUERY);
|
||||
results_base += query->result_size;
|
||||
|
||||
/* set CONTINUE bit for all packets except the first */
|
||||
|
|
@ -990,7 +990,8 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
|
|||
radeon_emit(cs, buffer->gpu_address);
|
||||
radeon_emit(cs, buffer->gpu_address >> 32);
|
||||
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
|
||||
|
||||
/* analyze results */
|
||||
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
|
||||
|
|
|
|||
|
|
@ -217,7 +217,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
|
|||
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
|
||||
|
||||
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
|
||||
|
||||
/* R7xx requires this packet after updating BUFFER_BASE.
|
||||
* Without this, R7xx locks up. */
|
||||
|
|
@ -227,7 +227,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
|
|||
radeon_emit(cs, va >> 8);
|
||||
|
||||
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -245,7 +245,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
|
|||
radeon_emit(cs, va >> 32); /* src address hi */
|
||||
|
||||
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
|
||||
} else {
|
||||
/* Start from the beginning. */
|
||||
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
|
||||
|
|
@ -289,7 +289,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
|
|||
radeon_emit(cs, 0); /* unused */
|
||||
|
||||
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
|
||||
|
||||
/* Zero the buffer size. The counters (primitives generated,
|
||||
* primitives emitted) may be enabled even if there is not
|
||||
|
|
|
|||
|
|
@ -110,8 +110,8 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
|
|||
{
|
||||
int reloc_idx;
|
||||
|
||||
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
|
||||
RADEON_PRIO_MIN);
|
||||
reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain,
|
||||
RADEON_PRIO_UVD);
|
||||
if (!dec->use_legacy) {
|
||||
uint64_t addr;
|
||||
addr = dec->ws->buffer_get_virtual_address(cs_buf);
|
||||
|
|
|
|||
|
|
@ -516,7 +516,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *b
|
|||
{
|
||||
int reloc_idx;
|
||||
|
||||
reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
|
||||
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
|
||||
if (enc->use_vm) {
|
||||
uint64_t addr;
|
||||
addr = enc->ws->buffer_get_virtual_address(buf);
|
||||
|
|
|
|||
|
|
@@ -178,20 +178,59 @@ enum radeon_value_id {
     RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
 };
 
+/* Each group of four has the same priority. */
 enum radeon_bo_priority {
-    RADEON_PRIO_MIN,
-    RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
-    RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
-    RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
-    RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
-    RADEON_PRIO_COLOR_BUFFER,
-    RADEON_PRIO_DEPTH_BUFFER,
-    RADEON_PRIO_SHADER_TEXTURE_MSAA,
-    RADEON_PRIO_COLOR_BUFFER_MSAA,
-    RADEON_PRIO_DEPTH_BUFFER_MSAA,
-    RADEON_PRIO_COLOR_META,
-    RADEON_PRIO_DEPTH_META,
-    RADEON_PRIO_MAX /* must be <= 15 */
+    RADEON_PRIO_FENCE = 0,
+    RADEON_PRIO_TRACE,
+    RADEON_PRIO_SO_FILLED_SIZE,
+    RADEON_PRIO_QUERY,
+
+    RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
+    RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
+    RADEON_PRIO_DRAW_INDIRECT,
+    RADEON_PRIO_INDEX_BUFFER,
+
+    RADEON_PRIO_CP_DMA = 8,
+
+    RADEON_PRIO_VCE = 12,
+    RADEON_PRIO_UVD,
+    RADEON_PRIO_SDMA_BUFFER,
+    RADEON_PRIO_SDMA_TEXTURE,
+
+    RADEON_PRIO_USER_SHADER = 16,
+    RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. */
+
+    /* gap: 20 */
+
+    RADEON_PRIO_CONST_BUFFER = 24,
+    RADEON_PRIO_DESCRIPTORS,
+    RADEON_PRIO_BORDER_COLORS,
+
+    RADEON_PRIO_SAMPLER_BUFFER = 28,
+    RADEON_PRIO_VERTEX_BUFFER,
+
+    RADEON_PRIO_SHADER_RW_BUFFER = 32,
+    RADEON_PRIO_RINGS_STREAMOUT,
+    RADEON_PRIO_SCRATCH_BUFFER,
+    RADEON_PRIO_COMPUTE_GLOBAL,
+
+    RADEON_PRIO_SAMPLER_TEXTURE = 36,
+    RADEON_PRIO_SHADER_RW_IMAGE,
+
+    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40,
+
+    RADEON_PRIO_COLOR_BUFFER = 44,
+
+    RADEON_PRIO_DEPTH_BUFFER = 48,
+
+    RADEON_PRIO_COLOR_BUFFER_MSAA = 52,
+
+    RADEON_PRIO_DEPTH_BUFFER_MSAA = 56,
+
+    RADEON_PRIO_CMASK = 60,
+    RADEON_PRIO_DCC,
+    RADEON_PRIO_HTILE,
+    /* 63 is the maximum value */
 };
 
 struct winsys_handle;
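Aside (illustration only, not from the patch): the new values are spaced so that four consecutive enum entries share one priority level and 63 is the highest representable value. Under that reading of the "group of four" comment, a coarse hardware level could be derived with an integer division; the helper name below is hypothetical:

    /* Hypothetical helper: map a RADEON_PRIO_* value (0..63) to one of the
     * 16 coarse levels implied by the group-of-four layout. */
    static inline unsigned radeon_prio_to_level(enum radeon_bo_priority prio)
    {
        return (unsigned)prio / 4;   /* 0..63 -> 0..15 */
    }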
@@ -329,6 +368,12 @@ struct radeon_surf {
     uint32_t                    num_banks;
 };
 
+struct radeon_bo_list_item {
+    struct pb_buffer *buf;
+    uint64_t vm_address;
+    uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+};
+
 struct radeon_winsys {
     /**
      * The screen object this winsys was created for
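Aside (sketch, not from the patch): priority_usage is a plain 64-bit bitmask indexed by RADEON_PRIO_* values, which is why the enum above stops at 63. Recording and testing how a buffer was used reduces to simple bit operations; the helper names here are made up:

    #include <stdbool.h>
    #include <stdint.h>

    /* 'mask' plays the role of radeon_bo_list_item::priority_usage. */
    static inline void mark_use(uint64_t *mask, unsigned prio)   /* prio is a RADEON_PRIO_* value */
    {
        *mask |= 1llu << prio;
    }

    static inline bool was_used_as(uint64_t mask, unsigned prio)
    {
        return (mask & (1llu << prio)) != 0;
    }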
@@ -556,18 +601,17 @@ struct radeon_winsys {
     void (*cs_destroy)(struct radeon_winsys_cs *cs);
 
     /**
-     * Add a new buffer relocation. Every relocation must first be added
-     * before it can be written.
+     * Add a buffer. Each buffer used by a CS must be added using this function.
      *
-     * \param cs      A command stream to add buffer for validation against.
-     * \param buf     A winsys buffer to validate.
+     * \param cs      Command stream
+     * \param buf     Buffer
      * \param usage   Whether the buffer is used for read and/or write.
      * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
      * \param priority  A higher number means a greater chance of being
      *                  placed in the requested domain. 15 is the maximum.
-     * \return Relocation index.
+     * \return Buffer index.
      */
-    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+    unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
                              struct radeon_winsys_cs_handle *buf,
                              enum radeon_bo_usage usage,
                              enum radeon_bo_domain domain,
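For illustration (not part of the patch): a call site following the signature documented above might look like the sketch below, assuming a winsys ws, a command stream cs, and a buffer handle bo already exist; the variable names are made up.

    unsigned buf_idx = ws->cs_add_buffer(cs, bo,
                                         RADEON_USAGE_READ,           /* read-only in this CS */
                                         RADEON_DOMAIN_VRAM,          /* preferred placement */
                                         RADEON_PRIO_SAMPLER_TEXTURE);/* relative importance */

The returned index can later be recovered with cs_lookup_buffer(), which the next hunk renames from cs_get_reloc().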
@@ -580,21 +624,21 @@ struct radeon_winsys {
      * \param buf Buffer
      * \return The buffer index, or -1 if the buffer has not been added.
      */
-    int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
-                        struct radeon_winsys_cs_handle *buf);
+    int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+                            struct radeon_winsys_cs_handle *buf);
 
     /**
-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
-     * added so far. If the validation fails, all the relocations which have
+     * Return TRUE if there is enough memory in VRAM and GTT for the buffers
+     * added so far. If the validation fails, all buffers which have
      * been added since the last call of cs_validate will be removed and
-     * the CS will be flushed (provided there are still any relocations).
+     * the CS will be flushed (provided there are still any buffers).
      *
      * \param cs A command stream to validate.
      */
     boolean (*cs_validate)(struct radeon_winsys_cs *cs);
 
     /**
-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+     * Return TRUE if there is enough memory in VRAM and GTT for the buffers
      * added so far.
      *
      * \param cs A command stream to validate.
@@ -603,6 +647,16 @@ struct radeon_winsys {
      */
     boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
+    /**
+     * Return the buffer list.
+     *
+     * \param cs    Command stream
+     * \param list  Returned buffer list. Set to NULL to query the count only.
+     * \return      The buffer count.
+     */
+    unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs,
+                                   struct radeon_bo_list_item *list);
+
     /**
      * Flush a command stream.
      *
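For illustration (not part of the patch): because list may be NULL, callers can size the array with a first call and fill it with a second, which is the pattern si_context_gfx_flush() adopts later in this commit. Error handling and includes are omitted in this fragment.

    unsigned count = ws->cs_get_buffer_list(cs, NULL);                 /* query the count only */
    struct radeon_bo_list_item *list = calloc(count, sizeof(*list));   /* caller owns the array */
    if (list)
        ws->cs_get_buffer_list(cs, list);          /* fill buf, vm_address, priority_usage */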
@ -62,9 +62,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
|
|||
r600_need_dma_space(&ctx->b, ncopy * 7);
|
||||
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
|
||||
|
|
@ -172,9 +172,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
|
|||
r600_need_dma_space(&ctx->b, ncopy * 12);
|
||||
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
|
||||
|
||||
copy_height = size * 4 / pitch;
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
|
|
@ -242,7 +242,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
|
|||
|
||||
if (src->format != dst->format ||
|
||||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
|
||||
rdst->dirty_level_mask & (1 << dst_level)) {
|
||||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -180,19 +180,27 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
|
|||
|
||||
static void si_blit_decompress_depth_in_place(struct si_context *sctx,
|
||||
struct r600_texture *texture,
|
||||
bool is_stencil_sampler,
|
||||
unsigned first_level, unsigned last_level,
|
||||
unsigned first_layer, unsigned last_layer)
|
||||
{
|
||||
struct pipe_surface *zsurf, surf_tmpl = {{0}};
|
||||
unsigned layer, max_layer, checked_last_layer, level;
|
||||
unsigned *dirty_level_mask;
|
||||
|
||||
sctx->db_inplace_flush_enabled = true;
|
||||
if (is_stencil_sampler) {
|
||||
sctx->db_flush_stencil_inplace = true;
|
||||
dirty_level_mask = &texture->stencil_dirty_level_mask;
|
||||
} else {
|
||||
sctx->db_flush_depth_inplace = true;
|
||||
dirty_level_mask = &texture->dirty_level_mask;
|
||||
}
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
|
||||
surf_tmpl.format = texture->resource.b.b.format;
|
||||
|
||||
for (level = first_level; level <= last_level; level++) {
|
||||
if (!(texture->dirty_level_mask & (1 << level)))
|
||||
if (!(*dirty_level_mask & (1 << level)))
|
||||
continue;
|
||||
|
||||
surf_tmpl.u.tex.level = level;
|
||||
|
|
@ -220,11 +228,12 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
|
|||
/* The texture will always be dirty if some layers aren't flushed.
|
||||
* I don't think this case occurs often though. */
|
||||
if (first_layer == 0 && last_layer == max_layer) {
|
||||
texture->dirty_level_mask &= ~(1 << level);
|
||||
*dirty_level_mask &= ~(1 << level);
|
||||
}
|
||||
}
|
||||
|
||||
sctx->db_inplace_flush_enabled = false;
|
||||
sctx->db_flush_depth_inplace = false;
|
||||
sctx->db_flush_stencil_inplace = false;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
|
||||
|
|
@ -236,17 +245,20 @@ void si_flush_depth_textures(struct si_context *sctx,
|
|||
|
||||
while (mask) {
|
||||
struct pipe_sampler_view *view;
|
||||
struct si_sampler_view *sview;
|
||||
struct r600_texture *tex;
|
||||
|
||||
i = u_bit_scan(&mask);
|
||||
|
||||
view = textures->views.views[i];
|
||||
assert(view);
|
||||
sview = (struct si_sampler_view*)view;
|
||||
|
||||
tex = (struct r600_texture *)view->texture;
|
||||
assert(tex->is_depth && !tex->is_flushing_texture);
|
||||
|
||||
si_blit_decompress_depth_in_place(sctx, tex,
|
||||
sview->is_stencil_sampler,
|
||||
view->u.tex.first_level, view->u.tex.last_level,
|
||||
0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
|
||||
}
|
||||
|
|
@ -436,9 +448,13 @@ static void si_decompress_subresource(struct pipe_context *ctx,
|
|||
struct r600_texture *rtex = (struct r600_texture*)tex;
|
||||
|
||||
if (rtex->is_depth && !rtex->is_flushing_texture) {
|
||||
si_blit_decompress_depth_in_place(sctx, rtex,
|
||||
si_blit_decompress_depth_in_place(sctx, rtex, false,
|
||||
level, level,
|
||||
first_layer, last_layer);
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
si_blit_decompress_depth_in_place(sctx, rtex, true,
|
||||
level, level,
|
||||
first_layer, last_layer);
|
||||
} else if (rtex->fmask.size || rtex->cmask.size) {
|
||||
si_blit_decompress_color(ctx, rtex, level, level,
|
||||
first_layer, last_layer);
|
||||
|
|
|
|||
|
|
@ -297,7 +297,7 @@ static void si_launch_grid(
|
|||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
shader->scratch_bo,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_PRIO_SCRATCH_BUFFER);
|
||||
|
||||
scratch_buffer_va = shader->scratch_bo->gpu_address;
|
||||
}
|
||||
|
|
@ -311,7 +311,7 @@ static void si_launch_grid(
|
|||
kernel_args_va += kernel_args_offset;
|
||||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
|
||||
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
|
||||
|
|
@ -340,7 +340,7 @@ static void si_launch_grid(
|
|||
}
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_PRIO_COMPUTE_GLOBAL);
|
||||
}
|
||||
|
||||
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
|
||||
|
|
@ -362,7 +362,7 @@ static void si_launch_grid(
|
|||
shader_va += pc;
|
||||
#endif
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
|
|||
/* This must be done after need_cs_space. */
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_CP_DMA);
|
||||
|
||||
/* Flush the caches for the first copy only.
|
||||
* Also wait for the previous CP DMA operations. */
|
||||
|
|
@ -240,9 +240,9 @@ void si_copy_buffer(struct si_context *sctx,
|
|||
|
||||
/* This must be done after r600_need_cs_space. */
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
|
||||
|
||||
si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "si_shader.h"
|
||||
#include "sid.h"
|
||||
#include "sid_tables.h"
|
||||
#include "ddebug/dd_util.h"
|
||||
|
||||
|
||||
static void si_dump_shader(struct si_shader_selector *sel, const char *name,
|
||||
|
|
@ -392,6 +393,141 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
|
|||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static void si_dump_last_ib(struct si_context *sctx, FILE *f)
|
||||
{
|
||||
int last_trace_id = -1;
|
||||
|
||||
if (!sctx->last_ib)
|
||||
return;
|
||||
|
||||
if (sctx->last_trace_buf) {
|
||||
/* We are expecting that the ddebug pipe has already
|
||||
* waited for the context, so this buffer should be idle.
|
||||
* If the GPU is hung, there is no point in waiting for it.
|
||||
*/
|
||||
uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
|
||||
NULL,
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||
PIPE_TRANSFER_READ);
|
||||
if (map)
|
||||
last_trace_id = *map;
|
||||
}
|
||||
|
||||
si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
|
||||
last_trace_id);
|
||||
free(sctx->last_ib); /* dump only once */
|
||||
sctx->last_ib = NULL;
|
||||
r600_resource_reference(&sctx->last_trace_buf, NULL);
|
||||
}
|
||||
|
||||
static const char *priority_to_string(enum radeon_bo_priority priority)
|
||||
{
|
||||
#define ITEM(x) [RADEON_PRIO_##x] = #x
|
||||
static const char *table[64] = {
|
||||
ITEM(FENCE),
|
||||
ITEM(TRACE),
|
||||
ITEM(SO_FILLED_SIZE),
|
||||
ITEM(QUERY),
|
||||
ITEM(IB1),
|
||||
ITEM(IB2),
|
||||
ITEM(DRAW_INDIRECT),
|
||||
ITEM(INDEX_BUFFER),
|
||||
ITEM(CP_DMA),
|
||||
ITEM(VCE),
|
||||
ITEM(UVD),
|
||||
ITEM(SDMA_BUFFER),
|
||||
ITEM(SDMA_TEXTURE),
|
||||
ITEM(USER_SHADER),
|
||||
ITEM(INTERNAL_SHADER),
|
||||
ITEM(CONST_BUFFER),
|
||||
ITEM(DESCRIPTORS),
|
||||
ITEM(BORDER_COLORS),
|
||||
ITEM(SAMPLER_BUFFER),
|
||||
ITEM(VERTEX_BUFFER),
|
||||
ITEM(SHADER_RW_BUFFER),
|
||||
ITEM(RINGS_STREAMOUT),
|
||||
ITEM(SCRATCH_BUFFER),
|
||||
ITEM(COMPUTE_GLOBAL),
|
||||
ITEM(SAMPLER_TEXTURE),
|
||||
ITEM(SHADER_RW_IMAGE),
|
||||
ITEM(SAMPLER_TEXTURE_MSAA),
|
||||
ITEM(COLOR_BUFFER),
|
||||
ITEM(DEPTH_BUFFER),
|
||||
ITEM(COLOR_BUFFER_MSAA),
|
||||
ITEM(DEPTH_BUFFER_MSAA),
|
||||
ITEM(CMASK),
|
||||
ITEM(DCC),
|
||||
ITEM(HTILE),
|
||||
};
|
||||
#undef ITEM
|
||||
|
||||
assert(priority < ARRAY_SIZE(table));
|
||||
return table[priority];
|
||||
}
|
||||
|
||||
static int bo_list_compare_va(const struct radeon_bo_list_item *a,
|
||||
const struct radeon_bo_list_item *b)
|
||||
{
|
||||
return a->vm_address < b->vm_address ? -1 :
|
||||
a->vm_address > b->vm_address ? 1 : 0;
|
||||
}
|
||||
|
||||
static void si_dump_last_bo_list(struct si_context *sctx, FILE *f)
|
||||
{
|
||||
unsigned i,j;
|
||||
|
||||
if (!sctx->last_bo_list)
|
||||
return;
|
||||
|
||||
/* Sort the list according to VM adddresses first. */
|
||||
qsort(sctx->last_bo_list, sctx->last_bo_count,
|
||||
sizeof(sctx->last_bo_list[0]), (void*)bo_list_compare_va);
|
||||
|
||||
fprintf(f, "Buffer list (in units of pages = 4kB):\n"
|
||||
COLOR_YELLOW " Size VM start page "
|
||||
"VM end page Usage" COLOR_RESET "\n");
|
||||
|
||||
for (i = 0; i < sctx->last_bo_count; i++) {
|
||||
/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
|
||||
const unsigned page_size = 4096;
|
||||
uint64_t va = sctx->last_bo_list[i].vm_address;
|
||||
uint64_t size = sctx->last_bo_list[i].buf->size;
|
||||
bool hit = false;
|
||||
|
||||
/* If there's unused virtual memory between 2 buffers, print it. */
|
||||
if (i) {
|
||||
uint64_t previous_va_end = sctx->last_bo_list[i-1].vm_address +
|
||||
sctx->last_bo_list[i-1].buf->size;
|
||||
|
||||
if (va > previous_va_end) {
|
||||
fprintf(f, " %10"PRIu64" -- hole --\n",
|
||||
(va - previous_va_end) / page_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the buffer. */
|
||||
fprintf(f, " %10"PRIu64" 0x%013"PRIx64" 0x%013"PRIx64" ",
|
||||
size / page_size, va / page_size, (va + size) / page_size);
|
||||
|
||||
/* Print the usage. */
|
||||
for (j = 0; j < 64; j++) {
|
||||
if (!(sctx->last_bo_list[i].priority_usage & (1llu << j)))
|
||||
continue;
|
||||
|
||||
fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
|
||||
hit = true;
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
|
||||
" Other buffers can still be allocated there.\n\n");
|
||||
|
||||
for (i = 0; i < sctx->last_bo_count; i++)
|
||||
pb_reference(&sctx->last_bo_list[i].buf, NULL);
|
||||
free(sctx->last_bo_list);
|
||||
sctx->last_bo_list = NULL;
|
||||
}
|
||||
|
||||
static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
|
||||
unsigned flags)
|
||||
{
|
||||
|
|
@ -406,34 +542,126 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
|
|||
si_dump_shader(sctx->gs_shader, "Geometry", f);
|
||||
si_dump_shader(sctx->ps_shader, "Fragment", f);
|
||||
|
||||
if (sctx->last_ib) {
|
||||
int last_trace_id = -1;
|
||||
|
||||
if (sctx->last_trace_buf) {
|
||||
/* We are expecting that the ddebug pipe has already
|
||||
* waited for the context, so this buffer should be idle.
|
||||
* If the GPU is hung, there is no point in waiting for it.
|
||||
*/
|
||||
uint32_t *map =
|
||||
sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
|
||||
NULL,
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||
PIPE_TRANSFER_READ);
|
||||
if (map)
|
||||
last_trace_id = *map;
|
||||
}
|
||||
|
||||
si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
|
||||
last_trace_id);
|
||||
free(sctx->last_ib); /* dump only once */
|
||||
sctx->last_ib = NULL;
|
||||
r600_resource_reference(&sctx->last_trace_buf, NULL);
|
||||
}
|
||||
si_dump_last_bo_list(sctx, f);
|
||||
si_dump_last_ib(sctx, f);
|
||||
|
||||
fprintf(f, "Done.\n");
|
||||
}
|
||||
|
||||
static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
|
||||
{
|
||||
char line[2000];
|
||||
unsigned sec, usec;
|
||||
int progress = 0;
|
||||
uint64_t timestamp = 0;
|
||||
bool fault = false;
|
||||
|
||||
FILE *p = popen("dmesg", "r");
|
||||
if (!p)
|
||||
return false;
|
||||
|
||||
while (fgets(line, sizeof(line), p)) {
|
||||
char *msg, len;
|
||||
|
||||
/* Get the timestamp. */
|
||||
if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
|
||||
assert(0);
|
||||
continue;
|
||||
}
|
||||
timestamp = sec * 1000000llu + usec;
|
||||
|
||||
/* If just updating the timestamp. */
|
||||
if (!out_addr)
|
||||
continue;
|
||||
|
||||
/* Process messages only if the timestamp is newer. */
|
||||
if (timestamp <= sctx->dmesg_timestamp)
|
||||
continue;
|
||||
|
||||
/* Only process the first VM fault. */
|
||||
if (fault)
|
||||
continue;
|
||||
|
||||
/* Remove trailing \n */
|
||||
len = strlen(line);
|
||||
if (len && line[len-1] == '\n')
|
||||
line[len-1] = 0;
|
||||
|
||||
/* Get the message part. */
|
||||
msg = strchr(line, ']');
|
||||
if (!msg) {
|
||||
assert(0);
|
||||
continue;
|
||||
}
|
||||
msg++;
|
||||
|
||||
switch (progress) {
|
||||
case 0:
|
||||
if (strstr(msg, "GPU fault detected:"))
|
||||
progress = 1;
|
||||
break;
|
||||
case 1:
|
||||
msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
|
||||
if (msg) {
|
||||
msg = strstr(msg, "0x");
|
||||
if (msg) {
|
||||
msg += 2;
|
||||
if (sscanf(msg, "%X", out_addr) == 1)
|
||||
fault = true;
|
||||
}
|
||||
}
|
||||
progress = 0;
|
||||
break;
|
||||
default:
|
||||
progress = 0;
|
||||
}
|
||||
}
|
||||
pclose(p);
|
||||
|
||||
if (timestamp > sctx->dmesg_timestamp)
|
||||
sctx->dmesg_timestamp = timestamp;
|
||||
return fault;
|
||||
}
|
||||
|
||||
void si_check_vm_faults(struct si_context *sctx)
|
||||
{
|
||||
struct pipe_screen *screen = sctx->b.b.screen;
|
||||
FILE *f;
|
||||
uint32_t addr;
|
||||
|
||||
/* Use conservative timeout 800ms, after which we won't wait any
|
||||
* longer and assume the GPU is hung.
|
||||
*/
|
||||
screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000);
|
||||
|
||||
if (!si_vm_fault_occured(sctx, &addr))
|
||||
return;
|
||||
|
||||
f = dd_get_debug_file();
|
||||
if (!f)
|
||||
return;
|
||||
|
||||
fprintf(f, "VM fault report.\n\n");
|
||||
fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
|
||||
fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
|
||||
fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
|
||||
fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
|
||||
|
||||
si_dump_last_bo_list(sctx, f);
|
||||
si_dump_last_ib(sctx, f);
|
||||
fclose(f);
|
||||
|
||||
fprintf(stderr, "Detected a VM fault, exiting...\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void si_init_debug_functions(struct si_context *sctx)
|
||||
{
|
||||
sctx->b.b.dump_debug_state = si_dump_debug_state;
|
||||
|
||||
/* Set the initial dmesg timestamp for this context, so that
|
||||
* only new messages will be checked for VM faults.
|
||||
*/
|
||||
if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
|
||||
si_vm_fault_occured(sctx, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
|
|||
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
|
||||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
|
||||
|
||||
desc->list_dirty = false;
|
||||
desc->pointer_dirty = true;
|
||||
|
|
@ -138,23 +138,12 @@ static void si_release_sampler_views(struct si_sampler_views *views)
|
|||
si_release_descriptors(&views->desc);
|
||||
}
|
||||
|
||||
static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res)
|
||||
{
|
||||
if (res->b.b.target == PIPE_BUFFER)
|
||||
return RADEON_PRIO_SHADER_BUFFER_RO;
|
||||
|
||||
if (res->b.b.nr_samples > 1)
|
||||
return RADEON_PRIO_SHADER_TEXTURE_MSAA;
|
||||
|
||||
return RADEON_PRIO_SHADER_TEXTURE_RO;
|
||||
}
|
||||
|
||||
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
|
||||
struct si_sampler_views *views)
|
||||
{
|
||||
uint64_t mask = views->desc.enabled_mask;
|
||||
|
||||
/* Add relocations to the CS. */
|
||||
/* Add buffers to the CS. */
|
||||
while (mask) {
|
||||
int i = u_bit_scan64(&mask);
|
||||
struct si_sampler_view *rview =
|
||||
|
|
@ -165,13 +154,13 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
rview->resource, RADEON_USAGE_READ,
|
||||
si_get_resource_ro_priority(rview->resource));
|
||||
r600_get_sampler_view_priority(rview->resource));
|
||||
}
|
||||
|
||||
if (!views->desc.buffer)
|
||||
return;
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
|
||||
}
|
||||
|
||||
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
|
||||
|
|
@ -190,7 +179,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
|
|||
if (rview->resource)
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
rview->resource, RADEON_USAGE_READ,
|
||||
si_get_resource_ro_priority(rview->resource));
|
||||
r600_get_sampler_view_priority(rview->resource));
|
||||
|
||||
pipe_sampler_view_reference(&views->views[slot], view);
|
||||
memcpy(views->desc.list + slot*8, view_desc, 8*4);
|
||||
|
|
@ -270,7 +259,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
|
|||
if (!states->desc.buffer)
|
||||
return;
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
|
||||
}
|
||||
|
||||
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
|
||||
|
|
@ -335,7 +324,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
|||
{
|
||||
uint64_t mask = buffers->desc.enabled_mask;
|
||||
|
||||
/* Add relocations to the CS. */
|
||||
/* Add buffers to the CS. */
|
||||
while (mask) {
|
||||
int i = u_bit_scan64(&mask);
|
||||
|
||||
|
|
@ -348,7 +337,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
|||
return;
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
buffers->desc.buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_PRIO_DESCRIPTORS);
|
||||
}
|
||||
|
||||
/* VERTEX BUFFERS */
|
||||
|
|
@ -369,14 +358,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
|
||||
}
|
||||
|
||||
if (!desc->buffer)
|
||||
return;
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
desc->buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_PRIO_DESCRIPTORS);
|
||||
}
|
||||
|
||||
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
||||
|
|
@ -403,7 +392,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
desc->buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_PRIO_DESCRIPTORS);
|
||||
|
||||
assert(count <= SI_NUM_VERTEX_BUFFERS);
|
||||
|
||||
|
|
@ -447,7 +436,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
|||
if (!bound[ve->vertex_buffer_index]) {
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource*)vb->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
|
||||
bound[ve->vertex_buffer_index] = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -870,7 +859,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
rbuffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
RADEON_PRIO_SAMPLER_BUFFER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1017,10 +1006,10 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||
for (i = 0; i < SI_NUM_SHADERS; i++) {
|
||||
si_init_buffer_resources(&sctx->const_buffers[i],
|
||||
SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
|
||||
si_init_buffer_resources(&sctx->rw_buffers[i],
|
||||
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
|
||||
|
||||
si_init_descriptors(&sctx->samplers[i].views.desc,
|
||||
SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
|
||||
|
|
|
|||
|
|
@ -79,9 +79,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
|
|||
r600_need_dma_space(&ctx->b, ncopy * 5);
|
||||
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SDMA_BUFFER);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = size < max_csize ? size : max_csize;
|
||||
|
|
@ -178,9 +178,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
|
|||
r600_need_dma_space(&ctx->b, ncopy * 9);
|
||||
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
|
||||
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
cheight = copy_height;
|
||||
|
|
@ -246,13 +246,13 @@ void si_dma_copy(struct pipe_context *ctx,
|
|||
goto fallback;
|
||||
|
||||
if (src->format != dst->format || src_box->depth > 1 ||
|
||||
rdst->dirty_level_mask != 0 ||
|
||||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
|
||||
rdst->cmask.size || rdst->fmask.size ||
|
||||
rsrc->cmask.size || rsrc->fmask.size) {
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
if (rsrc->dirty_level_mask) {
|
||||
if (rsrc->dirty_level_mask & (1 << src_level)) {
|
||||
ctx->flush_resource(ctx, src);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,7 @@ void si_need_cs_space(struct si_context *ctx)
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 
 	/* There are two memory usage counters in the winsys for all buffers
-	 * that have been added (cs_add_reloc) and two counters in the pipe
+	 * that have been added (cs_add_buffer) and two counters in the pipe
 	 * driver for those that haven't been added yet.
 	 */
 	if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
@@ -85,14 +85,27 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	if (ctx->trace_buf)
 		si_trace_emit(ctx);
 
-	/* Save the IB for debug contexts. */
 	if (ctx->is_debug) {
+		unsigned i;
+
+		/* Save the IB for debug contexts. */
 		free(ctx->last_ib);
 		ctx->last_ib_dw_size = cs->cdw;
 		ctx->last_ib = malloc(cs->cdw * 4);
 		memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
 		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
 		r600_resource_reference(&ctx->trace_buf, NULL);
+
+		/* Save the buffer list. */
+		if (ctx->last_bo_list) {
+			for (i = 0; i < ctx->last_bo_count; i++)
+				pb_reference(&ctx->last_bo_list[i].buf, NULL);
+			free(ctx->last_bo_list);
+		}
+		ctx->last_bo_count = ws->cs_get_buffer_list(cs, NULL);
+		ctx->last_bo_list = calloc(ctx->last_bo_count,
+					   sizeof(ctx->last_bo_list[0]));
+		ws->cs_get_buffer_list(cs, ctx->last_bo_list);
 	}
 
 	/* Flush the CS. */
@@ -103,6 +116,10 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	if (fence)
 		ws->fence_reference(fence, ctx->last_gfx_fence);
 
+	/* Check VM faults if needed. */
+	if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
+		si_check_vm_faults(ctx);
+
 	si_begin_new_cs(ctx);
 }
 
@@ -154,6 +171,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
 	si_mark_atom_dirty(ctx, &ctx->spi_map);
+	si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
|
|
|||
|
|
@ -81,6 +81,11 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
r600_resource_reference(&sctx->trace_buf, NULL);
|
||||
r600_resource_reference(&sctx->last_trace_buf, NULL);
|
||||
free(sctx->last_ib);
|
||||
if (sctx->last_bo_list) {
|
||||
for (i = 0; i < sctx->last_bo_count; i++)
|
||||
pb_reference(&sctx->last_bo_list[i].buf, NULL);
|
||||
free(sctx->last_bo_list);
|
||||
}
|
||||
FREE(sctx);
|
||||
}
|
||||
|
||||
|
|
@ -107,6 +112,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||
if (sctx == NULL)
|
||||
return NULL;
|
||||
|
||||
if (sscreen->b.debug_flags & DBG_CHECK_VM)
|
||||
flags |= PIPE_CONTEXT_DEBUG;
|
||||
|
||||
sctx->b.b.screen = screen; /* this must be set first */
|
||||
sctx->b.b.priv = priv;
|
||||
sctx->b.b.destroy = si_destroy_context;
|
||||
|
|
@ -287,6 +295,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_QUERY_LOD:
|
||||
case PIPE_CAP_TEXTURE_GATHER_SM5:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ struct si_sampler_view {
|
|||
* [4..7] = buffer descriptor */
|
||||
uint32_t state[8];
|
||||
uint32_t fmask_state[8];
|
||||
bool is_stencil_sampler;
|
||||
};
|
||||
|
||||
struct si_sampler_state {
|
||||
|
|
@ -187,9 +188,11 @@ struct si_context {
|
|||
struct si_viewports viewports;
|
||||
struct si_stencil_ref stencil_ref;
|
||||
struct r600_atom spi_map;
|
||||
struct r600_atom spi_ps_input;
|
||||
|
||||
/* Precomputed states. */
|
||||
struct si_pm4_state *init_config;
|
||||
bool init_config_has_vgt_flush;
|
||||
struct si_pm4_state *vgt_shader_config[4];
|
||||
/* With rasterizer discard, there doesn't have to be a pixel shader.
|
||||
* In that case, we bind this one: */
|
||||
|
|
@ -207,6 +210,7 @@ struct si_context {
|
|||
struct si_vertex_element *vertex_elements;
|
||||
unsigned sprite_coord_enable;
|
||||
bool flatshade;
|
||||
bool force_persample_interp;
|
||||
|
||||
/* shader descriptors */
|
||||
struct si_descriptors vertex_buffers;
|
||||
|
|
@ -237,7 +241,8 @@ struct si_context {
|
|||
bool dbcb_depth_copy_enabled;
|
||||
bool dbcb_stencil_copy_enabled;
|
||||
unsigned dbcb_copy_sample;
|
||||
bool db_inplace_flush_enabled;
|
||||
bool db_flush_depth_inplace;
|
||||
bool db_flush_stencil_inplace;
|
||||
bool db_depth_clear;
|
||||
bool db_depth_disable_expclear;
|
||||
unsigned ps_db_shader_control;
|
||||
|
|
@ -276,6 +281,9 @@ struct si_context {
|
|||
struct r600_resource *last_trace_buf;
|
||||
struct r600_resource *trace_buf;
|
||||
unsigned trace_id;
|
||||
uint64_t dmesg_timestamp;
|
||||
unsigned last_bo_count;
|
||||
struct radeon_bo_list_item *last_bo_list;
|
||||
};
|
||||
|
||||
/* cik_sdma.c */
|
||||
|
|
@ -310,6 +318,7 @@ void si_init_cp_dma_functions(struct si_context *sctx);
|
|||
|
||||
/* si_debug.c */
|
||||
void si_init_debug_functions(struct si_context *sctx);
|
||||
void si_check_vm_faults(struct si_context *sctx);
|
||||
|
||||
/* si_dma.c */
|
||||
void si_dma_copy(struct pipe_context *ctx,
|
||||
|
|
|
|||
|
|
@ -140,7 +140,8 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
|
|||
struct r600_resource *ib = state->indirect_buffer;
|
||||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ,
|
||||
RADEON_PRIO_IB2);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
|
||||
radeon_emit(cs, ib->gpu_address);
|
||||
|
|
|
|||
|
|
@ -855,6 +855,56 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
|
|||
}
|
||||
}
|
||||
|
||||
/* This shouldn't be used by explicit INTERP opcodes. */
|
||||
static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
|
||||
unsigned param)
|
||||
{
|
||||
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
|
||||
unsigned sample_param = 0;
|
||||
LLVMValueRef default_ij, sample_ij, force_sample;
|
||||
|
||||
default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
|
||||
|
||||
/* If the shader doesn't use center/centroid, just return the parameter.
|
||||
*
|
||||
* If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can
|
||||
* switch between center/centroid and sample without shader changes.
|
||||
*/
|
||||
switch (param) {
|
||||
case SI_PARAM_PERSP_CENTROID:
|
||||
case SI_PARAM_PERSP_CENTER:
|
||||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
|
||||
return default_ij;
|
||||
|
||||
sample_param = SI_PARAM_PERSP_SAMPLE;
|
||||
break;
|
||||
|
||||
case SI_PARAM_LINEAR_CENTROID:
|
||||
case SI_PARAM_LINEAR_CENTER:
|
||||
if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
|
||||
return default_ij;
|
||||
|
||||
sample_param = SI_PARAM_LINEAR_SAMPLE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return default_ij;
|
||||
}
|
||||
|
||||
/* Otherwise, we have to select (i,j) based on a user data SGPR. */
|
||||
sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
|
||||
|
||||
/* TODO: this can be done more efficiently by switching between
|
||||
* 2 prologs.
|
||||
*/
|
||||
force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
|
||||
SI_PARAM_PS_STATE_BITS);
|
||||
force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
|
||||
LLVMInt1TypeInContext(gallivm->context), "");
|
||||
return LLVMBuildSelect(gallivm->builder, force_sample,
|
||||
sample_ij, default_ij, "");
|
||||
}
|
||||
|
||||
static void declare_input_fs(
|
||||
struct radeon_llvm_context *radeon_bld,
|
||||
unsigned input_index,
|
||||
|
|
@ -925,7 +975,7 @@ static void declare_input_fs(
|
|||
if (interp_param_idx == -1)
|
||||
return;
|
||||
else if (interp_param_idx)
|
||||
interp_param = LLVMGetParam(main_fn, interp_param_idx);
|
||||
interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
|
||||
|
||||
/* fs.constant returns the param from the middle vertex, so it's not
|
||||
* really useful for flat shading. It's meant to be used for custom
|
||||
|
|
@ -3458,6 +3508,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
|
|||
|
||||
case TGSI_PROCESSOR_FRAGMENT:
|
||||
params[SI_PARAM_ALPHA_REF] = f32;
|
||||
params[SI_PARAM_PS_STATE_BITS] = i32;
|
||||
params[SI_PARAM_PRIM_MASK] = i32;
|
||||
last_sgpr = SI_PARAM_PRIM_MASK;
|
||||
params[SI_PARAM_PERSP_SAMPLE] = v2i32;
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ struct radeon_shader_reloc;
|
|||
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
|
||||
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
|
||||
#define SI_SGPR_ALPHA_REF 8 /* PS only */
|
||||
#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
|
||||
|
||||
#define SI_VS_NUM_USER_SGPR 12
|
||||
#define SI_LS_NUM_USER_SGPR 13
|
||||
|
|
@ -95,7 +96,7 @@ struct radeon_shader_reloc;
|
|||
#define SI_TES_NUM_USER_SGPR 10
|
||||
#define SI_GS_NUM_USER_SGPR 8
|
||||
#define SI_GSCOPY_NUM_USER_SGPR 4
|
||||
#define SI_PS_NUM_USER_SGPR 9
|
||||
#define SI_PS_NUM_USER_SGPR 10
|
||||
|
||||
/* LLVM function parameter indices */
|
||||
#define SI_PARAM_RW_BUFFERS 0
|
||||
|
|
@ -148,23 +149,27 @@ struct radeon_shader_reloc;
|
|||
|
||||
/* PS only parameters */
|
||||
#define SI_PARAM_ALPHA_REF 4
|
||||
#define SI_PARAM_PRIM_MASK 5
|
||||
#define SI_PARAM_PERSP_SAMPLE 6
|
||||
#define SI_PARAM_PERSP_CENTER 7
|
||||
#define SI_PARAM_PERSP_CENTROID 8
|
||||
#define SI_PARAM_PERSP_PULL_MODEL 9
|
||||
#define SI_PARAM_LINEAR_SAMPLE 10
|
||||
#define SI_PARAM_LINEAR_CENTER 11
|
||||
#define SI_PARAM_LINEAR_CENTROID 12
|
||||
#define SI_PARAM_LINE_STIPPLE_TEX 13
|
||||
#define SI_PARAM_POS_X_FLOAT 14
|
||||
#define SI_PARAM_POS_Y_FLOAT 15
|
||||
#define SI_PARAM_POS_Z_FLOAT 16
|
||||
#define SI_PARAM_POS_W_FLOAT 17
|
||||
#define SI_PARAM_FRONT_FACE 18
|
||||
#define SI_PARAM_ANCILLARY 19
|
||||
#define SI_PARAM_SAMPLE_COVERAGE 20
|
||||
#define SI_PARAM_POS_FIXED_PT 21
|
||||
/* Bits:
|
||||
* 0: force_persample_interp
|
||||
*/
|
||||
#define SI_PARAM_PS_STATE_BITS 5
|
||||
#define SI_PARAM_PRIM_MASK 6
|
||||
#define SI_PARAM_PERSP_SAMPLE 7
|
||||
#define SI_PARAM_PERSP_CENTER 8
|
||||
#define SI_PARAM_PERSP_CENTROID 9
|
||||
#define SI_PARAM_PERSP_PULL_MODEL 10
|
||||
#define SI_PARAM_LINEAR_SAMPLE 11
|
||||
#define SI_PARAM_LINEAR_CENTER 12
|
||||
#define SI_PARAM_LINEAR_CENTROID 13
|
||||
#define SI_PARAM_LINE_STIPPLE_TEX 14
|
||||
#define SI_PARAM_POS_X_FLOAT 15
|
||||
#define SI_PARAM_POS_Y_FLOAT 16
|
||||
#define SI_PARAM_POS_Z_FLOAT 17
|
||||
#define SI_PARAM_POS_W_FLOAT 18
|
||||
#define SI_PARAM_FRONT_FACE 19
|
||||
#define SI_PARAM_ANCILLARY 20
|
||||
#define SI_PARAM_SAMPLE_COVERAGE 21
|
||||
#define SI_PARAM_POS_FIXED_PT 22
|
||||
|
||||
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
|
||||
|
||||
|
|
@ -182,6 +187,14 @@ struct si_shader_selector {
|
|||
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
|
||||
unsigned type;
|
||||
|
||||
/* Whether the shader has to use a conditional assignment to
|
||||
* choose between weights when emulating
|
||||
* pipe_rasterizer_state::force_persample_interp.
|
||||
* If false, "si_emit_spi_ps_input" will take care of it instead.
|
||||
*/
|
||||
bool forces_persample_interp_for_persp;
|
||||
bool forces_persample_interp_for_linear;
|
||||
|
||||
unsigned gs_output_prim;
|
||||
unsigned gs_max_out_vertices;
|
||||
unsigned gs_num_invocations;
|
||||
|
|
|
|||
|
|
@ -686,6 +686,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
|||
|
||||
rs->two_side = state->light_twoside;
|
||||
rs->multisample_enable = state->multisample;
|
||||
rs->force_persample_interp = state->force_persample_interp;
|
||||
rs->clip_plane_enable = state->clip_plane_enable;
|
||||
rs->line_stipple_enable = state->line_stipple_enable;
|
||||
rs->poly_stipple_enable = state->poly_stipple_enable;
|
||||
|
|
@ -998,10 +999,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
|
|||
S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
|
||||
S_028000_COPY_CENTROID(1) |
|
||||
S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
|
||||
} else if (sctx->db_inplace_flush_enabled) {
|
||||
} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
|
||||
radeon_emit(cs,
|
||||
S_028000_DEPTH_COMPRESS_DISABLE(1) |
|
||||
S_028000_STENCIL_COMPRESS_DISABLE(1));
|
||||
S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
|
||||
S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
|
||||
} else if (sctx->db_depth_clear) {
|
||||
radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
|
||||
} else {
|
||||
|
|
@ -2238,7 +2239,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
|
|||
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
tex->cmask_buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_COLOR_META);
|
||||
RADEON_PRIO_CMASK);
|
||||
}
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
|
||||
|
|
@ -2285,7 +2286,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
|
|||
if (zb->db_htile_data_base) {
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
rtex->htile_buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_DEPTH_META);
|
||||
RADEON_PRIO_HTILE);
|
||||
}
|
||||
|
||||
radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
|
||||
|
|
@ -2411,6 +2412,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
pipe_resource_reference(&view->base.texture, texture);
|
||||
view->resource = &tmp->resource;
|
||||
|
||||
if (state->format == PIPE_FORMAT_X24S8_UINT ||
|
||||
state->format == PIPE_FORMAT_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_X32_S8X24_UINT ||
|
||||
state->format == PIPE_FORMAT_S8_UINT)
|
||||
view->is_stencil_sampler = true;
|
||||
|
||||
/* Buffer resource. */
|
||||
if (texture->target == PIPE_BUFFER) {
|
||||
unsigned stride, num_records;
|
||||
|
|
@ -3391,7 +3398,7 @@ static void si_init_config(struct si_context *sctx)
|
|||
if (sctx->b.chip_class >= CIK)
|
||||
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
|
||||
si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
RADEON_PRIO_BORDER_COLORS);
|
||||
|
||||
si_pm4_upload_indirect_buffer(sctx, pm4);
|
||||
sctx->init_config = pm4;
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ struct si_state_rasterizer {
|
|||
bool flatshade;
|
||||
bool two_side;
|
||||
bool multisample_enable;
|
||||
bool force_persample_interp;
|
||||
bool line_stipple_enable;
|
||||
unsigned sprite_coord_enable;
|
||||
unsigned pa_sc_line_stipple;
|
||||
|
|
@ -123,6 +124,7 @@ union si_state_atoms {
|
|||
struct r600_atom *viewports;
|
||||
struct r600_atom *stencil_ref;
|
||||
struct r600_atom *spi_map;
|
||||
struct r600_atom *spi_ps_input;
|
||||
} s;
|
||||
struct r600_atom *array[0];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -353,7 +353,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
|
|||
if (sctx->scratch_buffer) {
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RESOURCE_RW);
|
||||
RADEON_PRIO_SCRATCH_BUFFER);
|
||||
|
||||
}
|
||||
sctx->emit_scratch_reloc = false;
|
||||
|
|
@ -467,7 +467,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
t->buf_filled_size, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_SO_FILLED_SIZE);
|
||||
}
|
||||
|
||||
/* draw packet */
|
||||
|
|
@ -521,7 +521,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource *)info->indirect,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
|
||||
}
|
||||
|
||||
if (info->indexed) {
|
||||
|
|
@ -531,7 +531,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
|
|||
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource *)ib->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
|
||||
|
|
@ -813,9 +813,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
}
|
||||
}
|
||||
|
||||
/* TODO: VI should read index buffers through TC, so this shouldn't be
|
||||
* needed on VI. */
|
||||
if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
|
||||
/* VI reads index buffers through TC L2. */
|
||||
if (info->indexed && sctx->b.chip_class <= CIK &&
|
||||
r600_resource(ib.buffer)->TC_L2_dirty) {
|
||||
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
|
||||
r600_resource(ib.buffer)->TC_L2_dirty = false;
|
||||
}
|
||||
|
|
@ -858,6 +858,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
}
|
||||
if (sctx->framebuffer.compressed_cb_mask) {
|
||||
struct pipe_surface *surf;
|
||||
|
|
@ -883,7 +886,7 @@ void si_trace_emit(struct si_context *sctx)
|
|||
|
||||
sctx->trace_id++;
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ static void si_shader_ls(struct si_shader *shader)
|
|||
return;
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
|
||||
/* We need at least 2 components for LS.
|
||||
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
|
||||
|
|
@ -138,7 +138,7 @@ static void si_shader_hs(struct si_shader *shader)
|
|||
return;
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
|
||||
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
|
||||
num_sgprs = shader->num_sgprs;
|
||||
|
|
@ -173,7 +173,7 @@ static void si_shader_es(struct si_shader *shader)
|
|||
return;
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
|
||||
if (shader->selector->type == PIPE_SHADER_VERTEX) {
|
||||
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
|
||||
|
|
@ -279,7 +279,7 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
S_028B90_ENABLE(gs_num_invocations > 0));
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ static void si_shader_vs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
|
||||
if (shader->is_gs_copy_shader) {
|
||||
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
|
||||
|
|
@ -400,7 +400,7 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
struct si_pm4_state *pm4;
|
||||
unsigned i, spi_ps_in_control;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned spi_baryc_cntl = 0, spi_ps_input_ena;
|
||||
unsigned spi_baryc_cntl = 0;
|
||||
uint64_t va;
|
||||
|
||||
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
|
@ -437,19 +437,6 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
S_0286D8_BC_OPTIMIZE_DISABLE(1);
|
||||
|
||||
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
|
||||
spi_ps_input_ena = shader->spi_ps_input_ena;
|
||||
/* we need to enable at least one of them, otherwise we hang the GPU */
|
||||
assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
|
||||
G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));
|
||||
|
||||
si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
|
||||
si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
|
||||
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
|
||||
|
||||
si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
|
||||
|
|
@ -458,7 +445,7 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
|
||||
|
||||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
|
||||
|
||||
|
|
@ -680,6 +667,34 @@ static void *si_create_shader_state(struct pipe_context *ctx,
|
|||
tgsi_scan_shader(state->tokens, &sel->info);
|
||||
p_atomic_inc(&sscreen->b.num_shaders_created);
|
||||
|
||||
/* First set which opcode uses which (i,j) pair. */
|
||||
if (sel->info.uses_persp_opcode_interp_centroid)
|
||||
sel->info.uses_persp_centroid = true;
|
||||
|
||||
if (sel->info.uses_linear_opcode_interp_centroid)
|
||||
sel->info.uses_linear_centroid = true;
|
||||
|
||||
if (sel->info.uses_persp_opcode_interp_offset ||
|
||||
sel->info.uses_persp_opcode_interp_sample)
|
||||
sel->info.uses_persp_center = true;
|
||||
|
||||
if (sel->info.uses_linear_opcode_interp_offset ||
|
||||
sel->info.uses_linear_opcode_interp_sample)
|
||||
sel->info.uses_linear_center = true;
|
||||
|
||||
/* Determine if the shader has to use a conditional assignment when
|
||||
* emulating force_persample_interp.
|
||||
*/
|
||||
sel->forces_persample_interp_for_persp =
|
||||
sel->info.uses_persp_center +
|
||||
sel->info.uses_persp_centroid +
|
||||
sel->info.uses_persp_sample >= 2;
|
||||
|
||||
sel->forces_persample_interp_for_linear =
|
||||
sel->info.uses_linear_center +
|
||||
sel->info.uses_linear_centroid +
|
||||
sel->info.uses_linear_sample >= 2;
|
||||
|
||||
switch (pipe_shader_type) {
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
sel->gs_output_prim =
|
||||
|
|
@ -1064,6 +1079,77 @@ bcolor:
|
|||
assert(ps->nparam == num_written);
|
||||
}
|
||||
|
||||
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
{
   struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
   struct si_shader *ps = sctx->ps_shader->current;
   unsigned input_ena = ps->spi_ps_input_ena;

   /* we need to enable at least one of them, otherwise we hang the GPU */
   assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
          G_0286CC_PERSP_CENTER_ENA(input_ena) ||
          G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
          G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
          G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
          G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
          G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
          G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));

   if (sctx->force_persample_interp) {
      unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
                           G_0286CC_PERSP_CENTER_ENA(input_ena) +
                           G_0286CC_PERSP_CENTROID_ENA(input_ena);
      unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
                            G_0286CC_LINEAR_CENTER_ENA(input_ena) +
                            G_0286CC_LINEAR_CENTROID_ENA(input_ena);

      /* If only one set of (i,j) coordinates is used, we can disable
       * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
       * where CENTER/CENTROID are expected, effectively forcing per-sample
       * interpolation.
       */
      if (num_persp == 1) {
         input_ena &= C_0286CC_PERSP_CENTER_ENA;
         input_ena &= C_0286CC_PERSP_CENTROID_ENA;
         input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
      }
      if (num_linear == 1) {
         input_ena &= C_0286CC_LINEAR_CENTER_ENA;
         input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
         input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
      }

      /* If at least 2 sets of coordinates are used, we can't use this
       * trick and have to select SAMPLE using a conditional assignment
       * in the shader with "force_persample_interp" being a shader constant.
       */
   }

   radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
   radeon_emit(cs, input_ena);
   radeon_emit(cs, input_ena);

   if (ps->selector->forces_persample_interp_for_persp ||
       ps->selector->forces_persample_interp_for_linear)
      radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
                        SI_SGPR_PS_STATE_BITS * 4,
                        sctx->force_persample_interp);
}
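
The remapping above is the whole trick behind forcing per-sample interpolation from state emission: when the fragment shader only ever asks for one perspective (or linear) barycentric set, the CENTER/CENTROID enable bits can simply be swapped for the SAMPLE bit and the hardware feeds sample-rate coordinates where the shader expected center ones. A minimal stand-alone sketch of that bit manipulation follows; the *_ENA constants are illustrative stand-ins for the real G_0286CC_*/C_0286CC_* register macros, not the actual bit positions.

   /* Simplified model of the SPI_PS_INPUT_ENA rewrite done above.
    * Bit positions are made up; only the logic mirrors the driver. */
   #include <assert.h>

   #define PERSP_SAMPLE_ENA    (1u << 0)
   #define PERSP_CENTER_ENA    (1u << 1)
   #define PERSP_CENTROID_ENA  (1u << 2)

   static unsigned force_persample_persp(unsigned input_ena)
   {
      unsigned num_persp = !!(input_ena & PERSP_SAMPLE_ENA) +
                           !!(input_ena & PERSP_CENTER_ENA) +
                           !!(input_ena & PERSP_CENTROID_ENA);

      /* One (i,j) set in use: drop CENTER/CENTROID, turn on SAMPLE, and the
       * shader receives sample coordinates where it expected center ones. */
      if (num_persp == 1)
         input_ena = (input_ena & ~(PERSP_CENTER_ENA | PERSP_CENTROID_ENA)) |
                     PERSP_SAMPLE_ENA;

      /* With two or more sets the register trick is not enough; the shader
       * has to select SAMPLE itself via a shader constant. */
      return input_ena;
   }

   int main(void)
   {
      assert(force_persample_persp(PERSP_CENTER_ENA) == PERSP_SAMPLE_ENA);
      return 0;
   }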
|
||||
|
||||
/**
|
||||
* Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
|
||||
*/
|
||||
static void si_init_config_add_vgt_flush(struct si_context *sctx)
|
||||
{
|
||||
if (sctx->init_config_has_vgt_flush)
|
||||
return;
|
||||
|
||||
si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE);
|
||||
si_pm4_cmd_add(sctx->init_config, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
|
||||
si_pm4_cmd_end(sctx->init_config, false);
|
||||
sctx->init_config_has_vgt_flush = true;
|
||||
}
|
||||
|
||||
/* Initialize state related to ESGS / GSVS ring buffers */
|
||||
static void si_init_gs_rings(struct si_context *sctx)
|
||||
{
|
||||
|
|
@ -1084,6 +1170,8 @@ static void si_init_gs_rings(struct si_context *sctx)
|
|||
return;
|
||||
}
|
||||
|
||||
si_init_config_add_vgt_flush(sctx);
|
||||
|
||||
/* Append these registers to the init config state. */
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
if (sctx->b.chip_class >= VI) {
|
||||
|
|
@ -1330,6 +1418,8 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
|
|||
|
||||
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
|
||||
|
||||
si_init_config_add_vgt_flush(sctx);
|
||||
|
||||
/* Append these registers to the init config state. */
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
|
||||
|
|
@ -1535,6 +1625,12 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
si_mark_atom_dirty(sctx, &sctx->spi_map);
|
||||
}
|
||||
|
||||
if (si_pm4_state_changed(sctx, ps) ||
|
||||
sctx->force_persample_interp != rs->force_persample_interp) {
|
||||
sctx->force_persample_interp = rs->force_persample_interp;
|
||||
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
|
||||
}
|
||||
|
||||
if (si_pm4_state_changed(sctx, ls) ||
|
||||
si_pm4_state_changed(sctx, hs) ||
|
||||
si_pm4_state_changed(sctx, es) ||
|
||||
|
|
@ -1563,6 +1659,7 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
void si_init_shader_functions(struct si_context *sctx)
|
||||
{
|
||||
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
|
||||
si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
|
||||
|
||||
sctx->b.b.create_vs_state = si_create_vs_state;
|
||||
sctx->b.b.create_tcs_state = si_create_tcs_state;
|
||||
|
|
|
|||
|
|
@ -247,6 +247,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
|
|||
} else {
|
||||
FREE(transfer);
|
||||
}
|
||||
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
|
|
@ -275,9 +275,9 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
|
|||
struct svga_screen *ss = svga_screen(pipe->screen);
|
||||
struct svga_context *svga = svga_context(pipe);
|
||||
struct svga_buffer *sbuf = svga_buffer(transfer->resource);
|
||||
|
||||
|
||||
pipe_mutex_lock(ss->swc_mutex);
|
||||
|
||||
|
||||
assert(sbuf->map.count);
|
||||
if (sbuf->map.count) {
|
||||
--sbuf->map.count;
|
||||
|
|
@ -296,7 +296,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
|
|||
*/
|
||||
|
||||
SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
|
||||
|
||||
|
||||
sbuf->dma.flags.discard = TRUE;
|
||||
|
||||
svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
|
||||
|
|
@ -316,28 +316,28 @@ svga_buffer_destroy( struct pipe_screen *screen,
|
|||
struct svga_buffer *sbuf = svga_buffer( buf );
|
||||
|
||||
assert(!p_atomic_read(&buf->reference.count));
|
||||
|
||||
|
||||
assert(!sbuf->dma.pending);
|
||||
|
||||
if(sbuf->handle)
|
||||
if (sbuf->handle)
|
||||
svga_buffer_destroy_host_surface(ss, sbuf);
|
||||
|
||||
if(sbuf->uploaded.buffer)
|
||||
|
||||
if (sbuf->uploaded.buffer)
|
||||
pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
|
||||
|
||||
if(sbuf->hwbuf)
|
||||
if (sbuf->hwbuf)
|
||||
svga_buffer_destroy_hw_storage(ss, sbuf);
|
||||
|
||||
if(sbuf->swbuf && !sbuf->user)
|
||||
|
||||
if (sbuf->swbuf && !sbuf->user)
|
||||
align_free(sbuf->swbuf);
|
||||
|
||||
|
||||
ss->total_resource_bytes -= sbuf->size;
|
||||
|
||||
FREE(sbuf);
|
||||
}
|
||||
|
||||
|
||||
struct u_resource_vtbl svga_buffer_vtbl =
|
||||
struct u_resource_vtbl svga_buffer_vtbl =
|
||||
{
|
||||
u_default_resource_get_handle, /* get_handle */
|
||||
svga_buffer_destroy, /* resource_destroy */
|
||||
|
|
@ -355,11 +355,11 @@ svga_buffer_create(struct pipe_screen *screen,
|
|||
{
|
||||
struct svga_screen *ss = svga_screen(screen);
|
||||
struct svga_buffer *sbuf;
|
||||
|
||||
|
||||
sbuf = CALLOC_STRUCT(svga_buffer);
|
||||
if(!sbuf)
|
||||
if (!sbuf)
|
||||
goto error1;
|
||||
|
||||
|
||||
sbuf->b.b = *template;
|
||||
sbuf->b.vtbl = &svga_buffer_vtbl;
|
||||
pipe_reference_init(&sbuf->b.b.reference, 1);
|
||||
|
|
@ -378,7 +378,7 @@ svga_buffer_create(struct pipe_screen *screen,
|
|||
}
|
||||
}
|
||||
|
||||
if(svga_buffer_needs_hw_storage(template->bind)) {
|
||||
if (svga_buffer_needs_hw_storage(template->bind)) {
|
||||
|
||||
/* If the buffer will be used for vertex/index/stream data, set all
|
||||
* the flags so that the buffer will be accepted for all those uses.
|
||||
|
|
@ -396,22 +396,22 @@ svga_buffer_create(struct pipe_screen *screen,
|
|||
sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
|
||||
}
|
||||
|
||||
if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
|
||||
if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
|
||||
goto error2;
|
||||
}
|
||||
else {
|
||||
sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
|
||||
if(!sbuf->swbuf)
|
||||
if (!sbuf->swbuf)
|
||||
goto error2;
|
||||
}
|
||||
|
||||
|
||||
debug_reference(&sbuf->b.b.reference,
|
||||
(debug_reference_descriptor)debug_describe_resource, 0);
|
||||
|
||||
sbuf->size = util_resource_size(&sbuf->b.b);
|
||||
ss->total_resource_bytes += sbuf->size;
|
||||
|
||||
return &sbuf->b.b;
|
||||
return &sbuf->b.b;
|
||||
|
||||
error2:
|
||||
FREE(sbuf);
|
||||
|
|
@ -419,6 +419,7 @@ error1:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
struct pipe_resource *
|
||||
svga_user_buffer_create(struct pipe_screen *screen,
|
||||
void *ptr,
|
||||
|
|
@ -426,11 +427,11 @@ svga_user_buffer_create(struct pipe_screen *screen,
|
|||
unsigned bind)
|
||||
{
|
||||
struct svga_buffer *sbuf;
|
||||
|
||||
|
||||
sbuf = CALLOC_STRUCT(svga_buffer);
|
||||
if(!sbuf)
|
||||
if (!sbuf)
|
||||
goto no_sbuf;
|
||||
|
||||
|
||||
pipe_reference_init(&sbuf->b.b.reference, 1);
|
||||
sbuf->b.vtbl = &svga_buffer_vtbl;
|
||||
sbuf->b.b.screen = screen;
|
||||
|
|
@ -448,8 +449,8 @@ svga_user_buffer_create(struct pipe_screen *screen,
|
|||
|
||||
debug_reference(&sbuf->b.b.reference,
|
||||
(debug_reference_descriptor)debug_describe_resource, 0);
|
||||
|
||||
return &sbuf->b.b;
|
||||
|
||||
return &sbuf->b.b;
|
||||
|
||||
no_sbuf:
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -48,9 +48,11 @@ svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv)
|
|||
{
|
||||
char res[128];
|
||||
debug_describe_resource(res, sv->texture);
|
||||
util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod);
|
||||
util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>",
|
||||
res, sv->min_lod, sv->max_lod);
|
||||
}
|
||||
|
||||
|
||||
struct svga_sampler_view *
|
||||
svga_get_tex_sampler_view(struct pipe_context *pipe,
|
||||
struct pipe_resource *pt,
|
||||
|
|
@ -58,10 +60,11 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
|
|||
{
|
||||
struct svga_context *svga = svga_context(pipe);
|
||||
struct svga_screen *ss = svga_screen(pipe->screen);
|
||||
struct svga_texture *tex = svga_texture(pt);
|
||||
struct svga_texture *tex = svga_texture(pt);
|
||||
struct svga_sampler_view *sv = NULL;
|
||||
SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE;
|
||||
SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW);
|
||||
SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format,
|
||||
PIPE_BIND_SAMPLER_VIEW);
|
||||
boolean view = TRUE;
|
||||
|
||||
assert(pt);
|
||||
|
|
@ -155,7 +158,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
|
|||
sv->key.cachable = 0;
|
||||
sv->handle = tex->handle;
|
||||
debug_reference(&sv->reference,
|
||||
(debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
|
||||
(debug_reference_descriptor)
|
||||
svga_debug_describe_sampler_view, 0);
|
||||
return sv;
|
||||
}
|
||||
|
||||
|
|
@ -164,13 +168,16 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
|
|||
pipe_mutex_unlock(ss->tex_mutex);
|
||||
|
||||
debug_reference(&sv->reference,
|
||||
(debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
|
||||
(debug_reference_descriptor)
|
||||
svga_debug_describe_sampler_view, 0);
|
||||
|
||||
return sv;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
|
||||
svga_validate_sampler_view(struct svga_context *svga,
|
||||
struct svga_sampler_view *v)
|
||||
{
|
||||
struct svga_texture *tex = svga_texture(v->texture);
|
||||
unsigned numFaces;
|
||||
|
|
@ -186,7 +193,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
|
|||
|
||||
age = tex->age;
|
||||
|
||||
if(tex->b.b.target == PIPE_TEXTURE_CUBE)
|
||||
if (tex->b.b.target == PIPE_TEXTURE_CUBE)
|
||||
numFaces = 6;
|
||||
else
|
||||
numFaces = 1;
|
||||
|
|
@ -207,12 +214,13 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
|
|||
v->age = age;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
|
||||
{
|
||||
struct svga_texture *tex = svga_texture(v->texture);
|
||||
|
||||
if(v->handle != tex->handle) {
|
||||
if (v->handle != tex->handle) {
|
||||
struct svga_screen *ss = svga_screen(v->texture->screen);
|
||||
SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
|
||||
svga_screen_surface_destroy(ss, &v->key, &v->handle);
|
||||
|
|
|
|||
|
|
@ -380,6 +380,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -704,6 +704,24 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
|
|||
assert(size == 0);
|
||||
}
|
||||
|
||||
      if (size % 16 != 0) {
         /* GL's buffer range sizes can be any number of bytes but the
          * SVGA3D device requires a multiple of 16 bytes.
          */
         const unsigned total_size = buffer->b.b.width0;

         if (offset + align(size, 16) <= total_size) {
            /* round up size to multiple of 16 */
            size = align(size, 16);
         }
         else {
            /* round down to multiple of 16 (this may cause rendering problems
             * but should avoid a device error).
             */
            size &= ~15;
         }
      }

      assert(size % 16 == 0);
      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
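
This rounding policy is easy to get wrong, so here is a small self-contained sketch of the same idea: pad the range up to the next multiple of 16 when the padding still fits inside the buffer, otherwise shrink it to the previous multiple. The helper names and the test values are illustrative, not part of the SVGA driver.

   #include <assert.h>

   static unsigned round_up_16(unsigned x)   { return (x + 15u) & ~15u; }
   static unsigned round_down_16(unsigned x) { return x & ~15u; }

   /* Clamp a byte range [offset, offset+size) to a 16-byte multiple size. */
   static unsigned fix_cb_size(unsigned offset, unsigned size, unsigned total_size)
   {
      if (size % 16 != 0) {
         if (offset + round_up_16(size) <= total_size)
            size = round_up_16(size);     /* padding fits in the buffer */
         else
            size = round_down_16(size);   /* shrink rather than overrun */
      }
      assert(size % 16 == 0);
      return size;
   }

   int main(void)
   {
      assert(fix_cb_size(0, 20, 64) == 32);    /* rounded up */
      assert(fix_cb_size(48, 20, 64) == 16);   /* no room, rounded down */
      return 0;
   }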
|||
|
|
@ -188,6 +188,9 @@ vc4_context_destroy(struct pipe_context *pctx)
|
|||
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
|
||||
pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
|
||||
|
||||
pipe_surface_reference(&vc4->color_write, NULL);
|
||||
pipe_surface_reference(&vc4->color_read, NULL);
|
||||
|
||||
vc4_program_fini(pctx);
|
||||
|
||||
ralloc_free(vc4);
|
||||
|
|
|
|||
|
|
@ -103,7 +103,6 @@ struct vc4_uncompiled_shader {
|
|||
/** How many variants of this program were compiled, for shader-db. */
|
||||
uint32_t compiled_variant_count;
|
||||
struct pipe_shader_state base;
|
||||
const struct tgsi_token *twoside_tokens;
|
||||
};
|
||||
|
||||
struct vc4_ubo_range {
|
||||
|
|
|
|||
|
|
@ -1738,27 +1738,6 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
|||
}
|
||||
|
||||
const struct tgsi_token *tokens = key->shader_state->base.tokens;
|
||||
if (c->fs_key && c->fs_key->light_twoside) {
|
||||
if (!key->shader_state->twoside_tokens) {
|
||||
const struct tgsi_lowering_config lowering_config = {
|
||||
.color_two_side = true,
|
||||
};
|
||||
struct tgsi_shader_info info;
|
||||
key->shader_state->twoside_tokens =
|
||||
tgsi_transform_lowering(&lowering_config,
|
||||
key->shader_state->base.tokens,
|
||||
&info);
|
||||
|
||||
/* If no transformation occurred, then NULL is
|
||||
* returned and we just use our original tokens.
|
||||
*/
|
||||
if (!key->shader_state->twoside_tokens) {
|
||||
key->shader_state->twoside_tokens =
|
||||
key->shader_state->base.tokens;
|
||||
}
|
||||
}
|
||||
tokens = key->shader_state->twoside_tokens;
|
||||
}
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_TGSI) {
|
||||
fprintf(stderr, "%s prog %d/%d TGSI:\n",
|
||||
|
|
@ -1772,6 +1751,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
|||
nir_convert_to_ssa(c->s);
|
||||
if (stage == QSTAGE_FRAG)
|
||||
vc4_nir_lower_blend(c);
|
||||
if (c->fs_key && c->fs_key->light_twoside)
|
||||
nir_lower_two_sided_color(c->s);
|
||||
vc4_nir_lower_io(c);
|
||||
nir_lower_idiv(c->s);
|
||||
nir_lower_load_const_to_scalar(c->s);
|
||||
|
|
@ -2190,8 +2171,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
|
|||
hash_table_foreach(vc4->vs_cache, entry)
|
||||
delete_from_cache_if_matches(vc4->vs_cache, entry, so);
|
||||
|
||||
if (so->twoside_tokens != so->base.tokens)
|
||||
free((void *)so->twoside_tokens);
|
||||
free((void *)so->base.tokens);
|
||||
free(so);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -181,6 +181,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
|
|||
|
|
@ -106,10 +106,15 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
|
|||
{
|
||||
for (int i = 0; i < exec->bo_count; i++) {
|
||||
struct drm_gem_cma_object *obj = exec->bo[i];
|
||||
struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
|
||||
struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
|
||||
struct vc4_bo *bo = drm_bo->bo;
|
||||
|
||||
memcpy(bo->map, obj->vaddr, bo->size);
|
||||
|
||||
if (drm_bo->validated_shader) {
|
||||
free(drm_bo->validated_shader->texture_samples);
|
||||
free(drm_bo->validated_shader);
|
||||
}
|
||||
free(obj);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -631,6 +631,7 @@ enum pipe_cap
|
|||
PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
|
||||
PIPE_CAP_DEPTH_BOUNDS_TEST,
|
||||
PIPE_CAP_TGSI_TXQS,
|
||||
PIPE_CAP_FORCE_PERSAMPLE_INTERP,
|
||||
};
|
||||
|
||||
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ struct pipe_rasterizer_state
|
|||
unsigned point_tri_clip:1; /** large points clipped as tris or points */
|
||||
unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
|
||||
unsigned multisample:1; /* XXX maybe more ms state in future */
|
||||
unsigned force_persample_interp:1;
|
||||
unsigned line_smooth:1;
|
||||
unsigned line_stipple_enable:1;
|
||||
unsigned line_last_pixel:1;
|
||||
|
|
|
|||
|
|
@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
|
|||
* may occur as the stvis->color_format.
|
||||
*/
|
||||
switch(format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
depth = 32;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
depth = 24;
|
||||
break;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
|
|
@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable,
|
|||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_RGB565;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_XRGB8888;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_ARGB8888;
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_RGBA8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_ABGR8888;
|
||||
break;
|
||||
default:
|
||||
|
|
@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
|
|||
|
||||
switch (format) {
|
||||
case 32:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case 24:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case 16:
|
||||
pf = PIPE_FORMAT_Z16_UNORM;
|
||||
|
|
@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
|
|||
pf = PIPE_FORMAT_B5G6R5_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_XRGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ARGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ABGR8888:
|
||||
pf = PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
pf = PIPE_FORMAT_RGBA8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
pf = PIPE_FORMAT_NONE;
|
||||
|
|
@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen,
|
|||
pf = PIPE_FORMAT_B5G6R5_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_XRGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ARGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ABGR8888:
|
||||
pf = PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
pf = PIPE_FORMAT_RGBA8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
pf = PIPE_FORMAT_NONE;
|
||||
|
|
@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen)
|
|||
}
|
||||
|
||||
struct dri2_fence {
|
||||
struct dri_screen *driscreen;
|
||||
struct pipe_fence_handle *pipe_fence;
|
||||
void *cl_event;
|
||||
};
|
||||
|
|
@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
fence->driscreen = dri_screen(_ctx->driScreenPriv);
|
||||
return fence;
|
||||
}
|
||||
|
||||
|
|
@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
fence->driscreen = driscreen;
|
||||
return fence;
|
||||
}
|
||||
|
||||
|
|
@ -1360,9 +1363,9 @@ static GLboolean
|
|||
dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
|
||||
uint64_t timeout)
|
||||
{
|
||||
struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
|
||||
struct pipe_screen *screen = driscreen->base.screen;
|
||||
struct dri2_fence *fence = (struct dri2_fence*)_fence;
|
||||
struct dri_screen *driscreen = fence->driscreen;
|
||||
struct pipe_screen *screen = driscreen->base.screen;
|
||||
|
||||
/* No need to flush. The context was flushed when the fence was created. */
|
||||
|
||||
|
|
|
|||
|
|
@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
|
|||
if (format == __DRI_TEXTURE_FORMAT_RGB) {
|
||||
/* only need to cover the formats recognized by dri_fill_st_visual */
|
||||
switch (internal_format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
internal_format = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
|
||||
case PIPE_FORMAT_ARGB8888_UNORM:
|
||||
internal_format = PIPE_FORMAT_XRGB8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -370,7 +370,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
{
   unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
   int i = cs->buffer_indices_hashlist[hash];

@ -379,15 +379,15 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)

   if (i == -1 || cs->buffers[i].bo == bo)
      return i;

   /* Hash collision, look for the BO in the list of relocs linearly. */
   /* Hash collision, look for the BO in the list of buffers linearly. */
   for (i = cs->num_buffers - 1; i >= 0; i--) {
      if (cs->buffers[i].bo == bo) {
         /* Put this reloc in the hash list.
         /* Put this buffer in the hash list.
          * This will prevent additional hash collisions if there are
          * several consecutive get_reloc calls for the same buffer.
          * several consecutive lookup_buffer calls for the same buffer.
          *
          * Example: Assuming buffers A,B,C collide in the hash list,
          * the following sequence of relocs:
          * the following sequence of buffers:
          * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
          * will collide here: ^ and here: ^,
          * meaning that we should get very few collisions in the end. */
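
The lookup above is a one-entry cache per hash bucket in front of a linear list: a hit costs O(1), and a collision pays for one backwards scan and then refreshes the cached index so that subsequent lookups for the same buffer are O(1) again. A stand-alone sketch of the scheme, with illustrative structure names and sizes rather than the real amdgpu winsys types:

   #include <string.h>

   #define NUM_BUCKETS 256   /* must be a power of two */

   struct bo_list {
      const void *bo[1024];
      int count;
      int hashlist[NUM_BUCKETS];   /* last index seen per bucket, or -1 */
   };

   static void bo_list_init(struct bo_list *l)
   {
      l->count = 0;
      memset(l->hashlist, -1, sizeof(l->hashlist));
   }

   static int bo_list_lookup(struct bo_list *l, const void *bo, unsigned unique_id)
   {
      unsigned hash = unique_id & (NUM_BUCKETS - 1);
      int i = l->hashlist[hash];

      if (i == -1 || l->bo[i] == bo)
         return i;                       /* empty bucket or cache hit */

      /* Collision: scan newest-to-oldest and remember the result, so a run
       * of lookups for the same buffer only pays for the scan once. */
      for (i = l->count - 1; i >= 0; i--) {
         if (l->bo[i] == bo) {
            l->hashlist[hash] = i;
            return i;
         }
      }
      return -1;
   }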
@ -398,32 +398,33 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
|
||||
static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
|
||||
struct amdgpu_winsys_bo *bo,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
unsigned priority,
|
||||
enum radeon_bo_domain *added_domains)
|
||||
{
|
||||
struct amdgpu_cs_buffer *reloc;
|
||||
struct amdgpu_cs_buffer *buffer;
|
||||
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
|
||||
int i = -1;
|
||||
|
||||
priority = MIN2(priority, 15);
|
||||
assert(priority < 64);
|
||||
*added_domains = 0;
|
||||
|
||||
i = amdgpu_get_reloc(cs, bo);
|
||||
i = amdgpu_lookup_buffer(cs, bo);
|
||||
|
||||
if (i >= 0) {
|
||||
reloc = &cs->buffers[i];
|
||||
reloc->usage |= usage;
|
||||
*added_domains = domains & ~reloc->domains;
|
||||
reloc->domains |= domains;
|
||||
cs->flags[i] = MAX2(cs->flags[i], priority);
|
||||
buffer = &cs->buffers[i];
|
||||
buffer->priority_usage |= 1llu << priority;
|
||||
buffer->usage |= usage;
|
||||
*added_domains = domains & ~buffer->domains;
|
||||
buffer->domains |= domains;
|
||||
cs->flags[i] = MAX2(cs->flags[i], priority / 4);
|
||||
return i;
|
||||
}
|
||||
|
||||
/* New relocation, check if the backing array is large enough. */
|
||||
/* New buffer, check if the backing array is large enough. */
|
||||
if (cs->num_buffers >= cs->max_num_buffers) {
|
||||
uint32_t size;
|
||||
cs->max_num_buffers += 10;
|
||||
|
|
@ -437,16 +438,17 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
|
|||
cs->flags = realloc(cs->flags, cs->max_num_buffers);
|
||||
}
|
||||
|
||||
/* Initialize the new relocation. */
|
||||
/* Initialize the new buffer. */
|
||||
cs->buffers[cs->num_buffers].bo = NULL;
|
||||
amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
|
||||
cs->handles[cs->num_buffers] = bo->bo;
|
||||
cs->flags[cs->num_buffers] = priority;
|
||||
cs->flags[cs->num_buffers] = priority / 4;
|
||||
p_atomic_inc(&bo->num_cs_references);
|
||||
reloc = &cs->buffers[cs->num_buffers];
|
||||
reloc->bo = bo;
|
||||
reloc->usage = usage;
|
||||
reloc->domains = domains;
|
||||
buffer = &cs->buffers[cs->num_buffers];
|
||||
buffer->bo = bo;
|
||||
buffer->priority_usage = 1llu << priority;
|
||||
buffer->usage = usage;
|
||||
buffer->domains = domains;
|
||||
|
||||
cs->buffer_indices_hashlist[hash] = cs->num_buffers;
|
||||
|
||||
|
|
@ -454,7 +456,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
|
|||
return cs->num_buffers++;
|
||||
}
|
||||
|
||||
static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
||||
static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
|
|
@ -466,7 +468,7 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
|||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
|
||||
enum radeon_bo_domain added_domains;
|
||||
unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
|
||||
unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain,
|
||||
priority, &added_domains);
|
||||
|
||||
if (added_domains & RADEON_DOMAIN_GTT)
|
||||
|
|
@ -477,12 +479,12 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
|||
return index;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
|
||||
static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
|
||||
return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf);
|
||||
return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf);
|
||||
}
|
||||
|
||||
static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
|
||||
|
|
@ -500,6 +502,22 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
|
|||
return status;
|
||||
}
|
||||
|
||||
static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_bo_list_item *list)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
int i;
|
||||
|
||||
if (list) {
|
||||
for (i = 0; i < cs->num_buffers; i++) {
|
||||
pb_reference(&list[i].buf, &cs->buffers[i].bo->base);
|
||||
list[i].vm_address = cs->buffers[i].bo->va;
|
||||
list[i].priority_usage = cs->buffers[i].priority_usage;
|
||||
}
|
||||
}
|
||||
return cs->num_buffers;
|
||||
}
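
Note that cs_get_buffer_list doubles as a count query: called with a NULL list it only returns how many buffers the CS references, so a caller can size its array first and fill it in a second call. The per-buffer priority_usage mask it copies out is maintained by the add_buffer path earlier in this change, roughly as in the sketch below; structure and function names are illustrative, only the 64-bit-mask-plus-legacy-flag split mirrors the new code.

   #include <assert.h>
   #include <stdint.h>

   struct buffer_entry {
      uint64_t priority_usage;   /* bit N set => buffer used with priority N */
      unsigned kernel_flags;     /* legacy 0-15 value handed to the kernel */
   };

   static void record_priority(struct buffer_entry *b, unsigned priority)
   {
      assert(priority < 64);
      b->priority_usage |= 1llu << priority;
      if (priority / 4 > b->kernel_flags)   /* MAX2() in the driver */
         b->kernel_flags = priority / 4;
   }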
|
||||
|
||||
static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
|
||||
struct pipe_fence_handle **out_fence)
|
||||
{
|
||||
|
|
@ -621,8 +639,8 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
|
|||
fprintf(stderr, "amdgpu: command stream overflowed\n");
|
||||
}
|
||||
|
||||
amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
|
||||
RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
|
||||
amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
|
||||
RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
|
||||
|
||||
/* If the CS is not empty or overflowed.... */
|
||||
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
|
||||
|
|
@ -682,10 +700,11 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
|
|||
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
|
||||
ws->base.cs_create = amdgpu_cs_create;
|
||||
ws->base.cs_destroy = amdgpu_cs_destroy;
|
||||
ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
|
||||
ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
|
||||
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
|
||||
ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
|
||||
ws->base.cs_validate = amdgpu_cs_validate;
|
||||
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
|
||||
ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
|
||||
ws->base.cs_flush = amdgpu_cs_flush;
|
||||
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
|
||||
ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ struct amdgpu_ctx {
|
|||
|
||||
struct amdgpu_cs_buffer {
|
||||
struct amdgpu_winsys_bo *bo;
|
||||
uint64_t priority_usage;
|
||||
enum radeon_bo_usage usage;
|
||||
enum radeon_bo_domain domains;
|
||||
};
|
||||
|
|
@ -68,7 +69,7 @@ struct amdgpu_cs {
|
|||
struct amdgpu_cs_request request;
|
||||
struct amdgpu_cs_ib_info ib;
|
||||
|
||||
/* Relocs. */
|
||||
/* Buffers. */
|
||||
unsigned max_num_buffers;
|
||||
unsigned num_buffers;
|
||||
amdgpu_bo_handle *handles;
|
||||
|
|
@ -115,7 +116,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
|
|||
*rdst = rsrc;
|
||||
}
|
||||
|
||||
int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
|
||||
int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
|
||||
|
||||
static inline struct amdgpu_cs *
|
||||
amdgpu_cs(struct radeon_winsys_cs *base)
|
||||
|
|
@ -129,7 +130,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
|
|||
{
|
||||
int num_refs = bo->num_cs_references;
|
||||
return num_refs == bo->rws->num_cs ||
|
||||
(num_refs && amdgpu_get_reloc(cs, bo) != -1);
|
||||
(num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
|
||||
}
|
||||
|
||||
static inline boolean
|
||||
|
|
@ -142,7 +143,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
|
|||
if (!bo->num_cs_references)
|
||||
return FALSE;
|
||||
|
||||
index = amdgpu_get_reloc(cs, bo);
|
||||
index = amdgpu_lookup_buffer(cs, bo);
|
||||
if (index == -1)
|
||||
return FALSE;
|
||||
|
||||
|
|
|
|||
|
|
@ -37,13 +37,13 @@
|
|||
/*
|
||||
This file replaces libdrm's radeon_cs_gem with our own implementation.
|
||||
It's optimized specifically for Radeon DRM.
|
||||
Reloc writes and space checking are faster and simpler than their
|
||||
Adding buffers and space checking are faster and simpler than their
|
||||
counterparts in libdrm (the time complexity of all the functions
|
||||
is O(1) in nearly all scenarios, thanks to hashing).
|
||||
|
||||
It works like this:
|
||||
|
||||
cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
|
||||
cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
|
||||
also adds the size of 'buf' to the used_gart and used_vram winsys variables
|
||||
based on the domains, which are simply or'd for the accounting purposes.
|
||||
The adding is skipped if the reloc is already present in the list, but it
|
||||
|
|
@ -58,8 +58,8 @@
|
|||
(done in the pipe driver)
|
||||
|
||||
cs_write_reloc(cs, buf) just writes a reloc that has been added using
|
||||
cs_add_reloc. The read_domain and write_domain parameters have been removed,
|
||||
because we already specify them in cs_add_reloc.
|
||||
cs_add_buffer. The read_domain and write_domain parameters have been removed,
|
||||
because we already specify them in cs_add_buffer.
|
||||
*/
|
||||
|
||||
#include "radeon_drm_cs.h"
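
The used_gart/used_vram bookkeeping described in the comment is per-domain accounting with an "already counted" mask per buffer, so re-adding a buffer with domains it has already been charged against costs nothing. A reduced sketch, with made-up domain bits and structure names:

   #include <stdint.h>

   enum { DOMAIN_GTT = 1 << 0, DOMAIN_VRAM = 1 << 1 };

   struct cs_account {
      uint64_t used_gart;
      uint64_t used_vram;
   };

   static void account_buffer(struct cs_account *acct, unsigned *counted_domains,
                              unsigned domains, uint64_t size)
   {
      unsigned added = domains & ~*counted_domains;  /* only newly seen domains */

      *counted_domains |= domains;
      if (added & DOMAIN_GTT)
         acct->used_gart += size;
      if (added & DOMAIN_VRAM)
         acct->used_vram += size;
   }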
|
||||
|
|
@ -99,8 +99,8 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
|
|||
|
||||
csc->fd = ws->fd;
|
||||
csc->nrelocs = 512;
|
||||
csc->relocs_bo = (struct radeon_bo**)
|
||||
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
|
||||
csc->relocs_bo = (struct radeon_bo_item*)
|
||||
CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
|
||||
if (!csc->relocs_bo) {
|
||||
return FALSE;
|
||||
}
|
||||
|
|
@ -139,8 +139,8 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
|
|||
unsigned i;
|
||||
|
||||
for (i = 0; i < csc->crelocs; i++) {
|
||||
p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
|
||||
radeon_bo_reference(&csc->relocs_bo[i], NULL);
|
||||
p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
|
||||
radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
|
||||
}
|
||||
|
||||
csc->crelocs = 0;
|
||||
|
|
@ -221,21 +221,21 @@ static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
|
|||
reloc->flags = MAX2(reloc->flags, priority);
|
||||
}
|
||||
|
||||
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
|
||||
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
|
||||
{
|
||||
unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
|
||||
int i = csc->reloc_indices_hashlist[hash];
|
||||
|
||||
/* not found or found */
|
||||
if (i == -1 || csc->relocs_bo[i] == bo)
|
||||
if (i == -1 || csc->relocs_bo[i].bo == bo)
|
||||
return i;
|
||||
|
||||
/* Hash collision, look for the BO in the list of relocs linearly. */
|
||||
for (i = csc->crelocs - 1; i >= 0; i--) {
|
||||
if (csc->relocs_bo[i] == bo) {
|
||||
if (csc->relocs_bo[i].bo == bo) {
|
||||
/* Put this reloc in the hash list.
|
||||
* This will prevent additional hash collisions if there are
|
||||
* several consecutive get_reloc calls for the same buffer.
|
||||
* several consecutive lookup_buffer calls for the same buffer.
|
||||
*
|
||||
* Example: Assuming buffers A,B,C collide in the hash list,
|
||||
* the following sequence of relocs:
|
||||
|
|
@ -249,7 +249,7 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
|
||||
static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
|
||||
struct radeon_bo *bo,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
|
|
@ -263,16 +263,17 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
|
|||
enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
|
||||
int i = -1;
|
||||
|
||||
priority = MIN2(priority, 15);
|
||||
assert(priority < 64);
|
||||
*added_domains = 0;
|
||||
|
||||
i = radeon_get_reloc(csc, bo);
|
||||
i = radeon_lookup_buffer(csc, bo);
|
||||
|
||||
if (i >= 0) {
|
||||
reloc = &csc->relocs[i];
|
||||
update_reloc(reloc, rd, wd, priority, added_domains);
|
||||
update_reloc(reloc, rd, wd, priority / 4, added_domains);
|
||||
csc->relocs_bo[i].priority_usage |= 1llu << priority;
|
||||
|
||||
/* For async DMA, every add_reloc call must add a buffer to the list
|
||||
/* For async DMA, every add_buffer call must add a buffer to the list
|
||||
* no matter how many duplicates there are. This is due to the fact
|
||||
* the DMA CS checker doesn't use NOP packets for offset patching,
|
||||
* but always uses the i-th buffer from the list to patch the i-th
|
||||
|
|
@ -292,7 +293,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
|
|||
uint32_t size;
|
||||
csc->nrelocs += 10;
|
||||
|
||||
size = csc->nrelocs * sizeof(struct radeon_bo*);
|
||||
size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
|
||||
csc->relocs_bo = realloc(csc->relocs_bo, size);
|
||||
|
||||
size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
|
||||
|
|
@ -302,14 +303,15 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
|
|||
}
|
||||
|
||||
/* Initialize the new relocation. */
|
||||
csc->relocs_bo[csc->crelocs] = NULL;
|
||||
radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
|
||||
csc->relocs_bo[csc->crelocs].bo = NULL;
|
||||
csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
|
||||
radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
|
||||
p_atomic_inc(&bo->num_cs_references);
|
||||
reloc = &csc->relocs[csc->crelocs];
|
||||
reloc->handle = bo->handle;
|
||||
reloc->read_domains = rd;
|
||||
reloc->write_domain = wd;
|
||||
reloc->flags = priority;
|
||||
reloc->flags = priority / 4;
|
||||
|
||||
csc->reloc_indices_hashlist[hash] = csc->crelocs;
|
||||
|
||||
|
|
@ -319,7 +321,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
|
|||
return csc->crelocs++;
|
||||
}
|
||||
|
||||
static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
||||
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
|
|
@ -328,7 +330,8 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
|||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
struct radeon_bo *bo = (struct radeon_bo*)buf;
|
||||
enum radeon_bo_domain added_domains;
|
||||
unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);
|
||||
unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
|
||||
&added_domains);
|
||||
|
||||
if (added_domains & RADEON_DOMAIN_GTT)
|
||||
cs->csc->used_gart += bo->base.size;
|
||||
|
|
@ -338,12 +341,12 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
|
|||
return index;
|
||||
}
|
||||
|
||||
static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
|
||||
static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
{
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
|
||||
return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
|
||||
return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
|
||||
}
|
||||
|
||||
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
|
||||
|
|
@ -356,14 +359,14 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
|
|||
if (status) {
|
||||
cs->csc->validated_crelocs = cs->csc->crelocs;
|
||||
} else {
|
||||
/* Remove lately-added relocations. The validation failed with them
|
||||
/* Remove lately-added buffers. The validation failed with them
|
||||
* and the CS is about to be flushed because of that. Keep only
|
||||
* the already-validated relocations. */
|
||||
* the already-validated buffers. */
|
||||
unsigned i;
|
||||
|
||||
for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
|
||||
p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
|
||||
radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
|
||||
p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
|
||||
radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
|
||||
}
|
||||
cs->csc->crelocs = cs->csc->validated_crelocs;
|
||||
|
||||
|
|
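
The failure path above is a watermark rollback: validated_crelocs records how far the list got the last time validation succeeded, and anything appended after that point is dropped when the next validation fails. In miniature, with illustrative names:

   struct cs_list {
      unsigned count;             /* buffers currently in the list */
      unsigned validated_count;   /* watermark from the last successful check */
   };

   static void after_validate(struct cs_list *l, int ok)
   {
      if (ok)
         l->validated_count = l->count;   /* advance the watermark */
      else
         l->count = l->validated_count;   /* drop lately-added entries; the
                                           * driver also releases their BO
                                           * references at this point */
   }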
@ -397,6 +400,22 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
|
|||
return gtt < cs->ws->info.gart_size * 0.7;
|
||||
}
|
||||
|
||||
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_bo_list_item *list)
|
||||
{
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
int i;
|
||||
|
||||
if (list) {
|
||||
for (i = 0; i < cs->csc->crelocs; i++) {
|
||||
pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base);
|
||||
list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
|
||||
list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
|
||||
}
|
||||
}
|
||||
return cs->csc->crelocs;
|
||||
}
|
||||
|
||||
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
|
||||
{
|
||||
unsigned i;
|
||||
|
|
@ -425,7 +444,7 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs
|
|||
}
|
||||
|
||||
for (i = 0; i < csc->crelocs; i++)
|
||||
p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
|
||||
p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
|
||||
|
||||
radeon_cs_context_cleanup(csc);
|
||||
}
|
||||
|
|
@ -513,7 +532,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
|
|||
|
||||
for (i = 0; i < crelocs; i++) {
|
||||
/* Update the number of active asynchronous CS ioctls for the buffer. */
|
||||
p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
|
||||
p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
|
||||
}
|
||||
|
||||
switch (cs->base.ring_type) {
|
||||
|
|
@ -607,7 +626,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
|
|||
if (!bo->num_cs_references)
|
||||
return FALSE;
|
||||
|
||||
index = radeon_get_reloc(cs->csc, bo);
|
||||
index = radeon_lookup_buffer(cs->csc, bo);
|
||||
if (index == -1)
|
||||
return FALSE;
|
||||
|
||||
|
|
@ -631,9 +650,9 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
|
|||
fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
|
||||
RADEON_DOMAIN_GTT, 0);
|
||||
/* Add the fence as a dummy relocation. */
|
||||
cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
|
||||
cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence),
|
||||
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_MIN);
|
||||
RADEON_PRIO_FENCE);
|
||||
return (struct pipe_fence_handle*)fence;
|
||||
}
|
||||
|
||||
|
|
@ -657,10 +676,11 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
|
|||
ws->base.ctx_destroy = radeon_drm_ctx_destroy;
|
||||
ws->base.cs_create = radeon_drm_cs_create;
|
||||
ws->base.cs_destroy = radeon_drm_cs_destroy;
|
||||
ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
|
||||
ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
|
||||
ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
|
||||
ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
|
||||
ws->base.cs_validate = radeon_drm_cs_validate;
|
||||
ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
|
||||
ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
|
||||
ws->base.cs_flush = radeon_drm_cs_flush;
|
||||
ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
|
||||
ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,11 @@
|
|||
|
||||
#include "radeon_drm_bo.h"
|
||||
|
||||
struct radeon_bo_item {
|
||||
struct radeon_bo *bo;
|
||||
uint64_t priority_usage;
|
||||
};
|
||||
|
||||
struct radeon_cs_context {
|
||||
uint32_t buf[16 * 1024];
|
||||
|
||||
|
|
@ -40,12 +45,13 @@ struct radeon_cs_context {
|
|||
|
||||
uint32_t cs_trace_id;
|
||||
|
||||
/* Relocs. */
|
||||
/* Buffers. */
|
||||
unsigned nrelocs;
|
||||
unsigned crelocs;
|
||||
unsigned validated_crelocs;
|
||||
struct radeon_bo **relocs_bo;
|
||||
struct radeon_bo_item *relocs_bo;
|
||||
struct drm_radeon_cs_reloc *relocs;
|
||||
uint64_t *priority_usage;
|
||||
|
||||
int reloc_indices_hashlist[512];
|
||||
|
||||
|
|
@ -77,7 +83,7 @@ struct radeon_drm_cs {
|
|||
struct radeon_bo *trace_buf;
|
||||
};
|
||||
|
||||
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
|
||||
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
|
||||
|
||||
static inline struct radeon_drm_cs *
|
||||
radeon_drm_cs(struct radeon_winsys_cs *base)
|
||||
|
|
@ -91,7 +97,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
|
|||
{
|
||||
int num_refs = bo->num_cs_references;
|
||||
return num_refs == bo->rws->num_cs ||
|
||||
(num_refs && radeon_get_reloc(cs->csc, bo) != -1);
|
||||
(num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
|
||||
}
|
||||
|
||||
static inline boolean
|
||||
|
|
@ -103,7 +109,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
|
|||
if (!bo->num_cs_references)
|
||||
return FALSE;
|
||||
|
||||
index = radeon_get_reloc(cs->csc, bo);
|
||||
index = radeon_lookup_buffer(cs->csc, bo);
|
||||
if (index == -1)
|
||||
return FALSE;
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
|
|||
}
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.handle = csc->relocs_bo[0]->handle;
|
||||
args.handle = csc->relocs_bo[0].bo->handle;
|
||||
for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
|
||||
usleep(1);
|
||||
lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
|
||||
|
|
@ -94,15 +94,15 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
|
|||
fprintf(dump, "\n");
|
||||
|
||||
for (i = 0; i < csc->crelocs; i++) {
|
||||
unsigned j, ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
|
||||
unsigned j, ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
|
||||
|
||||
ptr = radeon_bo_do_map(csc->relocs_bo[i]);
|
||||
ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
|
||||
if (ptr) {
|
||||
fprintf(dump, "static uint32_t bo_%04d_data[%d] = {\n ", i, ndw);
|
||||
for (j = 0; j < ndw; j++) {
|
||||
if (j && !(j % 8)) {
|
||||
uint32_t offset = (j - 8) << 2;
|
||||
fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i]->va);
|
||||
fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i].bo->va);
|
||||
}
|
||||
fprintf(dump, " 0x%08x,", ptr[j]);
|
||||
}
|
||||
|
|
@ -139,16 +139,16 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
|
|||
fprintf(dump, "\n");
|
||||
|
||||
for (i = 0; i < csc->crelocs; i++) {
|
||||
unsigned ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
|
||||
unsigned ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
|
||||
uint32_t *ptr;
|
||||
|
||||
ptr = radeon_bo_do_map(csc->relocs_bo[i]);
|
||||
ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
|
||||
if (ptr) {
|
||||
fprintf(dump, " bo[%d] = bo_new(&ctx, %d, bo_%04d_data, 0x%016"PRIx64", 0x%08x);\n",
|
||||
i, ndw, i, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
|
||||
i, ndw, i, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
|
||||
} else {
|
||||
fprintf(dump, " bo[%d] = bo_new(&ctx, %d, NULL, 0x%016"PRIx64", 0x%08x);\n",
|
||||
i, ndw, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
|
||||
i, ndw, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
|
||||
}
|
||||
}
|
||||
fprintf(dump, "\n");
|
||||
|
|
|
|||
|
|
@ -187,7 +187,6 @@ LIBGLSL_FILES = \
|
|||
opt_constant_variable.cpp \
|
||||
opt_copy_propagation.cpp \
|
||||
opt_copy_propagation_elements.cpp \
|
||||
opt_cse.cpp \
|
||||
opt_dead_builtin_variables.cpp \
|
||||
opt_dead_builtin_varyings.cpp \
|
||||
opt_dead_code.cpp \
|
||||
|
|
|
|||
|
|
@ -6358,7 +6358,7 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
const glsl_type *block_array_type =
|
||||
process_array_type(&loc, block_type, this->array_specifier, state);
|
||||
|
||||
/* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
|
||||
/* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
|
||||
*
|
||||
* * Arrays of arrays of blocks are not allowed
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -2786,6 +2786,17 @@ layout_defaults:
|
|||
if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
|
||||
YYERROR;
|
||||
}
|
||||
|
||||
/* From the GLSL 4.50 spec, section 4.4.5:
|
||||
*
|
||||
* "It is a compile-time error to specify the binding identifier for
|
||||
* the global scope or for block member declarations."
|
||||
*/
|
||||
if (state->default_shader_storage_qualifier->flags.q.explicit_binding) {
|
||||
_mesa_glsl_error(& @1, state,
|
||||
"binding qualifier cannot be set for default layout");
|
||||
}
|
||||
|
||||
$$ = NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1901,7 +1901,6 @@ do_common_optimization(exec_list *ir, bool linked,
|
|||
progress = do_constant_variable_unlinked(ir) || progress;
|
||||
progress = do_constant_folding(ir) || progress;
|
||||
progress = do_minmax_prune(ir) || progress;
|
||||
progress = do_cse(ir) || progress;
|
||||
progress = do_rebalance_tree(ir) || progress;
|
||||
progress = do_algebraic(ir, native_integers, options) || progress;
|
||||
progress = do_lower_jumps(ir) || progress;
|
||||
|
|
|
|||
|
|
@ -1661,8 +1661,8 @@ glsl_type::std430_size(bool row_major) const
|
|||
unsigned int array_len;
|
||||
|
||||
if (this->is_array()) {
|
||||
element_type = this->fields.array;
|
||||
array_len = this->length;
|
||||
element_type = this->without_array();
|
||||
array_len = this->arrays_of_arrays_size();
|
||||
} else {
|
||||
element_type = this;
|
||||
array_len = 1;
|
||||
|
|
@ -1685,10 +1685,12 @@ glsl_type::std430_size(bool row_major) const
|
|||
}
|
||||
|
||||
if (this->is_array()) {
|
||||
if (this->fields.array->is_record())
|
||||
return this->length * this->fields.array->std430_size(row_major);
|
||||
if (this->without_array()->is_record())
|
||||
return this->arrays_of_arrays_size() *
|
||||
this->without_array()->std430_size(row_major);
|
||||
else
|
||||
return this->length * this->fields.array->std430_base_alignment(row_major);
|
||||
return this->arrays_of_arrays_size() *
|
||||
this->without_array()->std430_base_alignment(row_major);
|
||||
}
|
||||
|
||||
if (this->is_record() || this->is_interface()) {
|
||||
|
|
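
The switch from this->length to arrays_of_arrays_size() matters for arrays of arrays: length is only the outermost dimension, while the std430 size has to count every innermost element. A toy version of that helper, using a simplified type record instead of glsl_type:

   struct toy_type {
      const struct toy_type *element;   /* NULL when this is not an array */
      unsigned length;                  /* array length at this level */
   };

   /* Product of the lengths of every array level, e.g. 12 for float[3][4]. */
   static unsigned arrays_of_arrays_size(const struct toy_type *t)
   {
      unsigned n = 1;

      while (t->element) {
         n *= t->length;
         t = t->element;
      }
      return n;
   }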
|
|||
|
|
@ -87,7 +87,6 @@ bool do_constant_variable_unlinked(exec_list *instructions);
|
|||
bool do_copy_propagation(exec_list *instructions);
|
||||
bool do_copy_propagation_elements(exec_list *instructions);
|
||||
bool do_constant_propagation(exec_list *instructions);
|
||||
bool do_cse(exec_list *instructions);
|
||||
void do_dead_builtin_varyings(struct gl_context *ctx,
|
||||
gl_shader *producer, gl_shader *consumer,
|
||||
unsigned num_tfeedback_decls,
|
||||
|
|
|
|||
|
|
@ -110,11 +110,7 @@ struct gl_uniform_storage {
|
|||
*/
|
||||
bool initialized;
|
||||
|
||||
struct gl_opaque_uniform_index sampler[MESA_SHADER_STAGES];
|
||||
|
||||
struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
|
||||
|
||||
struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
|
||||
struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
|
||||
|
||||
/**
|
||||
* Storage used by the driver for the uniform
|
||||
|
|
|
|||
|
|
@ -135,16 +135,16 @@ set_opaque_binding(gl_shader_program *prog, const char *name, int binding)

if (shader) {
if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
storage->sampler[sh].active) {
storage->opaque[sh].active) {
for (unsigned i = 0; i < elements; i++) {
const unsigned index = storage->sampler[sh].index + i;
const unsigned index = storage->opaque[sh].index + i;
shader->SamplerUnits[index] = storage->storage[i].i;
}

} else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
storage->image[sh].active) {
storage->opaque[sh].active) {
for (unsigned i = 0; i < elements; i++) {
const unsigned index = storage->image[sh].index + i;
const unsigned index = storage->opaque[sh].index + i;
shader->ImageUnits[index] = storage->storage[i].i;
}
}

@ -244,8 +244,8 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
gl_shader *shader = prog->_LinkedShaders[sh];

if (shader && storage->sampler[sh].active) {
unsigned index = storage->sampler[sh].index;
if (shader && storage->opaque[sh].active) {
unsigned index = storage->opaque[sh].index;

shader->SamplerUnits[index] = storage->storage[0].i;
}

@ -566,7 +566,7 @@ private:
struct gl_uniform_storage *uniform, const char *name)
{
if (base_type->is_sampler()) {
uniform->sampler[shader_type].active = true;
uniform->opaque[shader_type].active = true;

/* Handle multiple samplers inside struct arrays */
if (this->record_array_count > 1) {

@ -586,8 +586,8 @@ private:
/* In this case, we've already seen this uniform so we just use
* the next sampler index recorded the last time we visited.
*/
uniform->sampler[shader_type].index = index;
index = inner_array_size + uniform->sampler[shader_type].index;
uniform->opaque[shader_type].index = index;
index = inner_array_size + uniform->opaque[shader_type].index;
this->record_next_sampler->put(index, name_copy);

ralloc_free(name_copy);

@ -605,13 +605,13 @@ private:
* structs. This allows the offset to be easily calculated for
* indirect indexing.
*/
uniform->sampler[shader_type].index = this->next_sampler;
uniform->opaque[shader_type].index = this->next_sampler;
this->next_sampler +=
inner_array_size * this->record_array_count;

/* Store the next index for future passes over the struct array
*/
index = uniform->sampler[shader_type].index + inner_array_size;
index = uniform->opaque[shader_type].index + inner_array_size;
this->record_next_sampler->put(index, name_copy);
ralloc_free(name_copy);
}

@ -619,22 +619,19 @@ private:
/* Increment the sampler by 1 for non-arrays and by the number of
* array elements for arrays.
*/
uniform->sampler[shader_type].index = this->next_sampler;
uniform->opaque[shader_type].index = this->next_sampler;
this->next_sampler += MAX2(1, uniform->array_elements);
}

const gl_texture_index target = base_type->sampler_index();
const unsigned shadow = base_type->sampler_shadow;
for (unsigned i = uniform->sampler[shader_type].index;
for (unsigned i = uniform->opaque[shader_type].index;
i < MIN2(this->next_sampler, MAX_SAMPLERS);
i++) {
this->targets[i] = target;
this->shader_samplers_used |= 1U << i;
this->shader_shadow_samplers |= shadow << i;
}
} else {
uniform->sampler[shader_type].index = ~0;
uniform->sampler[shader_type].active = false;
}
}

@ -642,17 +639,14 @@ private:
struct gl_uniform_storage *uniform)
{
if (base_type->is_image()) {
uniform->image[shader_type].index = this->next_image;
uniform->image[shader_type].active = true;
uniform->opaque[shader_type].index = this->next_image;
uniform->opaque[shader_type].active = true;

/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_image += MAX2(1, uniform->array_elements);

} else {
uniform->image[shader_type].index = ~0;
uniform->image[shader_type].active = false;
}
}

@ -660,17 +654,14 @@ private:
struct gl_uniform_storage *uniform)
{
if (base_type->is_subroutine()) {
uniform->subroutine[shader_type].index = this->next_subroutine;
uniform->subroutine[shader_type].active = true;
uniform->opaque[shader_type].index = this->next_subroutine;
uniform->opaque[shader_type].active = true;

/* Increment the subroutine index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_subroutine += MAX2(1, uniform->array_elements);

} else {
uniform->subroutine[shader_type].index = ~0;
uniform->subroutine[shader_type].active = false;
}
}

@ -738,13 +729,17 @@ private:
base_type = type;
}

/* Initialise opaque data */
this->uniforms[id].opaque[shader_type].index = ~0;
this->uniforms[id].opaque[shader_type].active = false;

/* This assigns uniform indices to sampler and image uniforms. */
handle_samplers(base_type, &this->uniforms[id], name);
handle_images(base_type, &this->uniforms[id]);
handle_subroutines(base_type, &this->uniforms[id]);

/* For array of arrays or struct arrays the base location may have
* already been set so dont set it again.
* already been set so don't set it again.
*/
if (ubo_block_index == -1 && current_var->data.location == -1) {
current_var->data.location = id;

@ -769,7 +764,7 @@ private:
this->explicit_location + field_counter;
field_counter += entries;
} else {
this->uniforms[id].remap_location = this->explicit_location;
this->uniforms[id].remap_location = this->explicit_location;
}
} else {
/* Initialize to to indicate that no location is set */

@ -820,12 +815,13 @@ private:
if (type->without_array()->is_matrix()) {
const glsl_type *matrix = type->without_array();
const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
const unsigned items =
row_major ? matrix->matrix_columns : matrix->vector_elements;

assert(items <= 4);
if (packing == GLSL_INTERFACE_PACKING_STD430)
this->uniforms[id].matrix_stride = items < 3 ? items * N :
glsl_align(items * N, 16);
glsl_align(items * N, 16);
else
this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
this->uniforms[id].row_major = row_major;

@ -1029,7 +1025,7 @@ link_set_image_access_qualifiers(struct gl_shader_program *prog)
assert(found);
(void) found;
const gl_uniform_storage *storage = &prog->UniformStorage[id];
const unsigned index = storage->image[i].index;
const unsigned index = storage->opaque[i].index;
const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);

@ -1159,7 +1155,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
ir_variable *const var = node->as_variable();

if ((var == NULL) || (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage))
if ((var == NULL) || (var->data.mode != ir_var_uniform &&
var->data.mode != ir_var_shader_storage))
continue;

parcel.set_and_process(prog, var);

@ -1168,7 +1165,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers;

STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == sizeof(parcel.targets));
STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) ==
sizeof(parcel.targets));
memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets,
sizeof(prog->_LinkedShaders[i]->SamplerTargets));
}

@ -1238,7 +1236,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
if (!sh)
continue;

if (!uniforms[i].subroutine[j].active)
if (!uniforms[i].opaque[j].active)
continue;

/* How many new entries for this uniform? */

@ -1268,7 +1266,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
if (!sh)
continue;

if (!uniforms[i].subroutine[j].active)
if (!uniforms[i].opaque[j].active)
continue;

sh->SubroutineUniformRemapTable =
@ -2132,7 +2132,7 @@ link_intrastage_shaders(void *mem_ctx,

if (!ok) {
ctx->Driver.DeleteShader(ctx, linked);
_mesa_delete_shader(ctx, linked);
return NULL;
}

@ -3421,10 +3421,13 @@ build_program_resource_list(struct gl_shader_program *shProg)
if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
return;

if (!add_packed_varyings(shProg, input_stage))
return;
if (!add_packed_varyings(shProg, output_stage))
return;
/* Program interface needs to expose varyings in case of SSO. */
if (shProg->SeparateShader) {
if (!add_packed_varyings(shProg, input_stage))
return;
if (!add_packed_varyings(shProg, output_stage))
return;
}

/* Add inputs and outputs to the resource list. */
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,

@ -3497,7 +3500,7 @@ build_program_resource_list(struct gl_shader_program *shProg)
continue;

for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
if (!shProg->UniformStorage[i].subroutine[j].active)
if (!shProg->UniformStorage[i].opaque[j].active)
continue;

type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);

@ -3732,7 +3735,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)

for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] != NULL)
ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]);
_mesa_delete_shader(ctx, prog->_LinkedShaders[i]);

prog->_LinkedShaders[i] = NULL;
}

@ -3747,7 +3750,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)

if (!prog->LinkStatus) {
if (sh)
ctx->Driver.DeleteShader(ctx, sh);
_mesa_delete_shader(ctx, sh);
goto done;
}

@ -3770,7 +3773,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
if (!prog->LinkStatus) {
if (sh)
ctx->Driver.DeleteShader(ctx, sh);
_mesa_delete_shader(ctx, sh);
goto done;
}

@ -166,6 +166,8 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader->info.separate_shader = shader_prog->SeparateShader;
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
shader->info.gs.invocations = sh->Geom.Invocations;
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;

return shader;
}

@ -1513,6 +1513,9 @@ typedef struct nir_shader_info {
/* Whether or not separate shader objects were used */
bool separate_shader;

/** Was this shader linked with any transform feedback varyings? */
bool has_transform_feedback_varyings;

struct {
/** The maximum number of vertices the geometry shader might write. */
unsigned vertices_out;
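The new nir_shader_info field records at link time whether any transform feedback varyings exist, so a backend can consult one flag instead of reaching back into the GL program object. A hedged sketch of the sort of check a consumer might perform (the helper below is hypothetical, not an actual Mesa function):

#include <stdbool.h>

/* Illustrative subset of the info struct from the hunk above. */
struct example_shader_info {
   bool separate_shader;
   bool has_transform_feedback_varyings;
};

/* Hypothetical helper: an output-pruning pass might bail out when
 * transform feedback could still read the outputs.
 */
static bool
example_can_prune_unused_outputs(const struct example_shader_info *info)
{
   return !info->has_transform_feedback_varyings;
}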
@ -229,6 +229,7 @@ SYSTEM_VALUE(num_work_groups, 3, 0)
LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)

/*

@ -63,31 +63,46 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
*size = location;
}

/**
* Returns true if we're processing a stage whose inputs are arrays indexed
* by a vertex number (such as geometry shader inputs).
*/
static bool
deref_has_indirect(nir_deref_var *deref)
stage_uses_per_vertex_inputs(struct lower_io_state *state)
{
for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
if (tail->deref_type == nir_deref_type_array) {
nir_deref_array *arr = nir_deref_as_array(tail);
if (arr->deref_array_type == nir_deref_array_type_indirect)
return true;
}
}

return false;
gl_shader_stage stage = state->builder.shader->stage;
return stage == MESA_SHADER_GEOMETRY;
}

static unsigned
get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
get_io_offset(nir_deref_var *deref, nir_instr *instr,
nir_ssa_def **vertex_index,
nir_ssa_def **out_indirect,
struct lower_io_state *state)
{
bool found_indirect = false;
nir_ssa_def *indirect = NULL;
unsigned base_offset = 0;

nir_builder *b = &state->builder;
b->cursor = nir_before_instr(instr);

nir_deref *tail = &deref->deref;

/* For per-vertex input arrays (i.e. geometry shader inputs), keep the
* outermost array index separate.  Process the rest normally.
*/
if (vertex_index != NULL) {
tail = tail->child;
assert(tail->deref_type == nir_deref_type_array);
nir_deref_array *deref_array = nir_deref_as_array(tail);

nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
}
*vertex_index = vtx;
}

while (tail->child != NULL) {
const struct glsl_type *parent_type = tail->type;
tail = tail->child;

@ -103,14 +118,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
nir_imul(b, nir_imm_int(b, size),
nir_ssa_for_src(b, deref_array->indirect, 1));

if (found_indirect) {
indirect->ssa =
nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul);
} else {
indirect->ssa = mul;
}
indirect->is_ssa = true;
found_indirect = true;
indirect = indirect ? nir_iadd(b, indirect, mul) : mul;
}
} else if (tail->deref_type == nir_deref_type_struct) {
nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

@ -122,17 +130,24 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
}
}

*out_indirect = indirect;
return base_offset;
}

static nir_intrinsic_op
load_op(nir_variable_mode mode, bool has_indirect)
load_op(struct lower_io_state *state,
nir_variable_mode mode, bool per_vertex, bool has_indirect)
{
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
op = has_indirect ? nir_intrinsic_load_input_indirect :
nir_intrinsic_load_input;
if (per_vertex) {
op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect :
nir_intrinsic_load_per_vertex_input;
} else {
op = has_indirect ? nir_intrinsic_load_input_indirect :
nir_intrinsic_load_input;
}
break;
case nir_var_uniform:
op = has_indirect ? nir_intrinsic_load_uniform_indirect :

@ -169,17 +184,22 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (mode != nir_var_shader_in && mode != nir_var_uniform)
continue;

bool has_indirect = deref_has_indirect(intrin->variables[0]);
bool per_vertex = stage_uses_per_vertex_inputs(state) &&
mode == nir_var_shader_in;

nir_ssa_def *indirect;
nir_ssa_def *vertex_index;

unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
per_vertex ? &vertex_index : NULL,
&indirect, state);

nir_intrinsic_instr *load =
nir_intrinsic_instr_create(state->mem_ctx,
load_op(mode, has_indirect));
load_op(state, mode, per_vertex,
indirect));
load->num_components = intrin->num_components;

nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],
&intrin->instr, &indirect, state);

unsigned location = intrin->variables[0]->var->data.driver_location;
if (mode == nir_var_uniform) {
load->const_index[0] = location;

@ -188,8 +208,11 @@ nir_lower_io_block(nir_block *block, void *void_state)
load->const_index[0] = location + offset;
}

if (has_indirect)
load->src[0] = indirect;
if (per_vertex)
load->src[0] = nir_src_for_ssa(vertex_index);

if (indirect)
load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect);

if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,

@ -209,10 +232,14 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
continue;

bool has_indirect = deref_has_indirect(intrin->variables[0]);
nir_ssa_def *indirect;

unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
NULL, &indirect, state);
offset += intrin->variables[0]->var->data.driver_location;

nir_intrinsic_op store_op;
if (has_indirect) {
if (indirect) {
store_op = nir_intrinsic_store_output_indirect;
} else {
store_op = nir_intrinsic_store_output;

@ -221,18 +248,12 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
store->num_components = intrin->num_components;

nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],
&intrin->instr, &indirect, state);
offset += intrin->variables[0]->var->data.driver_location;

store->const_index[0] = offset;

nir_src_copy(&store->src[0], &intrin->src[0], store);

if (has_indirect)
store->src[1] = indirect;
if (indirect)
store->src[1] = nir_src_for_ssa(indirect);

nir_instr_insert_before(&intrin->instr, &store->instr);
nir_instr_remove(&intrin->instr);
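Taken together, the nir_lower_io hunks above split geometry shader input addressing into three parts: an SSA vertex index (src[0] of load_per_vertex_input), a constant base offset (const_index[0]), and an optional SSA indirect offset carried as an extra source on the *_indirect variant. A plain-C stand-in for that addressing model, purely for illustration (these are not real NIR types):

#include <stdbool.h>
#include <stdint.h>

struct example_per_vertex_load {
   uint32_t vertex_index;   /* which gl_in[] vertex to read */
   uint32_t const_offset;   /* driver_location plus the constant part of the offset */
   bool     has_indirect;   /* true for load_per_vertex_input_indirect */
   uint32_t indirect;       /* dynamic part, added at run time by the backend */
};

static uint32_t
example_effective_slot(const struct example_per_vertex_load *l)
{
   return l->const_offset + (l->has_indirect ? l->indirect : 0);
}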
@ -155,13 +155,13 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program
}

if (location > shader_program->NumUniformStorage - 1 ||
!shader_program->UniformStorage[location].sampler[stage].active) {
!shader_program->UniformStorage[location].opaque[stage].active) {
assert(!"cannot return a sampler");
return;
}

instr->sampler_index +=
shader_program->UniformStorage[location].sampler[stage].index;
shader_program->UniformStorage[location].opaque[stage].index;

instr->sampler = NULL;
}

@ -443,6 +443,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_input_indirect:
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_per_vertex_input_indirect:
var_list = &state->shader->inputs;
break;
case nir_intrinsic_store_output:

@ -154,6 +154,8 @@ nir_sweep(nir_shader *nir)
/* First, move ownership of all the memory to a temporary context; assume dead. */
ralloc_adopt(rubbish, nir);

ralloc_steal(nir, (char *)nir->info.name);

/* Variables and registers are not dead.  Steal them back. */
steal_list(nir, nir_variable, &nir->uniforms);
steal_list(nir, nir_variable, &nir->inputs);
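nir_sweep() compacts a shader by adopting all of its allocations onto a throw-away ralloc context and then stealing back only what is still reachable; the added ralloc_steal() above makes the shader's name string survive the sweep as well. A minimal sketch of that adopt/steal/free pattern, using the real ralloc entry points but a made-up payload:

#include "ralloc.h"   /* Mesa's ralloc allocator; in-tree include path assumed */

static void
example_sweep(void *owner, char *still_live)
{
   /* Move everything currently owned by 'owner' onto a rubbish context. */
   void *rubbish = ralloc_context(NULL);
   ralloc_adopt(rubbish, owner);

   /* Steal back the allocations that are still live... */
   ralloc_steal(owner, still_live);

   /* ...and free whatever was left behind. */
   ralloc_free(rubbish);
}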
@ -1,472 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

/**
* \file opt_cse.cpp
*
* constant subexpression elimination at the GLSL IR level.
*
* Compare to brw_fs_cse.cpp for a more complete CSE implementation.  This one
* is generic and handles texture operations, but it's rather simple currently
* and doesn't support modification of variables in the available expressions
* list, so it can't do variables other than uniforms or shader inputs.
*/

#include "ir.h"
#include "ir_visitor.h"
#include "ir_rvalue_visitor.h"
#include "ir_basic_block.h"
#include "ir_optimization.h"
#include "ir_builder.h"
#include "glsl_types.h"

using namespace ir_builder;

static bool debug = false;

namespace {

/**
* This is the record of an available expression for common subexpression
* elimination.
*/
class ae_entry : public exec_node
{
public:
ae_entry(ir_instruction *base_ir, ir_rvalue **val)
: val(val), base_ir(base_ir)
{
assert(val);
assert(*val);
assert(base_ir);

var = NULL;
}

void init(ir_instruction *base_ir, ir_rvalue **val)
{
this->val = val;
this->base_ir = base_ir;
this->var = NULL;

assert(val);
assert(*val);
assert(base_ir);
}

/**
* The pointer to the expression that we might be able to reuse
*
* Note the double pointer -- this is the place in the base_ir expression
* tree that we would rewrite to move the expression out to a new variable
* assignment.
*/
ir_rvalue **val;

/**
* Root instruction in the basic block where the expression appeared.
*
* This is used so that we can insert the new variable declaration into the
* instruction stream (since *val is just somewhere in base_ir's expression
* tree).
*/
ir_instruction *base_ir;

/**
* The variable that the expression has been stored in, if it's been CSEd
* once already.
*/
ir_variable *var;
};

class cse_visitor : public ir_rvalue_visitor {
public:
cse_visitor(exec_list *validate_instructions)
: validate_instructions(validate_instructions)
{
progress = false;
mem_ctx = ralloc_context(NULL);
this->ae = new(mem_ctx) exec_list;
}
~cse_visitor()
{
ralloc_free(mem_ctx);
}

virtual ir_visitor_status visit_enter(ir_function_signature *ir);
virtual ir_visitor_status visit_enter(ir_loop *ir);
virtual ir_visitor_status visit_enter(ir_if *ir);
virtual ir_visitor_status visit_enter(ir_call *ir);
virtual void handle_rvalue(ir_rvalue **rvalue);

bool progress;

private:
void *mem_ctx;

ir_rvalue *try_cse(ir_rvalue *rvalue);
void add_to_ae(ir_rvalue **rvalue);

/**
* Move all nodes from the ae list to the free list
*/
void empty_ae_list();

/**
* Get and initialize a new ae_entry
*
* This will either come from the free list or be freshly allocated.
*/
ae_entry *get_ae_entry(ir_rvalue **rvalue);

/** List of ae_entry: The available expressions to reuse */
exec_list *ae;

/**
* The whole shader, so that we can validate_ir_tree in debug mode.
*
* This proved quite useful when trying to get the tree manipulation
* right.
*/
exec_list *validate_instructions;

/**
* List of available-for-use ae_entry objects.
*/
exec_list free_ae_entries;
};

/**
* Visitor to walk an expression tree to check that all variables referenced
* are constants.
*/
class is_cse_candidate_visitor : public ir_hierarchical_visitor
{
public:

is_cse_candidate_visitor()
: ok(true)
{
}

virtual ir_visitor_status visit(ir_dereference_variable *ir);

bool ok;
};

class contains_rvalue_visitor : public ir_rvalue_visitor
{
public:

contains_rvalue_visitor(ir_rvalue *val)
: val(val)
{
found = false;
}

virtual void handle_rvalue(ir_rvalue **rvalue);

bool found;

private:
ir_rvalue *val;
};

} /* unnamed namespace */

static void
dump_ae(exec_list *ae)
{
int i = 0;

printf("CSE: AE contents:\n");
foreach_in_list(ae_entry, entry, ae) {
printf("CSE: AE %2d (%p): ", i, entry);
(*entry->val)->print();
printf("\n");

if (entry->var)
printf("CSE: in var %p:\n", entry->var);

i++;
}
}

ir_visitor_status
is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
{
/* Currently, since we don't handle kills of the ae based on variables
* getting assigned, we can only handle constant variables.
*/
if (ir->var->data.read_only) {
return visit_continue;
} else {
if (debug)
printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
ok = false;
return visit_stop;
}
}

void
contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
{
if (*rvalue == val)
found = true;
}

static bool
contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
{
contains_rvalue_visitor v(needle);
haystack->accept(&v);
return v.found;
}

static bool
is_cse_candidate(ir_rvalue *ir)
{
/* Our temporary variable assignment generation isn't ready to handle
* anything bigger than a vector.
*/
if (!ir->type->is_vector() && !ir->type->is_scalar()) {
if (debug)
printf("CSE: non-candidate: not a vector/scalar\n");
return false;
}

/* Only handle expressions and textures currently.  We may want to extend
* to variable-index array dereferences at some point.
*/
switch (ir->ir_type) {
case ir_type_expression:
case ir_type_texture:
break;
default:
if (debug)
printf("CSE: non-candidate: not an expression/texture\n");
return false;
}

is_cse_candidate_visitor v;

ir->accept(&v);

return v.ok;
}

/**
* Tries to find and return a reference to a previous computation of a given
* expression.
*
* Walk the list of available expressions checking if any of them match the
* rvalue, and if so, move the previous copy of the expression to a temporary
* and return a reference of the temporary.
*/
ir_rvalue *
cse_visitor::try_cse(ir_rvalue *rvalue)
{
foreach_in_list(ae_entry, entry, ae) {
if (debug) {
printf("Comparing to AE %p: ", entry);
(*entry->val)->print();
printf("\n");
}

if (!rvalue->equals(*entry->val))
continue;

if (debug) {
printf("CSE: Replacing: ");
(*entry->val)->print();
printf("\n");
printf("CSE: with: ");
rvalue->print();
printf("\n");
}

if (!entry->var) {
ir_instruction *base_ir = entry->base_ir;

ir_variable *var = new(rvalue) ir_variable(rvalue->type,
"cse",
ir_var_temporary);

/* Write the previous expression result into a new variable. */
base_ir->insert_before(var);
ir_assignment *assignment = assign(var, *entry->val);
base_ir->insert_before(assignment);

/* Replace the expression in the original tree with a deref of the
* variable, but keep tracking the expression for further reuse.
*/
*entry->val = new(rvalue) ir_dereference_variable(var);
entry->val = &assignment->rhs;

entry->var = var;

/* Update the base_irs in the AE list.  We have to be sure that
* they're correct -- expressions from our base_ir that weren't moved
* need to stay in this base_ir (so that later consumption of them
* puts new variables between our new variable and our base_ir), but
* expressions from our base_ir that we *did* move need base_ir
* updated so that any further elimination from inside gets its new
* assignments put before our new assignment.
*/
foreach_in_list(ae_entry, fixup_entry, ae) {
if (contains_rvalue(assignment->rhs, *fixup_entry->val))
fixup_entry->base_ir = assignment;
}

if (debug)
dump_ae(ae);
}

/* Replace the expression in our current tree with the variable. */
return new(rvalue) ir_dereference_variable(entry->var);
}

return NULL;
}

void
cse_visitor::empty_ae_list()
{
free_ae_entries.append_list(ae);
}

ae_entry *
cse_visitor::get_ae_entry(ir_rvalue **rvalue)
{
ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
if (entry) {
entry->init(base_ir, rvalue);
} else {
entry = new(mem_ctx) ae_entry(base_ir, rvalue);
}

return entry;
}

/** Add the rvalue to the list of available expressions for CSE. */
void
cse_visitor::add_to_ae(ir_rvalue **rvalue)
{
if (debug) {
printf("CSE: Add to AE: ");
(*rvalue)->print();
printf("\n");
}

ae->push_tail(get_ae_entry(rvalue));

if (debug)
dump_ae(ae);
}

void
cse_visitor::handle_rvalue(ir_rvalue **rvalue)
{
if (!*rvalue)
return;

if (debug) {
printf("CSE: handle_rvalue ");
(*rvalue)->print();
printf("\n");
}

if (!is_cse_candidate(*rvalue))
return;

ir_rvalue *new_rvalue = try_cse(*rvalue);
if (new_rvalue) {
*rvalue = new_rvalue;
progress = true;

if (debug)
validate_ir_tree(validate_instructions);
} else {
add_to_ae(rvalue);
}
}

ir_visitor_status
cse_visitor::visit_enter(ir_if *ir)
{
handle_rvalue(&ir->condition);

empty_ae_list();
visit_list_elements(this, &ir->then_instructions);

empty_ae_list();
visit_list_elements(this, &ir->else_instructions);

empty_ae_list();
return visit_continue_with_parent;
}

ir_visitor_status
cse_visitor::visit_enter(ir_function_signature *ir)
{
empty_ae_list();
visit_list_elements(this, &ir->body);

empty_ae_list();
return visit_continue_with_parent;
}

ir_visitor_status
cse_visitor::visit_enter(ir_loop *ir)
{
empty_ae_list();
visit_list_elements(this, &ir->body_instructions);

empty_ae_list();
return visit_continue_with_parent;
}

ir_visitor_status
cse_visitor::visit_enter(ir_call *)
{
/* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
* pointer to the (ir_rvalue *) on the stack.  Since we save those pointers
* in the AE list, we can't let handle_rvalue get called.
*/
return visit_continue_with_parent;
}

/**
* Does a (uniform-value) constant subexpression elimination pass on the code
* present in the instruction stream.
*/
bool
do_cse(exec_list *instructions)
{
cse_visitor v(instructions);

visit_list_elements(&v, instructions);

return v.progress;
}
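The file above removes the GLSL-IR level CSE pass entirely; as its own comment notes, a more complete implementation already exists in the i965 backend (brw_fs_cse.cpp). For reference, the kind of rewrite the pass performed, sketched as plain C rather than IR (the real pass only matched expressions whose operands were read-only, e.g. uniforms and shader inputs, and stored the reused value in an ir_var_temporary named "cse"):

/* Before: the common subexpression a * b is evaluated twice. */
static float example_before(float a, float b, float c, float d)
{
   float x = a * b + c;
   float y = a * b + d;
   return x + y;
}

/* After CSE: the shared value is computed once into a temporary. */
static float example_after(float a, float b, float c, float d)
{
   float cse = a * b;
   float x = cse + c;
   float y = cse + d;
   return x + y;
}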
@ -91,6 +91,14 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
return shader;
}

void
_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
{
free((void *)sh->Source);
free(sh->Label);
ralloc_free(sh);
}

void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
{
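The new _mesa_delete_shader() helper gives core code a way to destroy a gl_shader without going through the driver hook; the linker.cpp hunks earlier in this diff switch their failure paths from ctx->Driver.DeleteShader() to it. A hedged sketch of that call-site pattern (the wrapper function is hypothetical; the header path is assumed to be where the declaration lives):

#include "main/shaderobj.h"   /* declares _mesa_delete_shader(); path assumed */

/* Hypothetical helper mirroring the linker's cleanup on link failure. */
static struct gl_shader *
example_discard_failed_link(struct gl_context *ctx, struct gl_shader *linked)
{
   if (linked)
      _mesa_delete_shader(ctx, linked);   /* was ctx->Driver.DeleteShader(ctx, linked) */
   return NULL;
}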
Some files were not shown because too many files have changed in this diff.