From c1452983b44cc8ee238b8c7e2cfca1105c707487 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Aug 2015 09:52:12 +1000 Subject: [PATCH 01/26] mesa/texgetimage: fix missing stencil check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GetTexImage can read to stencil8 but only from stencil or depthstencil textures. This fixes a bunch of failures in CTS GL33-CTS.gtf32.GL3Tests.packed_pixels Reviewed-by: Marek Olšák Cc: "11.0" Signed-off-by: Dave Airlie --- src/mesa/main/texgetimage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 3c1e166ffa1..f62553dac24 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1213,6 +1213,13 @@ getteximage_error_check(struct gl_context *ctx, "%s(format=GL_STENCIL_INDEX)", caller); return true; } + else if (_mesa_is_stencil_format(format) + && !_mesa_is_depthstencil_format(baseFormat) + && !_mesa_is_stencil_format(baseFormat)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(format mismatch)", caller); + return true; + } else if (_mesa_is_ycbcr_format(format) && !_mesa_is_ycbcr_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, From 941346a80323c9419b70e3987b900a69ebb08fb4 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Sun, 9 Aug 2015 02:03:33 +0200 Subject: [PATCH 02/26] gallium/auxiliary: optimize rgb9e5 helper a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code (lifted straight from the extension) was doing things the most inefficient way you could think of. This drops some of the more expensive float operations, in particular - int-cast floors (pointless, values always positive) - 2 raised to (signed) integers (replace with simple exponent manipulation), getting rid of a misguided comment in the process (implement with table...) 
- float division (replace with mul of reverse of those exponents) This is like 3 times faster (measured for float3_to_rgb9e5), though it depends (e.g. llvm is clever enough to replace exp2 with ldexp whereas gcc is not, division is not too bad on cpus with early-exit divs). Note that keeping the double math for now (float x + 0.5), as the results may otherwise differ. Acked-by: Marek Olšák --- src/gallium/auxiliary/util/u_format_rgb9e5.h | 35 ++++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h index 59fc291e917..d11bfa833f1 100644 --- a/src/gallium/auxiliary/util/u_format_rgb9e5.h +++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h @@ -21,7 +21,8 @@ * DEALINGS IN THE SOFTWARE. */ -/* Copied from EXT_texture_shared_exponent and edited. */ +/* Copied from EXT_texture_shared_exponent and edited, getting rid of + * expensive float math bits too. */ #ifndef RGB9E5_H #define RGB9E5_H @@ -39,7 +40,6 @@ #define RGB9E5_MANTISSA_VALUES (1<= 0); - /* This exp2 function could be replaced by a table. 
*/ - denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS); + revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS); - maxm = (int) floor(maxrgb / denom + 0.5); - if (maxm == MAX_RGB9E5_MANTISSA+1) { - denom *= 2; + maxm = (int) (maxrgb * revdenom.value + 0.5); + if (maxm == MAX_RGB9E5_MANTISSA + 1) { + revdenom.value *= 0.5f; exp_shared += 1; assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); } else { assert(maxm <= MAX_RGB9E5_MANTISSA); } - rm = (int) floor(rc / denom + 0.5); - gm = (int) floor(gc / denom + 0.5); - bm = (int) floor(bc / denom + 0.5); + rm = (int) (rc * revdenom.value + 0.5); + gm = (int) (gc * revdenom.value + 0.5); + bm = (int) (bc * revdenom.value + 0.5); assert(rm <= MAX_RGB9E5_MANTISSA); assert(gm <= MAX_RGB9E5_MANTISSA); @@ -151,15 +150,15 @@ static inline void rgb9e5_to_float3(unsigned rgb, float retval[3]) { rgb9e5 v; int exponent; - float scale; + float754 scale = {0}; v.raw = rgb; exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS; - scale = exp2f(exponent); + scale.field.biasedexponent = exponent + 127; - retval[0] = v.field.r * scale; - retval[1] = v.field.g * scale; - retval[2] = v.field.b * scale; + retval[0] = v.field.r * scale.value; + retval[1] = v.field.g * scale.value; + retval[2] = v.field.b * scale.value; } #endif From 48e6404c04da6c9655d7a8b625830d0d40f393ae Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Sun, 9 Aug 2015 02:50:10 +0200 Subject: [PATCH 03/26] gallium/auxiliary: optimize rgb9e5 helper some more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I used this as some testing ground for investigating some compiler bits initially (e.g. lrint calls etc.), figured I could do much better in the end just for fun... This is mathematically equivalent, but uses some tricks to avoid doubles and also replaces some float math with ints. Good for another performance doubling or so. 
As a side note, some quick tests show that llvm's loop vectorizer would be able to properly vectorize this version (which it failed to do earlier due to doubles, producing a mess), giving another 3 times performance increase with sse2 (more with sse4.1), but this may not apply to mesa. No piglit change. Acked-by: Marek Olšák --- src/gallium/auxiliary/util/u_format_rgb9e5.h | 81 ++++++++++---------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h index d11bfa833f1..21feba7b710 100644 --- a/src/gallium/auxiliary/util/u_format_rgb9e5.h +++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h @@ -74,62 +74,59 @@ typedef union { } field; } rgb9e5; -static inline float rgb9e5_ClampRange(float x) -{ - if (x > 0.0f) { - if (x >= MAX_RGB9E5) { - return MAX_RGB9E5; - } else { - return x; - } - } else { - /* NaN gets here too since comparisons with NaN always fail! */ - return 0.0f; - } -} -/* Ok, FloorLog2 is not correct for the denorm and zero values, but we - are going to do a max of this value with the minimum rgb9e5 exponent - that will hide these problem cases. 
*/ -static inline int rgb9e5_FloorLog2(float x) +static inline int rgb9e5_ClampRange(float x) { float754 f; - + float754 max; f.value = x; - return (f.field.biasedexponent - 127); + max.value = MAX_RGB9E5; + + if (f.raw > 0x7f800000) + /* catches neg, NaNs */ + return 0; + else if (f.raw >= max.raw) + return max.raw; + else + return f.raw; } static inline unsigned float3_to_rgb9e5(const float rgb[3]) { rgb9e5 retval; - float maxrgb; - int rm, gm, bm; - float rc, gc, bc; - int exp_shared, maxm; + int rm, gm, bm, exp_shared; float754 revdenom = {0}; + float754 rc, bc, gc, maxrgb; - rc = rgb9e5_ClampRange(rgb[0]); - gc = rgb9e5_ClampRange(rgb[1]); - bc = rgb9e5_ClampRange(rgb[2]); + rc.raw = rgb9e5_ClampRange(rgb[0]); + gc.raw = rgb9e5_ClampRange(rgb[1]); + bc.raw = rgb9e5_ClampRange(rgb[2]); + maxrgb.raw = MAX3(rc.raw, gc.raw, bc.raw); - maxrgb = MAX3(rc, gc, bc); - exp_shared = MAX2(-RGB9E5_EXP_BIAS - 1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + /* + * Compared to what the spec suggests, instead of conditionally adjusting + * the exponent after the fact do it here by doing the equivalent of +0.5 - + * the int add will spill over into the exponent in this case. 
+ */ + maxrgb.raw += maxrgb.raw & (1 << (23-9)); + exp_shared = MAX2((maxrgb.raw >> 23), -RGB9E5_EXP_BIAS - 1 + 127) + + 1 + RGB9E5_EXP_BIAS - 127; + revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS - + RGB9E5_MANTISSA_BITS) + 1; assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); - assert(exp_shared >= 0); - revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS); - maxm = (int) (maxrgb * revdenom.value + 0.5); - if (maxm == MAX_RGB9E5_MANTISSA + 1) { - revdenom.value *= 0.5f; - exp_shared += 1; - assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); - } else { - assert(maxm <= MAX_RGB9E5_MANTISSA); - } - - rm = (int) (rc * revdenom.value + 0.5); - gm = (int) (gc * revdenom.value + 0.5); - bm = (int) (bc * revdenom.value + 0.5); + /* + * The spec uses strict round-up behavior (d3d10 disagrees, but in any case + * must match what is done above for figuring out exponent). + * We avoid the doubles ((int) rc * revdenom + 0.5) by doing the rounding + * ourselves (revdenom was adjusted by +1, above). + */ + rm = (int) (rc.value * revdenom.value); + gm = (int) (gc.value * revdenom.value); + bm = (int) (bc.value * revdenom.value); + rm = (rm & 1) + (rm >> 1); + gm = (gm & 1) + (gm >> 1); + bm = (bm & 1) + (bm >> 1); assert(rm <= MAX_RGB9E5_MANTISSA); assert(gm <= MAX_RGB9E5_MANTISSA); From 73e5adc4b2bf082addd1ae76fb23c2773887162b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Aug 2015 10:37:09 +1000 Subject: [PATCH 04/26] mesa/formats: pass correct parameter to _mesa_is_format_compressed commit 26c549e69d12e44e2e36c09764ce2cceab262a1b Author: Nanley Chery Date: Fri Jul 31 10:26:36 2015 -0700 mesa/formats: remove compressed formats from matching function caused a regression in my CTS testing, this looks like a clear thinko. 
Reviewed-by: Nanley Chery Signed-off-by: Dave Airlie --- src/mesa/main/formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 8dd07d88f40..34a4434c3ba 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1979,7 +1979,7 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format, case MESA_FORMAT_X8R8G8B8_SRGB: return GL_FALSE; default: - assert(_mesa_is_format_compressed(format)); + assert(_mesa_is_format_compressed(mesa_format)); if (error) *error = GL_INVALID_ENUM; } From c2a766880d6a92a0b7b3411062f61090d77f65c0 Mon Sep 17 00:00:00 2001 From: Marta Lofstedt Date: Wed, 19 Aug 2015 15:33:21 +0200 Subject: [PATCH 05/26] mesa/es3.1: Enable getting MAX_COMPUTE_WORK_GROUP_ values for OpenGL ES 3.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the OpenGL ES 3.1 specification chapter 17, the MAX_COMPUTE_WORK_GROUP_COUNT and MAX_COMPUTE_WORK_GROUP_SIZE are available for glGetIntegeri_v. 
Signed-off-by: Marta Lofstedt Reviewed-by: Tapani Pälli --- src/mesa/main/get.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 307a5ffbd1c..c6919977087 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -2049,7 +2049,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) return TYPE_INT; case GL_MAX_COMPUTE_WORK_GROUP_COUNT: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader) + if (!_mesa_has_compute_shaders(ctx)) goto invalid_enum; if (index >= 3) goto invalid_value; @@ -2057,7 +2057,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) return TYPE_INT; case GL_MAX_COMPUTE_WORK_GROUP_SIZE: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader) + if (!_mesa_has_compute_shaders(ctx)) goto invalid_enum; if (index >= 3) goto invalid_value; From ae8d0e7abef27b25637ee25b857c44f13aef0d11 Mon Sep 17 00:00:00 2001 From: Marta Lofstedt Date: Wed, 19 Aug 2015 15:30:33 +0200 Subject: [PATCH 06/26] mesa/es3.1: Allow GL_COMPUTE_WORK_GROUP_SIZE for OpenGL ES 3.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to OpenGL ES specification section 7.12, GL_COMPUTE_WORK_GROUP_SIZE, is supported by the glGetProgramiv function. 
Signed-off-by: Marta Lofstedt Reviewed-by: Tapani Pälli --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index b227c17548e..0e0e0d6ba30 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -756,7 +756,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, return; case GL_COMPUTE_WORK_GROUP_SIZE: { int i; - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader) + if (!_mesa_has_compute_shaders(ctx)) break; if (!shProg->LinkStatus) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramiv(program not " From e0c2ea03377b52058324f735f7e1f55bb9d29750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Tue, 28 Jul 2015 11:25:35 +0300 Subject: [PATCH 07/26] mesa: GetTexLevelParameter{if}v changes for OpenGL ES 3.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch refactors existing parameters check to first check common enums between desktop GL and GLES 3.1 and modifies get_tex_level_parameter_image to be compatible with enums specified in 3.1. v2: remove extra is_gles31() checks (suggested by Ilia) Signed-off-by: Tapani Pälli Reviewed-by: Anuj Phogat (v1) Reviewed-by: Marta Lofstedt (v1) Reviewed-by: Ilia Mirkin --- src/mesa/main/texparam.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 16739f1779b..72d36117498 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1208,20 +1208,34 @@ static GLboolean legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, bool dsa) { + /* Common targets for desktop GL and GLES 3.1. 
*/ switch (target) { - case GL_TEXTURE_1D: - case GL_PROXY_TEXTURE_1D: case GL_TEXTURE_2D: - case GL_PROXY_TEXTURE_2D: case GL_TEXTURE_3D: - case GL_PROXY_TEXTURE_3D: return GL_TRUE; + case GL_TEXTURE_2D_ARRAY_EXT: + return ctx->Extensions.EXT_texture_array; case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return ctx->Extensions.ARB_texture_cube_map; + case GL_TEXTURE_2D_MULTISAMPLE: + return ctx->Extensions.ARB_texture_multisample; + } + + if (!_mesa_is_desktop_gl(ctx)) + return GL_FALSE; + + /* Rest of the desktop GL targets. */ + switch (target) { + case GL_TEXTURE_1D: + case GL_PROXY_TEXTURE_1D: + case GL_PROXY_TEXTURE_2D: + case GL_PROXY_TEXTURE_3D: + return GL_TRUE; case GL_PROXY_TEXTURE_CUBE_MAP_ARB: return ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_CUBE_MAP_ARRAY_ARB: @@ -1232,7 +1246,6 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, return ctx->Extensions.NV_texture_rectangle; case GL_TEXTURE_1D_ARRAY_EXT: case GL_PROXY_TEXTURE_1D_ARRAY_EXT: - case GL_TEXTURE_2D_ARRAY_EXT: case GL_PROXY_TEXTURE_2D_ARRAY_EXT: return ctx->Extensions.EXT_texture_array; case GL_TEXTURE_BUFFER: @@ -1254,7 +1267,6 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, * "target may also be TEXTURE_BUFFER, indicating the texture buffer." 
*/ return ctx->API == API_OPENGL_CORE && ctx->Version >= 31; - case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: case GL_PROXY_TEXTURE_2D_MULTISAMPLE: case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY: From f8b01ae47cc4760d5687e50f3315b5a89e19cd26 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Tue, 18 Aug 2015 03:23:29 +0300 Subject: [PATCH 08/26] radeonsi: mark unreachable paths to avoid warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we get: warning: 'num_user_sgprs' may be used uninitialized in this function ... Reviewed-by: Michel Dänzer Signed-off-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_shader.c | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fa6c15a6591..6b70a8f4f48 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2418,7 +2418,7 @@ static void tex_fetch_args( num_deriv_channels = 1; break; default: - assert(0); /* no other targets are valid here */ + unreachable("invalid target"); } for (param = 0; param < 2; param++) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 0347014948d..a09f588b356 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -181,7 +181,7 @@ static void si_shader_es(struct si_shader *shader) vgpr_comp_cnt = 3; /* all components are needed for TES */ num_user_sgprs = SI_TES_NUM_USER_SGPR; } else - assert(0); + unreachable("invalid shader selector type"); num_sgprs = shader->num_sgprs; /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */ @@ -338,7 +338,7 @@ static void si_shader_vs(struct si_shader *shader) vgpr_comp_cnt = 3; /* all components are needed for TES */ num_user_sgprs = 
SI_TES_NUM_USER_SGPR; } else - assert(0); + unreachable("invalid shader selector type"); num_sgprs = shader->num_sgprs; if (num_user_sgprs > num_sgprs) { From f432ae899fb81468778dbeb17ac7615da3ed5c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 24 Aug 2015 00:22:37 +0200 Subject: [PATCH 09/26] mesa: create multisample fallback textures like normal textures This works if drivers upsample on upload (like all radeon ones do). The alternative is an unexpected GL error from anything calling _mesa_update_state and possibly other issues. Cc: 10.6 11.0 Reviewed-by: Dave Airlie --- src/mesa/main/texstore.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index fc83310d4e3..539402668c4 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1004,6 +1004,7 @@ store_texsubimage(struct gl_context *ctx, /* compute slice info (and do some sanity checks) */ switch (target) { case GL_TEXTURE_2D: + case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_RECTANGLE: case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_EXTERNAL_OES: @@ -1025,6 +1026,7 @@ store_texsubimage(struct gl_context *ctx, srcImageStride = _mesa_image_row_stride(packing, width, format, type); break; case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: numSlices = depth; sliceOffset = zoffset; depth = 1; From 332fb341dd100f167055d68871a240fd8d416a85 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 24 Aug 2015 09:35:04 -0400 Subject: [PATCH 10/26] mesa: uncomment checks in es31 computation, add texture_ms Signed-off-by: Ilia Mirkin Reviewed-by: Martin Peres --- src/mesa/main/version.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index fd7ae53ccbd..f811c1ade4e 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -450,13 +450,15 @@ compute_version_es2(const struct gl_extensions *extensions) extensions->ARB_arrays_of_arrays 
&& extensions->ARB_compute_shader && extensions->ARB_draw_indirect && + extensions->ARB_explicit_uniform_location && false /*extensions->ARB_framebuffer_no_attachments*/ && extensions->ARB_shader_atomic_counters && extensions->ARB_shader_image_load_store && - false /*extensions->ARB_shader_image_size*/ && - false /*extensions->ARB_shader_storage_buffer_object*/ && + extensions->ARB_shader_image_size && + extensions->ARB_shader_storage_buffer_object && extensions->ARB_shading_language_packing && extensions->ARB_stencil_texturing && + extensions->ARB_texture_multisample && extensions->ARB_gpu_shader5 && extensions->EXT_shader_integer_mix); From a3b617a25859ebbfe74b294b7e6bee7af1d24b8c Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 24 Aug 2015 11:34:42 -0400 Subject: [PATCH 11/26] mesa: remove pointless es31 checks, fix indirect to only be in es31 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Tapani Pälli --- src/mesa/main/get.c | 37 +----------------------- src/mesa/main/get_hash_params.py | 48 ++++++++++++++++---------------- 2 files changed, 25 insertions(+), 60 deletions(-) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index c6919977087..680576cab8f 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -361,48 +361,13 @@ static const int extra_ARB_shader_image_load_store_and_tessellation[] = { EXTRA_END }; -static const int extra_ARB_draw_indirect_es31[] = { - EXT(ARB_draw_indirect), - EXTRA_API_ES31, - EXTRA_END -}; - -static const int extra_ARB_shader_image_load_store_es31[] = { - EXT(ARB_shader_image_load_store), - EXTRA_API_ES31, - EXTRA_END -}; - -static const int extra_ARB_shader_atomic_counters_es31[] = { - EXT(ARB_shader_atomic_counters), - EXTRA_API_ES31, - EXTRA_END -}; - -static const int extra_ARB_texture_multisample_es31[] = { - EXT(ARB_texture_multisample), - EXTRA_API_ES31, - EXTRA_END -}; - -static const int extra_ARB_texture_gather_es31[] = 
{ - EXT(ARB_texture_gather), - EXTRA_API_ES31, - EXTRA_END -}; - +/* HACK: remove when ARB_compute_shader is actually supported */ static const int extra_ARB_compute_shader_es31[] = { EXT(ARB_compute_shader), EXTRA_API_ES31, EXTRA_END }; -static const int extra_ARB_explicit_uniform_location_es31[] = { - EXT(ARB_explicit_uniform_location), - EXTRA_API_ES31, - EXTRA_END -}; - EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 517c391955d..73213f407f3 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -410,33 +410,33 @@ descriptor=[ # Enums in OpenGL and ES 3.1 { "apis": ["GL", "GL_CORE", "GLES31"], "params": [ # GL_ARB_shader_image_load_store / GLES 3.1 - [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store_es31" ], - [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ], - [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ], - [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store_es31" ], + [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store" ], + [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store" ], + [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store" ], + [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store" ], # GL_ARB_shader_atomic_counters / GLES 3.1 - [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters_es31" ], - [ 
"MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], - [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], + [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters" ], + [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters" ], + [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters" ], + [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ], + [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ], + [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ], + [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), 
extra_ARB_shader_atomic_counters" ], + [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters" ], + [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters" ], # GL_ARB_texture_multisample / GLES 3.1 - [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample_es31" ], - [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample_es31" ], - [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample_es31" ], - [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample_es31" ], - [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample_es31" ], - [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample_es31" ], + [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample" ], + [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample" ], + [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample" ], + [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample" ], + [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ], + [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ], # GL_ARB_texture_gather / GLES 3.1 - [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather_es31"], - [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather_es31"], + [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"], + [ 
"MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"], # GL_ARB_compute_shader / GLES 3.1 [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ], @@ -449,13 +449,13 @@ descriptor=[ [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], # GL_ARB_explicit_uniform_location / GLES 3.1 - [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location_es31" ], + [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ], ]}, # Enums in OpenGL Core profile and ES 3.1 -{ "apis": ["GL_CORE", "GLES3"], "params": [ +{ "apis": ["GL_CORE", "GLES31"], "params": [ # GL_ARB_draw_indirect / GLES 3.1 - [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect_es31" ], + [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ], ]}, # Remaining enums are only in OpenGL From 7b5c92391f15533ec02327d617c4e8639a2f8bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 11 Jul 2015 12:34:46 +0200 Subject: [PATCH 12/26] gallium: add an interface for dumping debug driver state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Paul Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/include/pipe/p_context.h | 12 ++++++++++++ src/gallium/include/pipe/p_defines.h | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f89dae98a2f..9d8f5bdc8d2 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -32,6 +32,7 @@ #include "p_format.h" #include "p_video_enums.h" #include "p_defines.h" +#include #ifdef __cplusplus extern "C" { @@ -617,6 
+618,17 @@ struct pipe_context { * Return information about unexpected device resets. */ enum pipe_reset_status (*get_device_reset_status)(struct pipe_context *ctx); + + /** + * Dump driver-specific debug information into a stream. This is + * used by debugging tools. + * + * \param ctx pipe context + * \param stream where the output should be written to + * \param flags a mask of PIPE_DEBUG_* flags + */ + void (*dump_debug_state)(struct pipe_context *ctx, FILE *stream, + unsigned flags); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 2ba56eac793..4f2aa14e129 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -328,6 +328,11 @@ enum pipe_flush_flags PIPE_FLUSH_END_OF_FRAME = (1 << 0) }; +/** + * Flags for pipe_context::dump_debug_state. + */ +#define PIPE_DEBUG_DEVICE_IS_HUNG (1 << 0) + /** * Flags for pipe_context::memory_barrier. */ From 0fc21ecfc0891d239f20bf7724e51bc75503570c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 25 Jul 2015 18:40:59 +0200 Subject: [PATCH 13/26] gallium: add flags parameter to pipe_screen::context_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows creating compute-only and debug contexts. 
Reviewed-by: Brian Paul Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/auxiliary/util/u_tests.c | 2 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 2 +- src/gallium/drivers/freedreno/a2xx/fd2_context.c | 2 +- src/gallium/drivers/freedreno/a2xx/fd2_context.h | 2 +- src/gallium/drivers/freedreno/a3xx/fd3_context.c | 2 +- src/gallium/drivers/freedreno/a3xx/fd3_context.h | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_context.c | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_context.h | 2 +- src/gallium/drivers/i915/i915_context.c | 2 +- src/gallium/drivers/i915/i915_context.h | 2 +- src/gallium/drivers/ilo/ilo_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 3 ++- src/gallium/drivers/llvmpipe/lp_context.h | 3 ++- src/gallium/drivers/noop/noop_pipe.c | 3 ++- src/gallium/drivers/nouveau/nv30/nv30_context.c | 2 +- src/gallium/drivers/nouveau/nv30/nv30_context.h | 2 +- src/gallium/drivers/nouveau/nv50/nv50_context.c | 2 +- src/gallium/drivers/nouveau/nv50/nv50_context.h | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 2 +- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r600/r600_pipe.c | 5 +++-- src/gallium/drivers/radeonsi/si_pipe.c | 5 +++-- src/gallium/drivers/rbug/rbug_screen.c | 6 +++--- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/svga/svga_context.c | 4 ++-- src/gallium/drivers/svga/svga_context.h | 2 +- src/gallium/drivers/trace/tr_screen.c | 7 +++++-- src/gallium/drivers/vc4/vc4_context.c | 2 +- src/gallium/drivers/vc4/vc4_context.h | 2 +- src/gallium/include/pipe/p_defines.h | 15 +++++++++++++++ src/gallium/include/pipe/p_screen.h | 11 +++++++++-- src/gallium/state_trackers/clover/core/queue.cpp | 2 +- src/gallium/state_trackers/glx/xlib/xm_st.c | 2 +- src/gallium/state_trackers/nine/device9.c | 2 +- 
src/gallium/state_trackers/omx/vid_dec.c | 2 +- src/gallium/state_trackers/omx/vid_enc.c | 4 ++-- src/gallium/state_trackers/va/context.c | 3 ++- src/gallium/state_trackers/vdpau/device.c | 2 +- src/gallium/state_trackers/xa/xa_context.c | 2 +- src/gallium/state_trackers/xvmc/context.c | 2 +- src/gallium/tests/graw/clear.c | 2 +- src/gallium/tests/graw/fs-test.c | 2 +- src/gallium/tests/graw/graw_util.h | 2 +- src/gallium/tests/graw/gs-test.c | 2 +- src/gallium/tests/graw/quad-sample.c | 2 +- src/gallium/tests/graw/shader-leak.c | 2 +- src/gallium/tests/graw/tri-gs.c | 2 +- src/gallium/tests/graw/tri-instanced.c | 2 +- src/gallium/tests/graw/vs-test.c | 2 +- src/gallium/tests/trivial/compute.c | 2 +- src/gallium/tests/trivial/quad-tex.c | 2 +- src/gallium/tests/trivial/tri.c | 2 +- src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c | 2 +- src/mesa/state_tracker/st_manager.c | 2 +- 57 files changed, 96 insertions(+), 65 deletions(-) diff --git a/src/gallium/auxiliary/util/u_tests.c b/src/gallium/auxiliary/util/u_tests.c index 6a489d63c09..a94e5cc2949 100644 --- a/src/gallium/auxiliary/util/u_tests.c +++ b/src/gallium/auxiliary/util/u_tests.c @@ -457,7 +457,7 @@ null_constant_buffer(struct pipe_context *ctx) void util_run_tests(struct pipe_screen *screen) { - struct pipe_context *ctx = screen->context_create(screen, NULL); + struct pipe_context *ctx = screen->context_create(screen, NULL, 0); tgsi_vs_window_space_position(ctx); null_sampler_view(ctx, TGSI_TEXTURE_2D); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index b7009837293..9d0e4a1eae0 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -1120,7 +1120,7 @@ vl_create_mpeg12_decoder(struct pipe_context *context, dec->base = *templat; dec->base.context = context; - dec->context = context->screen->context_create(context->screen, NULL); + dec->context = 
context->screen->context_create(context->screen, NULL, 0); dec->base.destroy = vl_mpeg12_destroy; dec->base.begin_frame = vl_mpeg12_begin_frame; diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c index 6089ebc1516..3bed73573a6 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c @@ -86,7 +86,7 @@ static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = { }; struct pipe_context * -fd2_context_create(struct pipe_screen *pscreen, void *priv) +fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct fd_screen *screen = fd_screen(pscreen); struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context); diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.h b/src/gallium/drivers/freedreno/a2xx/fd2_context.h index 74147107930..eeadf84ff20 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_context.h +++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.h @@ -47,6 +47,6 @@ fd2_context(struct fd_context *ctx) } struct pipe_context * -fd2_context_create(struct pipe_screen *pscreen, void *priv); +fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); #endif /* FD2_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index dc33783e398..74cbbf2edd8 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -98,7 +98,7 @@ static const uint8_t primtypes[PIPE_PRIM_MAX] = { }; struct pipe_context * -fd3_context_create(struct pipe_screen *pscreen, void *priv) +fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct fd_screen *screen = fd_screen(pscreen); struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h index 6e20b2ff9bc..250bcf89596 
100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -119,6 +119,6 @@ fd3_context(struct fd_context *ctx) } struct pipe_context * -fd3_context_create(struct pipe_screen *pscreen, void *priv); +fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); #endif /* FD3_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index e172d350517..625512ccd1b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -96,7 +96,7 @@ static const uint8_t primtypes[PIPE_PRIM_MAX] = { }; struct pipe_context * -fd4_context_create(struct pipe_screen *pscreen, void *priv) +fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct fd_screen *screen = fd_screen(pscreen); struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 0b749916841..af9475699db 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -97,6 +97,6 @@ fd4_context(struct fd_context *ctx) } struct pipe_context * -fd4_context_create(struct pipe_screen *pscreen, void *priv); +fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); #endif /* FD4_CONTEXT_H_ */ diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 788381bbe2e..05f8e93ddea 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -155,7 +155,7 @@ static void i915_destroy(struct pipe_context *pipe) } struct pipe_context * -i915_create_context(struct pipe_screen *screen, void *priv) +i915_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct i915_context *i915; diff --git 
a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index c8c7d64f5cb..1ed685188db 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -401,7 +401,7 @@ void i915_init_string_functions( struct i915_context *i915 ); * i915_context.c */ struct pipe_context *i915_create_context(struct pipe_screen *screen, - void *priv); + void *priv, unsigned flags); /*********************************************************************** diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c index b9a16aab81d..2a00cf1c93c 100644 --- a/src/gallium/drivers/ilo/ilo_context.c +++ b/src/gallium/drivers/ilo/ilo_context.c @@ -135,7 +135,7 @@ ilo_context_destroy(struct pipe_context *pipe) } static struct pipe_context * -ilo_context_create(struct pipe_screen *screen, void *priv) +ilo_context_create(struct pipe_screen *screen, void *priv, unsigned flags) { struct ilo_screen *is = ilo_screen(screen); struct ilo_context *ilo; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 80cb6578bd1..bd7c0a1e4af 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -128,7 +128,8 @@ llvmpipe_render_condition ( struct pipe_context *pipe, } struct pipe_context * -llvmpipe_create_context( struct pipe_screen *screen, void *priv ) +llvmpipe_create_context(struct pipe_screen *screen, void *priv, + unsigned flags) { struct llvmpipe_context *llvmpipe; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index c273b25f096..c9a5d678244 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -160,7 +160,8 @@ struct llvmpipe_context { struct pipe_context * -llvmpipe_create_context( struct pipe_screen *screen, void *priv ); +llvmpipe_create_context(struct pipe_screen *screen, void *priv, + unsigned flags); 
struct pipe_resource * llvmpipe_user_buffer_create(struct pipe_screen *screen, diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index aeec6778b6d..e644685123e 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -260,7 +260,8 @@ static void noop_destroy_context(struct pipe_context *ctx) FREE(ctx); } -static struct pipe_context *noop_create_context(struct pipe_screen *screen, void *priv) +static struct pipe_context *noop_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) { struct pipe_context *ctx = CALLOC_STRUCT(pipe_context); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c index 6e88ed725d6..46590eecdf3 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_context.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c @@ -190,7 +190,7 @@ nv30_context_destroy(struct pipe_context *pipe) } while(0) struct pipe_context * -nv30_context_create(struct pipe_screen *pscreen, void *priv) +nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct nv30_screen *screen = nv30_screen(pscreen); struct nv30_context *nv30 = CALLOC_STRUCT(nv30_context); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h index d5c18bb62dc..0ab2f95bc20 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_context.h +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h @@ -132,7 +132,7 @@ nv30_context(struct pipe_context *pipe) } struct pipe_context * -nv30_context_create(struct pipe_screen *pscreen, void *priv); +nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); void nv30_vbo_init(struct pipe_context *pipe); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index f8d46db7c67..11638dd7f14 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ 
b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -240,7 +240,7 @@ nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned, float *); struct pipe_context * -nv50_create(struct pipe_screen *pscreen, void *priv) +nv50_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct nv50_screen *screen = nv50_screen(pscreen); struct nv50_context *nv50; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index ce12e714774..e7adf472ed0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -186,7 +186,7 @@ nv50_context_shader_stage(unsigned pipe) } /* nv50_context.c */ -struct pipe_context *nv50_create(struct pipe_screen *, void *); +struct pipe_context *nv50_create(struct pipe_screen *, void *, unsigned flags); void nv50_bufctx_fence(struct nouveau_bufctx *, bool on_flush); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 7a15a11f560..613cad69aa5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -262,7 +262,7 @@ nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned, float *); struct pipe_context * -nvc0_create(struct pipe_screen *pscreen, void *priv) +nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct nvc0_screen *screen = nvc0_screen(pscreen); struct nvc0_context *nvc0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index df1a891a43e..6ed79cf9553 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -214,7 +214,7 @@ nvc0_shader_stage(unsigned pipe) /* nvc0_context.c */ -struct pipe_context *nvc0_create(struct pipe_screen *, void *); +struct pipe_context *nvc0_create(struct pipe_screen *, void *, unsigned flags); 
void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *, bool on_flush); void nvc0_default_kick_notify(struct nouveau_pushbuf *); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 8c24ad6d98a..b393769c861 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -363,7 +363,7 @@ static void r300_init_states(struct pipe_context *pipe) } struct pipe_context* r300_create_context(struct pipe_screen* screen, - void *priv) + void *priv, unsigned flags) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 18ae11a3a24..f298d88004b 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -705,7 +705,7 @@ r300_get_nonnull_cb(struct pipe_framebuffer_state *fb, unsigned i) } struct pipe_context* r300_create_context(struct pipe_screen* screen, - void *priv); + void *priv, unsigned flags); /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 6ffe5615fbf..f6efaa312f2 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -108,7 +108,8 @@ static void r600_destroy_context(struct pipe_context *context) FREE(rctx); } -static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +static struct pipe_context *r600_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) { struct r600_context *rctx = CALLOC_STRUCT(r600_context); struct r600_screen* rscreen = (struct r600_screen *)screen; @@ -624,7 +625,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen->global_pool = compute_memory_pool_new(rscreen); /* Create the auxiliary context. 
This must be done last. */ - rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL); + rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL, 0); #if 0 /* This is for testing whether aux_context and buffer clearing work correctly. */ struct pipe_resource templ = {}; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 473a2e9ad12..029b3cce488 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -92,7 +92,8 @@ si_amdgpu_get_reset_status(struct pipe_context *ctx) return sctx->b.ws->ctx_query_reset_status(sctx->b.ctx); } -static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv) +static struct pipe_context *si_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; @@ -586,7 +587,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; /* Create the auxiliary context. This must be done last. 
*/ - sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL); + sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0); return &sscreen->b.b; } diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c index 7da4e81560a..ac764029a2f 100644 --- a/src/gallium/drivers/rbug/rbug_screen.c +++ b/src/gallium/drivers/rbug/rbug_screen.c @@ -129,13 +129,13 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen, static struct pipe_context * rbug_screen_context_create(struct pipe_screen *_screen, - void *priv) + void *priv, unsigned flags) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; struct pipe_context *result; - result = screen->context_create(screen, priv); + result = screen->context_create(screen, priv, flags); if (result) return rbug_context_create(_screen, result); return NULL; @@ -281,7 +281,7 @@ rbug_screen_create(struct pipe_screen *screen) rb_screen->screen = screen; - rb_screen->private_context = screen->context_create(screen, NULL); + rb_screen->private_context = screen->context_create(screen, NULL, 0); if (!rb_screen->private_context) goto err_free; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 34d2b80f19b..d2a32200e47 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -186,8 +186,8 @@ softpipe_render_condition( struct pipe_context *pipe, struct pipe_context * -softpipe_create_context( struct pipe_screen *screen, - void *priv ) +softpipe_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) { struct softpipe_screen *sp_screen = softpipe_screen(screen); struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 577df814b29..073b71a913e 100644 --- 
a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -211,7 +211,7 @@ softpipe_context( struct pipe_context *pipe ) struct pipe_context * -softpipe_create_context( struct pipe_screen *, void *priv ); +softpipe_create_context(struct pipe_screen *, void *priv, unsigned flags); struct pipe_resource * softpipe_user_buffer_create(struct pipe_screen *screen, diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 0ffff9c506b..673d17ad4a7 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -81,8 +81,8 @@ static void svga_destroy( struct pipe_context *pipe ) -struct pipe_context *svga_context_create( struct pipe_screen *screen, - void *priv ) +struct pipe_context *svga_context_create(struct pipe_screen *screen, + void *priv, unsigned flags) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_context *svga = NULL; diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 71f038df8c1..2726346bc50 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -478,7 +478,7 @@ void svga_surfaces_flush(struct svga_context *svga); struct pipe_context * svga_context_create(struct pipe_screen *screen, - void *priv); + void *priv, unsigned flags); /*********************************************************************** diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 1d86a378eea..8b02680c77e 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -204,7 +204,8 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, static struct pipe_context * -trace_screen_context_create(struct pipe_screen *_screen, void *priv) +trace_screen_context_create(struct pipe_screen *_screen, void *priv, + unsigned flags) { struct trace_screen *tr_scr = trace_screen(_screen); 
struct pipe_screen *screen = tr_scr->screen; @@ -213,8 +214,10 @@ trace_screen_context_create(struct pipe_screen *_screen, void *priv) trace_dump_call_begin("pipe_screen", "context_create"); trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, priv); + trace_dump_arg(uint, flags); - result = screen->context_create(screen, priv); + result = screen->context_create(screen, priv, flags); trace_dump_ret(ptr, result); diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index fff63158c9d..87d781d088d 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -194,7 +194,7 @@ vc4_context_destroy(struct pipe_context *pctx) } struct pipe_context * -vc4_context_create(struct pipe_screen *pscreen, void *priv) +vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct vc4_screen *screen = vc4_screen(pscreen); struct vc4_context *vc4; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 3a63af8f2b0..33b6ec2b92d 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -365,7 +365,7 @@ vc4_sampler_state(struct pipe_sampler_state *psampler) } struct pipe_context *vc4_context_create(struct pipe_screen *pscreen, - void *priv); + void *priv, unsigned flags); void vc4_draw_init(struct pipe_context *pctx); void vc4_state_init(struct pipe_context *pctx); void vc4_program_init(struct pipe_context *pctx); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 4f2aa14e129..88e37e9f056 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -333,6 +333,21 @@ enum pipe_flush_flags */ #define PIPE_DEBUG_DEVICE_IS_HUNG (1 << 0) +/** + * Create a compute-only context. Use in pipe_screen::context_create. + * This disables draw, blit, and clear*, render_condition, and other graphics + * functions. 
Interop with other graphics contexts is still allowed. + * This allows scheduling jobs on a compute-only hardware command queue that + * can run in parallel with graphics without stalling it. + */ +#define PIPE_CONTEXT_COMPUTE_ONLY (1 << 0) + +/** + * Gather debug information and expect that pipe_context::dump_debug_state + * will be called. Use in pipe_screen::context_create. + */ +#define PIPE_CONTEXT_DEBUG (1 << 1) + /** * Flags for pipe_context::memory_barrier. */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 0d2658313e5..a7b7b72ac89 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -125,8 +125,15 @@ struct pipe_screen { */ uint64_t (*get_timestamp)(struct pipe_screen *); - struct pipe_context * (*context_create)( struct pipe_screen *, - void *priv ); + /** + * Create a context. + * + * \param screen pipe screen + * \param priv a pointer to set in pipe_context::priv + * \param flags a mask of PIPE_CONTEXT_* flags + */ + struct pipe_context * (*context_create)(struct pipe_screen *screen, + void *priv, unsigned flags); /** * Check if the given pipe_format is supported as a texture or diff --git a/src/gallium/state_trackers/clover/core/queue.cpp b/src/gallium/state_trackers/clover/core/queue.cpp index 87f9dcc6476..4aaf67de241 100644 --- a/src/gallium/state_trackers/clover/core/queue.cpp +++ b/src/gallium/state_trackers/clover/core/queue.cpp @@ -30,7 +30,7 @@ using namespace clover; command_queue::command_queue(clover::context &ctx, clover::device &dev, cl_command_queue_properties props) : context(ctx), device(dev), props(props) { - pipe = dev.pipe->context_create(dev.pipe, NULL); + pipe = dev.pipe->context_create(dev.pipe, NULL, PIPE_CONTEXT_COMPUTE_ONLY); if (!pipe) throw error(CL_INVALID_DEVICE); } diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c index 9d0f2d25025..f598430dc26 100644 --- 
a/src/gallium/state_trackers/glx/xlib/xm_st.c +++ b/src/gallium/state_trackers/glx/xlib/xm_st.c @@ -398,7 +398,7 @@ xmesa_get_context(struct st_framebuffer_iface *stfbi) pipe = xstfb->display->pipe; if (!pipe) { - pipe = xstfb->screen->context_create(xstfb->screen, NULL); + pipe = xstfb->screen->context_create(xstfb->screen, NULL, 0); if (!pipe) return NULL; xstfb->display->pipe = pipe; diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 99197a4361b..f14ffea13e1 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -163,7 +163,7 @@ NineDevice9_ctor( struct NineDevice9 *This, if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) DBG("Application asked mixed Software Vertex Processing. Ignoring.\n"); - This->pipe = This->screen->context_create(This->screen, NULL); + This->pipe = This->screen->context_create(This->screen, NULL, 0); if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ This->cso = cso_create_context(This->pipe); diff --git a/src/gallium/state_trackers/omx/vid_dec.c b/src/gallium/state_trackers/omx/vid_dec.c index 9e7e7ba7787..5584348761e 100644 --- a/src/gallium/state_trackers/omx/vid_dec.c +++ b/src/gallium/state_trackers/omx/vid_dec.c @@ -162,7 +162,7 @@ static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam return OMX_ErrorInsufficientResources; screen = priv->screen->pscreen; - priv->pipe = screen->context_create(screen, priv->screen); + priv->pipe = screen->context_create(screen, priv->screen, 0); if (!priv->pipe) return OMX_ErrorInsufficientResources; diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c index 2bd0194189f..aa45089ae04 100644 --- a/src/gallium/state_trackers/omx/vid_enc.c +++ b/src/gallium/state_trackers/omx/vid_enc.c @@ -185,7 +185,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam PIPE_VIDEO_ENTRYPOINT_ENCODE, 
PIPE_VIDEO_CAP_STACKED_FRAMES); - priv->s_pipe = screen->context_create(screen, priv->screen); + priv->s_pipe = screen->context_create(screen, priv->screen, 0); if (!priv->s_pipe) return OMX_ErrorInsufficientResources; @@ -202,7 +202,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam return OMX_ErrorInsufficientResources; } - priv->t_pipe = screen->context_create(screen, priv->screen); + priv->t_pipe = screen->context_create(screen, priv->screen, 0); if (!priv->t_pipe) return OMX_ErrorInsufficientResources; diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 0a733b39004..8b003aedaec 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -100,7 +100,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) if (!drv->vscreen) goto error_screen; - drv->pipe = drv->vscreen->pscreen->context_create(drv->vscreen->pscreen, drv->vscreen); + drv->pipe = drv->vscreen->pscreen->context_create(drv->vscreen->pscreen, + drv->vscreen, 0); if (!drv->pipe) goto error_pipe; diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c index ba36afc9421..31c95054f56 100644 --- a/src/gallium/state_trackers/vdpau/device.c +++ b/src/gallium/state_trackers/vdpau/device.c @@ -70,7 +70,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, } pscreen = dev->vscreen->pscreen; - dev->context = pscreen->context_create(pscreen, dev->vscreen); + dev->context = pscreen->context_create(pscreen, dev->vscreen, 0); if (!dev->context) { ret = VDP_STATUS_RESOURCES; goto no_context; diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c index ebfb290af13..5553beb2014 100644 --- a/src/gallium/state_trackers/xa/xa_context.c +++ b/src/gallium/state_trackers/xa/xa_context.c @@ -56,7 +56,7 @@ xa_context_create(struct xa_tracker *xa) struct xa_context *ctx = calloc(1, sizeof(*ctx)); 
ctx->xa = xa; - ctx->pipe = xa->screen->context_create(xa->screen, NULL); + ctx->pipe = xa->screen->context_create(xa->screen, NULL, 0); ctx->cso = cso_create_context(ctx->pipe); ctx->shaders = xa_shaders_create(ctx); renderer_init_state(ctx); diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c index 9ded2e5f2e2..4702b44d1f4 100644 --- a/src/gallium/state_trackers/xvmc/context.c +++ b/src/gallium/state_trackers/xvmc/context.c @@ -237,7 +237,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, return BadAlloc; } - pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen); + pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen, 0); if (!pipe) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n"); vl_screen_destroy(vscreen); diff --git a/src/gallium/tests/graw/clear.c b/src/gallium/tests/graw/clear.c index f38da47407f..533ce9f58d0 100644 --- a/src/gallium/tests/graw/clear.c +++ b/src/gallium/tests/graw/clear.c @@ -61,7 +61,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index fc5803fd3a9..bd5259afe46 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -398,7 +398,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/graw_util.h b/src/gallium/tests/graw/graw_util.h index e7cd0aa3ac3..f09c1eadc9c 100644 --- a/src/gallium/tests/graw/graw_util.h +++ b/src/gallium/tests/graw/graw_util.h @@ -60,7 +60,7 @@ graw_util_create_window(struct graw_info *info, return FALSE; } - info->ctx = info->screen->context_create(info->screen, NULL); + info->ctx = info->screen->context_create(info->screen, NULL, 0); if 
(info->ctx == NULL) { debug_printf("graw: Failed to create context\n"); return FALSE; diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index b685323fe3c..c680b62eaaa 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -505,7 +505,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/quad-sample.c b/src/gallium/tests/graw/quad-sample.c index 2953fe16a8a..97f241ff844 100644 --- a/src/gallium/tests/graw/quad-sample.c +++ b/src/gallium/tests/graw/quad-sample.c @@ -313,7 +313,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c index 3c585c0a5a6..a4502afff64 100644 --- a/src/gallium/tests/graw/shader-leak.c +++ b/src/gallium/tests/graw/shader-leak.c @@ -188,7 +188,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index 5c6f426e96a..6aad51c94a0 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -195,7 +195,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index a71bf71b61b..2065c11f68d 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -246,7 +246,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) 
exit(3); diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 5189d815f4c..5c7843eb11a 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -392,7 +392,7 @@ static void init( void ) exit(1); } - ctx = screen->context_create(screen, NULL); + ctx = screen->context_create(screen, NULL, 0); if (ctx == NULL) exit(3); diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index 4edb8576f52..b344f78b25c 100644 --- a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -77,7 +77,7 @@ static void init_ctx(struct context *ctx) ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); assert(ctx->screen); - ctx->pipe = ctx->screen->context_create(ctx->screen, NULL); + ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0); assert(ctx->pipe); DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index c019c7bb0a3..f66f63043da 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -100,7 +100,7 @@ static void init_prog(struct program *p) assert(p->screen); /* create the pipe driver context and cso context */ - p->pipe = p->screen->context_create(p->screen, NULL); + p->pipe = p->screen->context_create(p->screen, NULL, 0); p->cso = cso_create_context(p->pipe); /* set clear color */ diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 078beb8f43f..a555200842e 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -95,7 +95,7 @@ static void init_prog(struct program *p) assert(p->screen); /* create the pipe driver context and cso context */ - p->pipe = p->screen->context_create(p->screen, NULL); + p->pipe = p->screen->context_create(p->screen, NULL, 0); p->cso = cso_create_context(p->pipe); /* set clear color */ diff --git 
a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index 9b90eaa018b..5c179930d9b 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -299,7 +299,7 @@ wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen) wsw->base.destroy = wsw_destroy; wsw->screen = screen; - wsw->pipe = screen->context_create(screen, NULL); + wsw->pipe = screen->context_create(screen, NULL, 0); if (!wsw->pipe) goto err_free; diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 2e2c8ffaed9..7abd128e719 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -657,7 +657,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, break; } - pipe = smapi->screen->context_create(smapi->screen, NULL); + pipe = smapi->screen->context_create(smapi->screen, NULL, 0); if (!pipe) { *error = ST_CONTEXT_ERROR_NO_MEMORY; return NULL; From 525921ed51176255474c73adacfc6801a7bf2783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 4 Jul 2015 14:10:21 +0200 Subject: [PATCH 14/26] gallium/ddebug: new pipe for hang detection and driver state dumping (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: lots of improvements This is like identity or trace, but simpler. It doesn't wrap most states. Run with: GALLIUM_DDEBUG=1000 [executable] where "executable" is the app and "1000" is in milliseconds, meaning that the context will be considered hung if a fence fails to signal in 1000 ms. If that happens, all shaders, context states, bound resources, draw parameters, and driver debug information (if any) will be dumped into: /home/$username/dd_dumps/$processname_$pid_$index. Note that the context is flushed after every draw/clear/copy/blit operation and then waited for to find the exact call that hangs. 
You can also do: GALLIUM_DDEBUG=always to do the dumping after every draw/clear/copy/blit operation without flushing and waiting. Examples of driver states that can be dumped are: - Hardware status registers saying which hw block is busy (hung). - Disassembled shaders in a human-readable form. - The last submitted command buffer in a human-readable form. v2: drop pipe-loader changes, drop SConscript rename dd.h -> dd_pipe.h Acked-by: Christian König Acked-by: Alex Deucher --- configure.ac | 1 + src/gallium/Makefile.am | 1 + .../target-helpers/inline_debug_helper.h | 8 + src/gallium/drivers/ddebug/Makefile.am | 9 + src/gallium/drivers/ddebug/Makefile.sources | 6 + src/gallium/drivers/ddebug/dd_context.c | 771 +++++++++++++++++ src/gallium/drivers/ddebug/dd_draw.c | 807 ++++++++++++++++++ src/gallium/drivers/ddebug/dd_pipe.h | 141 +++ src/gallium/drivers/ddebug/dd_public.h | 36 + src/gallium/drivers/ddebug/dd_screen.c | 353 ++++++++ src/gallium/targets/dri/Makefile.am | 2 + 11 files changed, 2135 insertions(+) create mode 100644 src/gallium/drivers/ddebug/Makefile.am create mode 100644 src/gallium/drivers/ddebug/Makefile.sources create mode 100644 src/gallium/drivers/ddebug/dd_context.c create mode 100644 src/gallium/drivers/ddebug/dd_draw.c create mode 100644 src/gallium/drivers/ddebug/dd_pipe.h create mode 100644 src/gallium/drivers/ddebug/dd_public.h create mode 100644 src/gallium/drivers/ddebug/dd_screen.c diff --git a/configure.ac b/configure.ac index e3b5f2e3124..90ba4feb572 100644 --- a/configure.ac +++ b/configure.ac @@ -2317,6 +2317,7 @@ AC_CONFIG_FILES([Makefile src/gallium/auxiliary/Makefile src/gallium/auxiliary/pipe-loader/Makefile src/gallium/drivers/freedreno/Makefile + src/gallium/drivers/ddebug/Makefile src/gallium/drivers/i915/Makefile src/gallium/drivers/ilo/Makefile src/gallium/drivers/llvmpipe/Makefile diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index e2c1090aa26..a7c3606de0a 100644 --- a/src/gallium/Makefile.am +++ 
b/src/gallium/Makefile.am @@ -11,6 +11,7 @@ SUBDIRS += auxiliary ## SUBDIRS += \ + drivers/ddebug \ drivers/noop \ drivers/trace \ drivers/rbug diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h index d353ab81e34..2443bf21468 100644 --- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h @@ -11,6 +11,10 @@ * one or more debug driver: rbug, trace. */ +#ifdef GALLIUM_DDEBUG +#include "ddebug/dd_public.h" +#endif + #ifdef GALLIUM_TRACE #include "trace/tr_public.h" #endif @@ -30,6 +34,10 @@ static inline struct pipe_screen * debug_screen_wrap(struct pipe_screen *screen) { +#if defined(GALLIUM_DDEBUG) + screen = ddebug_screen_create(screen); +#endif + #if defined(GALLIUM_RBUG) screen = rbug_screen_create(screen); #endif diff --git a/src/gallium/drivers/ddebug/Makefile.am b/src/gallium/drivers/ddebug/Makefile.am new file mode 100644 index 00000000000..f0e1662d0bd --- /dev/null +++ b/src/gallium/drivers/ddebug/Makefile.am @@ -0,0 +1,9 @@ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) + +noinst_LTLIBRARIES = libddebug.la + +libddebug_la_SOURCES = $(C_SOURCES) diff --git a/src/gallium/drivers/ddebug/Makefile.sources b/src/gallium/drivers/ddebug/Makefile.sources new file mode 100644 index 00000000000..780edd8aa00 --- /dev/null +++ b/src/gallium/drivers/ddebug/Makefile.sources @@ -0,0 +1,6 @@ +C_SOURCES := \ + dd_pipe.h \ + dd_public.h \ + dd_context.c \ + dd_draw.c \ + dd_screen.c diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c new file mode 100644 index 00000000000..3ae7764ff3f --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_context.c @@ -0,0 +1,771 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "dd_pipe.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + + +static void +safe_memcpy(void *dst, const void *src, size_t size) +{ + if (src) + memcpy(dst, src, size); + else + memset(dst, 0, size); +} + + +/******************************************************************** + * queries + */ + +static struct dd_query * +dd_query(struct pipe_query *query) +{ + return (struct dd_query *)query; +} + +static struct pipe_query * +dd_query_unwrap(struct pipe_query *query) +{ + if (query) { + return dd_query(query)->query; + } else { + return NULL; + } +} + +static struct pipe_query * +dd_context_create_query(struct pipe_context *_pipe, unsigned query_type, + unsigned index) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct pipe_query *query; + + query = pipe->create_query(pipe, query_type, index); + + /* Wrap query object. */ + if (query) { + struct dd_query *dd_query = CALLOC_STRUCT(dd_query); + if (dd_query) { + dd_query->type = query_type; + dd_query->query = query; + query = (struct pipe_query *)dd_query; + } else { + pipe->destroy_query(pipe, query); + query = NULL; + } + } + + return query; +} + +static void +dd_context_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->destroy_query(pipe, dd_query_unwrap(query)); + FREE(query); +} + +static boolean +dd_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + return pipe->begin_query(pipe, dd_query_unwrap(query)); +} + +static void +dd_context_end_query(struct pipe_context *_pipe, struct pipe_query *query) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + pipe->end_query(pipe, dd_query_unwrap(query)); +} + +static boolean 
+dd_context_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, boolean wait, + union pipe_query_result *result) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + return pipe->get_query_result(pipe, dd_query_unwrap(query), wait, result); +} + +static void +dd_context_render_condition(struct pipe_context *_pipe, + struct pipe_query *query, boolean condition, + uint mode) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + pipe->render_condition(pipe, dd_query_unwrap(query), condition, mode); + dctx->render_cond.query = dd_query(query); + dctx->render_cond.condition = condition; + dctx->render_cond.mode = mode; +} + + +/******************************************************************** + * constant (immutable) non-shader states + */ + +#define DD_CSO_CREATE(name, shortname) \ + static void * \ + dd_context_create_##name##_state(struct pipe_context *_pipe, \ + const struct pipe_##name##_state *state) \ + { \ + struct pipe_context *pipe = dd_context(_pipe)->pipe; \ + struct dd_state *hstate = CALLOC_STRUCT(dd_state); \ + \ + if (!hstate) \ + return NULL; \ + hstate->cso = pipe->create_##name##_state(pipe, state); \ + hstate->state.shortname = *state; \ + return hstate; \ + } + +#define DD_CSO_BIND(name, shortname) \ + static void \ + dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \ + { \ + struct dd_context *dctx = dd_context(_pipe); \ + struct pipe_context *pipe = dctx->pipe; \ + struct dd_state *hstate = state; \ + \ + dctx->shortname = hstate; \ + pipe->bind_##name##_state(pipe, hstate ? 
hstate->cso : NULL); \ + } + +#define DD_CSO_DELETE(name) \ + static void \ + dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \ + { \ + struct dd_context *dctx = dd_context(_pipe); \ + struct pipe_context *pipe = dctx->pipe; \ + struct dd_state *hstate = state; \ + \ + pipe->delete_##name##_state(pipe, hstate->cso); \ + FREE(hstate); \ + } + +#define DD_CSO_WHOLE(name, shortname) \ + DD_CSO_CREATE(name, shortname) \ + DD_CSO_BIND(name, shortname) \ + DD_CSO_DELETE(name) + +DD_CSO_WHOLE(blend, blend) +DD_CSO_WHOLE(rasterizer, rs) +DD_CSO_WHOLE(depth_stencil_alpha, dsa) + +DD_CSO_CREATE(sampler, sampler) +DD_CSO_DELETE(sampler) + +static void +dd_context_bind_sampler_states(struct pipe_context *_pipe, unsigned shader, + unsigned start, unsigned count, void **states) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + memcpy(&dctx->sampler_states[shader][start], states, + sizeof(void*) * count); + + if (states) { + void *samp[PIPE_MAX_SAMPLERS]; + int i; + + for (i = 0; i < count; i++) { + struct dd_state *s = states[i]; + samp[i] = s ? 
s->cso : NULL; + } + + pipe->bind_sampler_states(pipe, shader, start, count, samp); + } + else + pipe->bind_sampler_states(pipe, shader, start, count, NULL); +} + +static void * +dd_context_create_vertex_elements_state(struct pipe_context *_pipe, + unsigned num_elems, + const struct pipe_vertex_element *elems) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct dd_state *hstate = CALLOC_STRUCT(dd_state); + + if (!hstate) + return NULL; + hstate->cso = pipe->create_vertex_elements_state(pipe, num_elems, elems); + memcpy(hstate->state.velems.velems, elems, sizeof(elems[0]) * num_elems); + hstate->state.velems.count = num_elems; + return hstate; +} + +DD_CSO_BIND(vertex_elements, velems) +DD_CSO_DELETE(vertex_elements) + + +/******************************************************************** + * shaders + */ + +#define DD_SHADER(NAME, name) \ + static void * \ + dd_context_create_##name##_state(struct pipe_context *_pipe, \ + const struct pipe_shader_state *state) \ + { \ + struct pipe_context *pipe = dd_context(_pipe)->pipe; \ + struct dd_state *hstate = CALLOC_STRUCT(dd_state); \ + \ + if (!hstate) \ + return NULL; \ + hstate->cso = pipe->create_##name##_state(pipe, state); \ + hstate->state.shader = *state; \ + hstate->state.shader.tokens = tgsi_dup_tokens(state->tokens); \ + return hstate; \ + } \ + \ + static void \ + dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \ + { \ + struct dd_context *dctx = dd_context(_pipe); \ + struct pipe_context *pipe = dctx->pipe; \ + struct dd_state *hstate = state; \ + \ + dctx->shaders[PIPE_SHADER_##NAME] = hstate; \ + pipe->bind_##name##_state(pipe, hstate ? 
hstate->cso : NULL); \ + } \ + \ + static void \ + dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \ + { \ + struct dd_context *dctx = dd_context(_pipe); \ + struct pipe_context *pipe = dctx->pipe; \ + struct dd_state *hstate = state; \ + \ + pipe->delete_##name##_state(pipe, hstate->cso); \ + tgsi_free_tokens(hstate->state.shader.tokens); \ + FREE(hstate); \ + } + +DD_SHADER(FRAGMENT, fs) +DD_SHADER(VERTEX, vs) +DD_SHADER(GEOMETRY, gs) +DD_SHADER(TESS_CTRL, tcs) +DD_SHADER(TESS_EVAL, tes) + + +/******************************************************************** + * immediate states + */ + +#define DD_IMM_STATE(name, type, deref, ref) \ + static void \ + dd_context_set_##name(struct pipe_context *_pipe, type deref) \ + { \ + struct dd_context *dctx = dd_context(_pipe); \ + struct pipe_context *pipe = dctx->pipe; \ + \ + dctx->name = deref; \ + pipe->set_##name(pipe, ref); \ + } + +DD_IMM_STATE(blend_color, const struct pipe_blend_color, *state, state) +DD_IMM_STATE(stencil_ref, const struct pipe_stencil_ref, *state, state) +DD_IMM_STATE(clip_state, const struct pipe_clip_state, *state, state) +DD_IMM_STATE(sample_mask, unsigned, sample_mask, sample_mask) +DD_IMM_STATE(min_samples, unsigned, min_samples, min_samples) +DD_IMM_STATE(framebuffer_state, const struct pipe_framebuffer_state, *state, state) +DD_IMM_STATE(polygon_stipple, const struct pipe_poly_stipple, *state, state) + +static void +dd_context_set_constant_buffer(struct pipe_context *_pipe, + uint shader, uint index, + struct pipe_constant_buffer *constant_buffer) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->constant_buffers[shader][index], constant_buffer, + sizeof(*constant_buffer)); + pipe->set_constant_buffer(pipe, shader, index, constant_buffer); +} + +static void +dd_context_set_scissor_states(struct pipe_context *_pipe, + unsigned start_slot, unsigned num_scissors, + const struct pipe_scissor_state 
*states) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->scissors[start_slot], states, + sizeof(*states) * num_scissors); + pipe->set_scissor_states(pipe, start_slot, num_scissors, states); +} + +static void +dd_context_set_viewport_states(struct pipe_context *_pipe, + unsigned start_slot, unsigned num_viewports, + const struct pipe_viewport_state *states) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->viewports[start_slot], states, + sizeof(*states) * num_viewports); + pipe->set_viewport_states(pipe, start_slot, num_viewports, states); +} + +static void dd_context_set_tess_state(struct pipe_context *_pipe, + const float default_outer_level[4], + const float default_inner_level[2]) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + memcpy(dctx->tess_default_levels, default_outer_level, sizeof(float) * 4); + memcpy(dctx->tess_default_levels+4, default_inner_level, sizeof(float) * 2); + pipe->set_tess_state(pipe, default_outer_level, default_inner_level); +} + + +/******************************************************************** + * views + */ + +static struct pipe_surface * +dd_context_create_surface(struct pipe_context *_pipe, + struct pipe_resource *resource, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct pipe_surface *view = + pipe->create_surface(pipe, resource, surf_tmpl); + + if (!view) + return NULL; + view->context = _pipe; + return view; +} + +static void +dd_context_surface_destroy(struct pipe_context *_pipe, + struct pipe_surface *surf) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->surface_destroy(pipe, surf); +} + +static struct pipe_sampler_view * +dd_context_create_sampler_view(struct pipe_context *_pipe, + struct pipe_resource *resource, + const struct pipe_sampler_view *templ) +{ + struct 
pipe_context *pipe = dd_context(_pipe)->pipe; + struct pipe_sampler_view *view = + pipe->create_sampler_view(pipe, resource, templ); + + if (!view) + return NULL; + view->context = _pipe; + return view; +} + +static void +dd_context_sampler_view_destroy(struct pipe_context *_pipe, + struct pipe_sampler_view *view) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->sampler_view_destroy(pipe, view); +} + +static struct pipe_image_view * +dd_context_create_image_view(struct pipe_context *_pipe, + struct pipe_resource *resource, + const struct pipe_image_view *templ) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct pipe_image_view *view = + pipe->create_image_view(pipe, resource, templ); + + if (!view) + return NULL; + view->context = _pipe; + return view; +} + +static void +dd_context_image_view_destroy(struct pipe_context *_pipe, + struct pipe_image_view *view) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->image_view_destroy(pipe, view); +} + +static struct pipe_stream_output_target * +dd_context_create_stream_output_target(struct pipe_context *_pipe, + struct pipe_resource *res, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct pipe_stream_output_target *view = + pipe->create_stream_output_target(pipe, res, buffer_offset, + buffer_size); + + if (!view) + return NULL; + view->context = _pipe; + return view; +} + +static void +dd_context_stream_output_target_destroy(struct pipe_context *_pipe, + struct pipe_stream_output_target *target) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->stream_output_target_destroy(pipe, target); +} + + +/******************************************************************** + * set states + */ + +static void +dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader, + unsigned start, unsigned num, + struct pipe_sampler_view **views) +{ + struct dd_context *dctx = dd_context(_pipe); 
+ struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->sampler_views[shader][start], views, + sizeof(views[0]) * num); + pipe->set_sampler_views(pipe, shader, start, num, views); +} + +static void +dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader, + unsigned start, unsigned num, + struct pipe_image_view **views) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->shader_images[shader][start], views, + sizeof(views[0]) * num); + pipe->set_shader_images(pipe, shader, start, num, views); +} + +static void +dd_context_set_shader_buffers(struct pipe_context *_pipe, unsigned shader, + unsigned start, unsigned num_buffers, + struct pipe_shader_buffer *buffers) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->shader_buffers[shader][start], buffers, + sizeof(buffers[0]) * num_buffers); + pipe->set_shader_buffers(pipe, shader, start, num_buffers, buffers); +} + +static void +dd_context_set_vertex_buffers(struct pipe_context *_pipe, + unsigned start, unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->vertex_buffers[start], buffers, + sizeof(buffers[0]) * num_buffers); + pipe->set_vertex_buffers(pipe, start, num_buffers, buffers); +} + +static void +dd_context_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *ib) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + safe_memcpy(&dctx->index_buffer, ib, sizeof(*ib)); + pipe->set_index_buffer(pipe, ib); +} + +static void +dd_context_set_stream_output_targets(struct pipe_context *_pipe, + unsigned num_targets, + struct pipe_stream_output_target **tgs, + const unsigned *offsets) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + 
dctx->num_so_targets = num_targets; + safe_memcpy(dctx->so_targets, tgs, sizeof(*tgs) * num_targets); + safe_memcpy(dctx->so_offsets, offsets, sizeof(*offsets) * num_targets); + pipe->set_stream_output_targets(pipe, num_targets, tgs, offsets); +} + +static void +dd_context_destroy(struct pipe_context *_pipe) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + pipe->destroy(pipe); + FREE(dctx); +} + + +/******************************************************************** + * transfer + */ + +static void * +dd_context_transfer_map(struct pipe_context *_pipe, + struct pipe_resource *resource, unsigned level, + unsigned usage, const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + return pipe->transfer_map(pipe, resource, level, usage, box, transfer); +} + +static void +dd_context_transfer_flush_region(struct pipe_context *_pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->transfer_flush_region(pipe, transfer, box); +} + +static void +dd_context_transfer_unmap(struct pipe_context *_pipe, + struct pipe_transfer *transfer) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->transfer_unmap(pipe, transfer); +} + +static void +dd_context_transfer_inline_write(struct pipe_context *_pipe, + struct pipe_resource *resource, + unsigned level, unsigned usage, + const struct pipe_box *box, + const void *data, unsigned stride, + unsigned layer_stride) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->transfer_inline_write(pipe, resource, level, usage, box, data, + stride, layer_stride); +} + + +/******************************************************************** + * miscellaneous + */ + +static void +dd_context_texture_barrier(struct pipe_context *_pipe) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->texture_barrier(pipe); +} + 
+static void +dd_context_memory_barrier(struct pipe_context *_pipe, unsigned flags) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->memory_barrier(pipe, flags); +} + +static void +dd_context_get_sample_position(struct pipe_context *_pipe, + unsigned sample_count, unsigned sample_index, + float *out_value) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + return pipe->get_sample_position(pipe, sample_count, sample_index, + out_value); +} + +static void +dd_context_invalidate_resource(struct pipe_context *_pipe, + struct pipe_resource *resource) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + pipe->invalidate_resource(pipe, resource); +} + +static enum pipe_reset_status +dd_context_get_device_reset_status(struct pipe_context *_pipe) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + return pipe->get_device_reset_status(pipe); +} + +static void +dd_context_dump_debug_state(struct pipe_context *_pipe, FILE *stream, + unsigned flags) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + + return pipe->dump_debug_state(pipe, stream, flags); +} + +struct pipe_context * +dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe) +{ + struct dd_context *dctx; + + if (!pipe) + return NULL; + + dctx = CALLOC_STRUCT(dd_context); + if (!dctx) { + pipe->destroy(pipe); + return NULL; + } + + dctx->pipe = pipe; + dctx->base.priv = pipe->priv; /* expose wrapped priv data */ + dctx->base.screen = &dscreen->base; + + dctx->base.destroy = dd_context_destroy; + + CTX_INIT(render_condition); + CTX_INIT(create_query); + CTX_INIT(destroy_query); + CTX_INIT(begin_query); + CTX_INIT(end_query); + CTX_INIT(get_query_result); + CTX_INIT(create_blend_state); + CTX_INIT(bind_blend_state); + CTX_INIT(delete_blend_state); + CTX_INIT(create_sampler_state); + CTX_INIT(bind_sampler_states); + CTX_INIT(delete_sampler_state); + CTX_INIT(create_rasterizer_state); + CTX_INIT(bind_rasterizer_state); + 
CTX_INIT(delete_rasterizer_state); + CTX_INIT(create_depth_stencil_alpha_state); + CTX_INIT(bind_depth_stencil_alpha_state); + CTX_INIT(delete_depth_stencil_alpha_state); + CTX_INIT(create_fs_state); + CTX_INIT(bind_fs_state); + CTX_INIT(delete_fs_state); + CTX_INIT(create_vs_state); + CTX_INIT(bind_vs_state); + CTX_INIT(delete_vs_state); + CTX_INIT(create_gs_state); + CTX_INIT(bind_gs_state); + CTX_INIT(delete_gs_state); + CTX_INIT(create_tcs_state); + CTX_INIT(bind_tcs_state); + CTX_INIT(delete_tcs_state); + CTX_INIT(create_tes_state); + CTX_INIT(bind_tes_state); + CTX_INIT(delete_tes_state); + CTX_INIT(create_vertex_elements_state); + CTX_INIT(bind_vertex_elements_state); + CTX_INIT(delete_vertex_elements_state); + CTX_INIT(set_blend_color); + CTX_INIT(set_stencil_ref); + CTX_INIT(set_sample_mask); + CTX_INIT(set_min_samples); + CTX_INIT(set_clip_state); + CTX_INIT(set_constant_buffer); + CTX_INIT(set_framebuffer_state); + CTX_INIT(set_polygon_stipple); + CTX_INIT(set_scissor_states); + CTX_INIT(set_viewport_states); + CTX_INIT(set_sampler_views); + CTX_INIT(set_tess_state); + CTX_INIT(set_shader_buffers); + CTX_INIT(set_shader_images); + CTX_INIT(set_vertex_buffers); + CTX_INIT(set_index_buffer); + CTX_INIT(create_stream_output_target); + CTX_INIT(stream_output_target_destroy); + CTX_INIT(set_stream_output_targets); + CTX_INIT(create_sampler_view); + CTX_INIT(sampler_view_destroy); + CTX_INIT(create_surface); + CTX_INIT(surface_destroy); + CTX_INIT(create_image_view); + CTX_INIT(image_view_destroy); + CTX_INIT(transfer_map); + CTX_INIT(transfer_flush_region); + CTX_INIT(transfer_unmap); + CTX_INIT(transfer_inline_write); + CTX_INIT(texture_barrier); + CTX_INIT(memory_barrier); + /* create_video_codec */ + /* create_video_buffer */ + /* create_compute_state */ + /* bind_compute_state */ + /* delete_compute_state */ + /* set_compute_resources */ + /* set_global_binding */ + CTX_INIT(get_sample_position); + CTX_INIT(invalidate_resource); + 
CTX_INIT(get_device_reset_status); + CTX_INIT(dump_debug_state); + + dd_init_draw_functions(dctx); + + dctx->sample_mask = ~0; + return &dctx->base; +} diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c new file mode 100644 index 00000000000..1c986238708 --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_draw.c @@ -0,0 +1,807 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "dd_pipe.h" + +#include "util/u_dump.h" +#include "util/u_format.h" +#include "tgsi/tgsi_scan.h" +#include "os/os_process.h" +#include +#include + + +enum call_type +{ + CALL_DRAW_VBO, + CALL_RESOURCE_COPY_REGION, + CALL_BLIT, + CALL_FLUSH_RESOURCE, + CALL_CLEAR, + CALL_CLEAR_BUFFER, + CALL_CLEAR_RENDER_TARGET, + CALL_CLEAR_DEPTH_STENCIL, +}; + +struct call_resource_copy_region +{ + struct pipe_resource *dst; + unsigned dst_level; + unsigned dstx, dsty, dstz; + struct pipe_resource *src; + unsigned src_level; + const struct pipe_box *src_box; +}; + +struct call_clear +{ + unsigned buffers; + const union pipe_color_union *color; + double depth; + unsigned stencil; +}; + +struct call_clear_buffer +{ + struct pipe_resource *res; + unsigned offset; + unsigned size; + const void *clear_value; + int clear_value_size; +}; + +struct dd_call +{ + enum call_type type; + + union { + struct pipe_draw_info draw_vbo; + struct call_resource_copy_region resource_copy_region; + struct pipe_blit_info blit; + struct pipe_resource *flush_resource; + struct call_clear clear; + struct call_clear_buffer clear_buffer; + } info; +}; + + +static FILE * +dd_get_file_stream(struct dd_context *dctx) +{ + struct pipe_screen *screen = dctx->pipe->screen; + static unsigned index; + char proc_name[128], dir[256], name[512]; + FILE *f; + + if (!os_get_process_name(proc_name, sizeof(proc_name))) { + fprintf(stderr, "dd: can't get the process name\n"); + return NULL; + } + + snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", ".")); + + if (mkdir(dir, 0774) && errno != EEXIST) { + fprintf(stderr, "dd: can't create a directory (%i)\n", errno); + return NULL; + } + + snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++); + f = fopen(name, "w"); + if (!f) { + fprintf(stderr, "dd: can't open file %s\n", name); + return NULL; + } + + fprintf(f, "Driver vendor: %s\n", 
screen->get_vendor(screen)); + fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); + fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); + return f; +} + +static void +dd_close_file_stream(FILE *f) +{ + fclose(f); +} + +static unsigned +dd_num_active_viewports(struct dd_context *dctx) +{ + struct tgsi_shader_info info; + const struct tgsi_token *tokens; + + if (dctx->shaders[PIPE_SHADER_GEOMETRY]) + tokens = dctx->shaders[PIPE_SHADER_GEOMETRY]->state.shader.tokens; + else if (dctx->shaders[PIPE_SHADER_TESS_EVAL]) + tokens = dctx->shaders[PIPE_SHADER_TESS_EVAL]->state.shader.tokens; + else if (dctx->shaders[PIPE_SHADER_VERTEX]) + tokens = dctx->shaders[PIPE_SHADER_VERTEX]->state.shader.tokens; + else + return 1; + + tgsi_scan_shader(tokens, &info); + return info.writes_viewport_index ? PIPE_MAX_VIEWPORTS : 1; +} + +#define COLOR_RESET "\033[0m" +#define COLOR_SHADER "\033[1;32m" +#define COLOR_STATE "\033[1;33m" + +#define DUMP(name, var) do { \ + fprintf(f, COLOR_STATE #name ": " COLOR_RESET); \ + util_dump_##name(f, var); \ + fprintf(f, "\n"); \ +} while(0) + +#define DUMP_I(name, var, i) do { \ + fprintf(f, COLOR_STATE #name " %i: " COLOR_RESET, i); \ + util_dump_##name(f, var); \ + fprintf(f, "\n"); \ +} while(0) + +#define DUMP_M(name, var, member) do { \ + fprintf(f, " " #member ": "); \ + util_dump_##name(f, (var)->member); \ + fprintf(f, "\n"); \ +} while(0) + +#define DUMP_M_ADDR(name, var, member) do { \ + fprintf(f, " " #member ": "); \ + util_dump_##name(f, &(var)->member); \ + fprintf(f, "\n"); \ +} while(0) + +static void +print_named_value(FILE *f, const char *name, int value) +{ + fprintf(f, COLOR_STATE "%s" COLOR_RESET " = %i\n", name, value); +} + +static void +print_named_xvalue(FILE *f, const char *name, int value) +{ + fprintf(f, COLOR_STATE "%s" COLOR_RESET " = 0x%08x\n", name, value); +} + +static void +util_dump_uint(FILE *f, unsigned i) +{ + fprintf(f, "%u", i); +} + +static void +util_dump_hex(FILE *f, unsigned i) 
+{
+   fprintf(f, "0x%x", i);
+}
+
+static void
+util_dump_double(FILE *f, double d)
+{
+   fprintf(f, "%f", d);
+}
+
+static void
+util_dump_format(FILE *f, enum pipe_format format)
+{
+   fprintf(f, "%s", util_format_name(format));
+}
+
+/* Print both the float view and the unsigned view of a color union.
+ * The trailing "}" closes the record opened by the leading "{f = ". */
+static void
+util_dump_color_union(FILE *f, const union pipe_color_union *color)
+{
+   fprintf(f, "{f = {%f, %f, %f, %f}, ui = {%u, %u, %u, %u}}",
+           color->f[0], color->f[1], color->f[2], color->f[3],
+           color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
+}
+
+/* Print a query type: values at or above PIPE_QUERY_DRIVER_SPECIFIC are
+ * shown as an offset from it, all other values by name. */
+static void
+util_dump_query(FILE *f, struct dd_query *query)
+{
+   /* query->type is unsigned, so the offset is printed with %u. */
+   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
+      fprintf(f, "PIPE_QUERY_DRIVER_SPECIFIC + %u",
+              query->type - PIPE_QUERY_DRIVER_SPECIFIC);
+   else
+      fprintf(f, "%s", util_dump_query_type(query->type, false));
+}
+
+/* Dump the saved render condition; prints nothing when no query is set. */
+static void
+dd_dump_render_condition(struct dd_context *dctx, FILE *f)
+{
+   if (dctx->render_cond.query) {
+      fprintf(f, "render condition:\n");
+      DUMP_M(query, &dctx->render_cond, query);
+      DUMP_M(uint, &dctx->render_cond, condition);
+      DUMP_M(uint, &dctx->render_cond, mode);
+      fprintf(f, "\n");
+   }
+}
+
+static void
+dd_dump_draw_vbo(struct dd_context *dctx, struct pipe_draw_info *info, FILE *f)
+{
+   int sh, i;
+   const char *shader_str[PIPE_SHADER_TYPES];
+
+   shader_str[PIPE_SHADER_VERTEX] = "VERTEX";
+   shader_str[PIPE_SHADER_TESS_CTRL] = "TESS_CTRL";
+   shader_str[PIPE_SHADER_TESS_EVAL] = "TESS_EVAL";
+   shader_str[PIPE_SHADER_GEOMETRY] = "GEOMETRY";
+   shader_str[PIPE_SHADER_FRAGMENT] = "FRAGMENT";
+   shader_str[PIPE_SHADER_COMPUTE] = "COMPUTE";
+
+   DUMP(draw_info, info);
+   if (info->indexed) {
+      DUMP(index_buffer, &dctx->index_buffer);
+      if (dctx->index_buffer.buffer)
+         DUMP_M(resource, &dctx->index_buffer, buffer);
+   }
+   if (info->count_from_stream_output)
+      DUMP_M(stream_output_target, info,
+             count_from_stream_output);
+   if (info->indirect)
+      DUMP_M(resource, info, indirect);
+   fprintf(f, "\n");
+
+   /* TODO: dump active queries */
+
+   dd_dump_render_condition(dctx, f);
+
+   for (i = 0; i <
PIPE_MAX_ATTRIBS; i++) + if (dctx->vertex_buffers[i].buffer || + dctx->vertex_buffers[i].user_buffer) { + DUMP_I(vertex_buffer, &dctx->vertex_buffers[i], i); + if (dctx->vertex_buffers[i].buffer) + DUMP_M(resource, &dctx->vertex_buffers[i], buffer); + } + + if (dctx->velems) { + print_named_value(f, "num vertex elements", + dctx->velems->state.velems.count); + for (i = 0; i < dctx->velems->state.velems.count; i++) { + fprintf(f, " "); + DUMP_I(vertex_element, &dctx->velems->state.velems.velems[i], i); + } + } + + print_named_value(f, "num stream output targets", dctx->num_so_targets); + for (i = 0; i < dctx->num_so_targets; i++) + if (dctx->so_targets[i]) { + DUMP_I(stream_output_target, dctx->so_targets[i], i); + DUMP_M(resource, dctx->so_targets[i], buffer); + fprintf(f, " offset = %i\n", dctx->so_offsets[i]); + } + + fprintf(f, "\n"); + for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { + if (sh == PIPE_SHADER_COMPUTE) + continue; + + if (sh == PIPE_SHADER_TESS_CTRL && + !dctx->shaders[PIPE_SHADER_TESS_CTRL] && + dctx->shaders[PIPE_SHADER_TESS_EVAL]) + fprintf(f, "tess_state: {default_outer_level = {%f, %f, %f, %f}, " + "default_inner_level = {%f, %f}}\n", + dctx->tess_default_levels[0], + dctx->tess_default_levels[1], + dctx->tess_default_levels[2], + dctx->tess_default_levels[3], + dctx->tess_default_levels[4], + dctx->tess_default_levels[5]); + + if (sh == PIPE_SHADER_FRAGMENT) + if (dctx->rs) { + unsigned num_viewports = dd_num_active_viewports(dctx); + + if (dctx->rs->state.rs.clip_plane_enable) + DUMP(clip_state, &dctx->clip_state); + + for (i = 0; i < num_viewports; i++) + DUMP_I(viewport_state, &dctx->viewports[i], i); + + if (dctx->rs->state.rs.scissor) + for (i = 0; i < num_viewports; i++) + DUMP_I(scissor_state, &dctx->scissors[i], i); + + DUMP(rasterizer_state, &dctx->rs->state.rs); + + if (dctx->rs->state.rs.poly_stipple_enable) + DUMP(poly_stipple, &dctx->polygon_stipple); + fprintf(f, "\n"); + } + + if (!dctx->shaders[sh]) + continue; + + fprintf(f, 
COLOR_SHADER "begin shader: %s" COLOR_RESET "\n", shader_str[sh]); + DUMP(shader_state, &dctx->shaders[sh]->state.shader); + + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) + if (dctx->constant_buffers[sh][i].buffer || + dctx->constant_buffers[sh][i].user_buffer) { + DUMP_I(constant_buffer, &dctx->constant_buffers[sh][i], i); + if (dctx->constant_buffers[sh][i].buffer) + DUMP_M(resource, &dctx->constant_buffers[sh][i], buffer); + } + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + if (dctx->sampler_states[sh][i]) + DUMP_I(sampler_state, &dctx->sampler_states[sh][i]->state.sampler, i); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + if (dctx->sampler_views[sh][i]) { + DUMP_I(sampler_view, dctx->sampler_views[sh][i], i); + DUMP_M(resource, dctx->sampler_views[sh][i], texture); + } + + /* TODO: print shader images */ + /* TODO: print shader buffers */ + + fprintf(f, COLOR_SHADER "end shader: %s" COLOR_RESET "\n\n", shader_str[sh]); + } + + if (dctx->dsa) + DUMP(depth_stencil_alpha_state, &dctx->dsa->state.dsa); + DUMP(stencil_ref, &dctx->stencil_ref); + + if (dctx->blend) + DUMP(blend_state, &dctx->blend->state.blend); + DUMP(blend_color, &dctx->blend_color); + + print_named_value(f, "min_samples", dctx->min_samples); + print_named_xvalue(f, "sample_mask", dctx->sample_mask); + fprintf(f, "\n"); + + DUMP(framebuffer_state, &dctx->framebuffer_state); + for (i = 0; i < dctx->framebuffer_state.nr_cbufs; i++) + if (dctx->framebuffer_state.cbufs[i]) { + fprintf(f, " " COLOR_STATE "cbufs[%i]:" COLOR_RESET "\n ", i); + DUMP(surface, dctx->framebuffer_state.cbufs[i]); + fprintf(f, " "); + DUMP(resource, dctx->framebuffer_state.cbufs[i]->texture); + } + if (dctx->framebuffer_state.zsbuf) { + fprintf(f, " " COLOR_STATE "zsbuf:" COLOR_RESET "\n "); + DUMP(surface, dctx->framebuffer_state.zsbuf); + fprintf(f, " "); + DUMP(resource, dctx->framebuffer_state.zsbuf->texture); + } + fprintf(f, "\n"); +} + +static void +dd_dump_resource_copy_region(struct dd_context *dctx, + struct 
call_resource_copy_region *info,
+                             FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst);
+   DUMP_M(uint, info, dst_level);
+   DUMP_M(uint, info, dstx);
+   DUMP_M(uint, info, dsty);
+   DUMP_M(uint, info, dstz);
+   DUMP_M(resource, info, src);
+   DUMP_M(uint, info, src_level);
+   DUMP_M(box, info, src_box);
+}
+
+static void
+dd_dump_blit(struct dd_context *dctx, struct pipe_blit_info *info, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst.resource);
+   DUMP_M(uint, info, dst.level);
+   DUMP_M_ADDR(box, info, dst.box);
+   DUMP_M(format, info, dst.format);
+
+   DUMP_M(resource, info, src.resource);
+   DUMP_M(uint, info, src.level);
+   DUMP_M_ADDR(box, info, src.box);
+   DUMP_M(format, info, src.format);
+
+   DUMP_M(hex, info, mask);
+   DUMP_M(uint, info, filter);
+   DUMP_M(uint, info, scissor_enable);
+   DUMP_M_ADDR(scissor_state, info, scissor);
+   DUMP_M(uint, info, render_condition_enable);
+
+   if (info->render_condition_enable)
+      dd_dump_render_condition(dctx, f);
+}
+
+static void
+dd_dump_flush_resource(struct dd_context *dctx, struct pipe_resource *res,
+                       FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP(resource, res);
+}
+
+static void
+dd_dump_clear(struct dd_context *dctx, struct call_clear *info, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(uint, info, buffers);
+   DUMP_M(color_union, info, color);
+   DUMP_M(double, info, depth);
+   DUMP_M(hex, info, stencil);
+}
+
+/* Dump the arguments of a recorded clear_buffer call to f.  The heading is
+ * the function name with its first 8 characters ("dd_dump_") skipped, and
+ * the clear value is printed as clear_value_size hex bytes. */
+static void
+dd_dump_clear_buffer(struct dd_context *dctx, struct call_clear_buffer *info,
+                     FILE *f)
+{
+   int i;
+   /* Read the value as unsigned bytes: a plain char may be signed, and a
+    * byte >= 0x80 would then be sign-extended by the default argument
+    * promotions and printed as "ffffff80" instead of "80". */
+   const unsigned char *value = (const unsigned char *)info->clear_value;
+
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, res);
+   DUMP_M(uint, info, offset);
+   DUMP_M(uint, info, size);
+   DUMP_M(uint, info, clear_value_size);
+
+   fprintf(f, " clear_value:");
+   for (i = 0; i < info->clear_value_size; i++)
+      fprintf(f, " %02x", value[i]);
+   fprintf(f, "\n");
+}
+
+static void
+dd_dump_clear_render_target(struct dd_context *dctx, FILE *f)
+{
+   
fprintf(f, "%s:\n", __func__+8); + /* TODO */ +} + +static void +dd_dump_clear_depth_stencil(struct dd_context *dctx, FILE *f) +{ + fprintf(f, "%s:\n", __func__+8); + /* TODO */ +} + +static void +dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags) +{ + if (dctx->pipe->dump_debug_state) { + fprintf(f,"\n\n**************************************************" + "***************************\n"); + fprintf(f, "Driver-specific state:\n\n"); + dctx->pipe->dump_debug_state(dctx->pipe, f, flags); + } +} + +static void +dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags) +{ + FILE *f = dd_get_file_stream(dctx); + + if (!f) + return; + + switch (call->type) { + case CALL_DRAW_VBO: + dd_dump_draw_vbo(dctx, &call->info.draw_vbo, f); + break; + case CALL_RESOURCE_COPY_REGION: + dd_dump_resource_copy_region(dctx, &call->info.resource_copy_region, f); + break; + case CALL_BLIT: + dd_dump_blit(dctx, &call->info.blit, f); + break; + case CALL_FLUSH_RESOURCE: + dd_dump_flush_resource(dctx, call->info.flush_resource, f); + break; + case CALL_CLEAR: + dd_dump_clear(dctx, &call->info.clear, f); + break; + case CALL_CLEAR_BUFFER: + dd_dump_clear_buffer(dctx, &call->info.clear_buffer, f); + break; + case CALL_CLEAR_RENDER_TARGET: + dd_dump_clear_render_target(dctx, f); + break; + case CALL_CLEAR_DEPTH_STENCIL: + dd_dump_clear_depth_stencil(dctx, f); + } + + dd_dump_driver_state(dctx, f, flags); + dd_close_file_stream(f); +} + +static void +dd_kill_process(void) +{ + sync(); + fprintf(stderr, "dd: Aborting the process...\n"); + fflush(stdout); + fflush(stderr); + abort(); +} + +static bool +dd_flush_and_check_hang(struct dd_context *dctx, + struct pipe_fence_handle **flush_fence, + unsigned flush_flags) +{ + struct pipe_fence_handle *fence = NULL; + struct pipe_context *pipe = dctx->pipe; + struct pipe_screen *screen = pipe->screen; + uint64_t timeout_ms = dd_screen(dctx->base.screen)->timeout_ms; + bool idle; + + assert(timeout_ms > 0); + + 
pipe->flush(pipe, &fence, flush_flags); + if (flush_fence) + screen->fence_reference(screen, flush_fence, fence); + if (!fence) + return false; + + idle = screen->fence_finish(screen, fence, timeout_ms * 1000000); + screen->fence_reference(screen, &fence, NULL); + if (!idle) + fprintf(stderr, "dd: GPU hang detected!\n"); + return !idle; +} + +static void +dd_flush_and_handle_hang(struct dd_context *dctx, + struct pipe_fence_handle **fence, unsigned flags, + const char *cause) +{ + if (dd_flush_and_check_hang(dctx, fence, flags)) { + FILE *f = dd_get_file_stream(dctx); + + if (f) { + fprintf(f, "dd: %s.\n", cause); + dd_dump_driver_state(dctx, f, PIPE_DEBUG_DEVICE_IS_HUNG); + dd_close_file_stream(f); + } + + /* Terminate the process to prevent future hangs. */ + dd_kill_process(); + } +} + +static void +dd_context_flush(struct pipe_context *_pipe, + struct pipe_fence_handle **fence, unsigned flags) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + switch (dd_screen(dctx->base.screen)->mode) { + case DD_DETECT_HANGS: + dd_flush_and_handle_hang(dctx, fence, flags, + "GPU hang detected in pipe->flush()"); + break; + case DD_DUMP_ALL_CALLS: + pipe->flush(pipe, fence, flags); + break; + default: + assert(0); + } +} + +static void +dd_before_draw(struct dd_context *dctx) +{ + if (dd_screen(dctx->base.screen)->mode == DD_DETECT_HANGS && + !dd_screen(dctx->base.screen)->no_flush) + dd_flush_and_handle_hang(dctx, NULL, 0, + "GPU hang most likely caused by internal " + "driver commands"); +} + +static void +dd_after_draw(struct dd_context *dctx, struct dd_call *call) +{ + switch (dd_screen(dctx->base.screen)->mode) { + case DD_DETECT_HANGS: + if (!dd_screen(dctx->base.screen)->no_flush && + dd_flush_and_check_hang(dctx, NULL, 0)) { + dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG); + + /* Terminate the process to prevent future hangs. 
*/ + dd_kill_process(); + } + break; + case DD_DUMP_ALL_CALLS: + dd_dump_call(dctx, call, 0); + break; + default: + assert(0); + } +} + +static void +dd_context_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_DRAW_VBO; + call.info.draw_vbo = *info; + + dd_before_draw(dctx); + pipe->draw_vbo(pipe, info); + dd_after_draw(dctx, &call); +} + +static void +dd_context_resource_copy_region(struct pipe_context *_pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_RESOURCE_COPY_REGION; + call.info.resource_copy_region.dst = dst; + call.info.resource_copy_region.dst_level = dst_level; + call.info.resource_copy_region.dstx = dstx; + call.info.resource_copy_region.dsty = dsty; + call.info.resource_copy_region.dstz = dstz; + call.info.resource_copy_region.src = src; + call.info.resource_copy_region.src_level = src_level; + call.info.resource_copy_region.src_box = src_box; + + dd_before_draw(dctx); + pipe->resource_copy_region(pipe, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + dd_after_draw(dctx, &call); +} + +static void +dd_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_BLIT; + call.info.blit = *info; + + dd_before_draw(dctx); + pipe->blit(pipe, info); + dd_after_draw(dctx, &call); +} + +static void +dd_context_flush_resource(struct pipe_context *_pipe, + struct pipe_resource *resource) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = 
dctx->pipe; + struct dd_call call; + + call.type = CALL_FLUSH_RESOURCE; + call.info.flush_resource = resource; + + dd_before_draw(dctx); + pipe->flush_resource(pipe, resource); + dd_after_draw(dctx, &call); +} + +static void +dd_context_clear(struct pipe_context *_pipe, unsigned buffers, + const union pipe_color_union *color, double depth, + unsigned stencil) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_CLEAR; + call.info.clear.buffers = buffers; + call.info.clear.color = color; + call.info.clear.depth = depth; + call.info.clear.stencil = stencil; + + dd_before_draw(dctx); + pipe->clear(pipe, buffers, color, depth, stencil); + dd_after_draw(dctx, &call); +} + +static void +dd_context_clear_render_target(struct pipe_context *_pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_CLEAR_RENDER_TARGET; + + dd_before_draw(dctx); + pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height); + dd_after_draw(dctx, &call); +} + +static void +dd_context_clear_depth_stencil(struct pipe_context *_pipe, + struct pipe_surface *dst, unsigned clear_flags, + double depth, unsigned stencil, unsigned dstx, + unsigned dsty, unsigned width, unsigned height) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_CLEAR_DEPTH_STENCIL; + + dd_before_draw(dctx); + pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil, + dstx, dsty, width, height); + dd_after_draw(dctx, &call); +} + +static void +dd_context_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res, + unsigned offset, unsigned size, + const void *clear_value, int clear_value_size) +{ + struct dd_context 
*dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_CLEAR_BUFFER; + call.info.clear_buffer.res = res; + call.info.clear_buffer.offset = offset; + call.info.clear_buffer.size = size; + call.info.clear_buffer.clear_value = clear_value; + call.info.clear_buffer.clear_value_size = clear_value_size; + + dd_before_draw(dctx); + pipe->clear_buffer(pipe, res, offset, size, clear_value, clear_value_size); + dd_after_draw(dctx, &call); +} + +void +dd_init_draw_functions(struct dd_context *dctx) +{ + CTX_INIT(flush); + CTX_INIT(draw_vbo); + CTX_INIT(resource_copy_region); + CTX_INIT(blit); + CTX_INIT(clear); + CTX_INIT(clear_render_target); + CTX_INIT(clear_depth_stencil); + CTX_INIT(clear_buffer); + CTX_INIT(flush_resource); + /* launch_grid */ +} diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h new file mode 100644 index 00000000000..c78d112988a --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -0,0 +1,141 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DD_H_ +#define DD_H_ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + +/* name of the directory in home */ +#define DD_DIR "ddebug_dumps" + +enum dd_mode { + DD_DETECT_HANGS, + DD_DUMP_ALL_CALLS +}; + +struct dd_screen +{ + struct pipe_screen base; + struct pipe_screen *screen; + unsigned timeout_ms; + enum dd_mode mode; + bool no_flush; +}; + +struct dd_query +{ + unsigned type; + struct pipe_query *query; +}; + +struct dd_state +{ + void *cso; + + union { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rs; + struct pipe_sampler_state sampler; + struct { + struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS]; + unsigned count; + } velems; + struct pipe_shader_state shader; + } state; +}; + +struct dd_context +{ + struct pipe_context base; + struct pipe_context *pipe; + + struct { + struct dd_query *query; + bool condition; + unsigned mode; + } render_cond; + + struct pipe_index_buffer index_buffer; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + + unsigned num_so_targets; + struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS]; + unsigned so_offsets[PIPE_MAX_SO_BUFFERS]; + + struct dd_state *shaders[PIPE_SHADER_TYPES]; + struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + struct pipe_sampler_view 
*sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; + struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; + + struct dd_state *velems; + struct dd_state *rs; + struct dd_state *dsa; + struct dd_state *blend; + + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + unsigned min_samples; + struct pipe_clip_state clip_state; + struct pipe_framebuffer_state framebuffer_state; + struct pipe_poly_stipple polygon_stipple; + struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS]; + struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS]; + float tess_default_levels[6]; +}; + + +struct pipe_context * +dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe); + +void +dd_init_draw_functions(struct dd_context *dctx); + + +static inline struct dd_context * +dd_context(struct pipe_context *pipe) +{ + return (struct dd_context *)pipe; +} + +static inline struct dd_screen * +dd_screen(struct pipe_screen *screen) +{ + return (struct dd_screen*)screen; +} + + +#define CTX_INIT(_member) \ + dctx->base._member = dctx->pipe->_member ? dd_context_##_member : NULL + +#endif /* DD_H_ */ diff --git a/src/gallium/drivers/ddebug/dd_public.h b/src/gallium/drivers/ddebug/dd_public.h new file mode 100644 index 00000000000..e6607655753 --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_public.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DD_PUBLIC_H_ +#define DD_PUBLIC_H_ + +struct pipe_screen; + +struct pipe_screen * +ddebug_screen_create(struct pipe_screen *screen); + +#endif /* DD_PUBLIC_H_ */ diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c new file mode 100644 index 00000000000..a776580c9bb --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_screen.c @@ -0,0 +1,353 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2008 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "dd_pipe.h" +#include "dd_public.h" +#include "util/u_memory.h" +#include <stdio.h> + + +static const char * +dd_screen_get_name(struct pipe_screen *_screen) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_name(screen); +} + +static const char * +dd_screen_get_vendor(struct pipe_screen *_screen) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_vendor(screen); +} + +static const char * +dd_screen_get_device_vendor(struct pipe_screen *_screen) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_device_vendor(screen); +} + +static int +dd_screen_get_param(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_param(screen, param); +} + +static float +dd_screen_get_paramf(struct pipe_screen *_screen, + enum pipe_capf param) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_paramf(screen, param); +} + +static int +dd_screen_get_shader_param(struct pipe_screen *_screen, unsigned shader, + enum pipe_shader_cap param) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_shader_param(screen, shader, param); +} + +static uint64_t +dd_screen_get_timestamp(struct pipe_screen *_screen) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_timestamp(screen); +} + +static struct pipe_context * +dd_screen_context_create(struct pipe_screen *_screen, void *priv, + unsigned flags) +{ + struct dd_screen *dscreen = dd_screen(_screen); + struct pipe_screen *screen = dscreen->screen; + + flags |= PIPE_CONTEXT_DEBUG; + + return dd_context_create(dscreen, + screen->context_create(screen, priv, flags)); +} + +static boolean +dd_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum 
pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->is_format_supported(screen, format, target, sample_count, + tex_usage); +} + +static boolean +dd_screen_can_create_resource(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->can_create_resource(screen, templat); +} + +static void +dd_screen_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *context_private, + struct pipe_box *sub_box) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + screen->flush_frontbuffer(screen, resource, level, layer, context_private, + sub_box); +} + +static int +dd_screen_get_driver_query_info(struct pipe_screen *_screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_driver_query_info(screen, index, info); +} + +static int +dd_screen_get_driver_query_group_info(struct pipe_screen *_screen, + unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_driver_query_group_info(screen, index, info); +} + + +/******************************************************************** + * resource + */ + +static struct pipe_resource * +dd_screen_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + struct pipe_resource *res = screen->resource_create(screen, templat); + + if (!res) + return NULL; + res->screen = _screen; + return res; +} + +static struct pipe_resource * +dd_screen_resource_from_handle(struct pipe_screen *_screen, + const struct pipe_resource *templ, + struct winsys_handle *handle) +{ + struct pipe_screen *screen = 
dd_screen(_screen)->screen; + struct pipe_resource *res = + screen->resource_from_handle(screen, templ, handle); + + if (!res) + return NULL; + res->screen = _screen; + return res; +} + +static struct pipe_resource * +dd_screen_resource_from_user_memory(struct pipe_screen *_screen, + const struct pipe_resource *templ, + void *user_memory) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + struct pipe_resource *res = + screen->resource_from_user_memory(screen, templ, user_memory); + + if (!res) + return NULL; + res->screen = _screen; + return res; +} + +static void +dd_screen_resource_destroy(struct pipe_screen *_screen, + struct pipe_resource *res) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + screen->resource_destroy(screen, res); +} + +static boolean +dd_screen_resource_get_handle(struct pipe_screen *_screen, + struct pipe_resource *resource, + struct winsys_handle *handle) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->resource_get_handle(screen, resource, handle); +} + + +/******************************************************************** + * fence + */ + +static void +dd_screen_fence_reference(struct pipe_screen *_screen, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + screen->fence_reference(screen, pdst, src); +} + +static boolean +dd_screen_fence_finish(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->fence_finish(screen, fence, timeout); +} + + +/******************************************************************** + * screen + */ + +static void +dd_screen_destroy(struct pipe_screen *_screen) +{ + struct dd_screen *dscreen = dd_screen(_screen); + struct pipe_screen *screen = dscreen->screen; + + screen->destroy(screen); + FREE(dscreen); +} + +struct pipe_screen * +ddebug_screen_create(struct 
pipe_screen *screen) +{ + struct dd_screen *dscreen; + const char *option = debug_get_option("GALLIUM_DDEBUG", NULL); + bool dump_always = option && !strcmp(option, "always"); + bool no_flush = option && strstr(option, "noflush"); + bool help = option && !strcmp(option, "help"); + unsigned timeout = 0; + + if (help) { + puts("Gallium driver debugger"); + puts(""); + puts("Usage:"); + puts(""); + puts(" GALLIUM_DDEBUG=always"); + puts(" Dump context and driver information after every draw call into"); + puts(" $HOME/"DD_DIR"/."); + puts(""); + puts(" GALLIUM_DDEBUG=[timeout in ms] noflush"); + puts(" Flush and detect a device hang after every draw call based on the given"); + puts(" fence timeout and dump context and driver information into"); + puts(" $HOME/"DD_DIR"/ when a hang is detected."); + puts(" If 'noflush' is specified, only detect hangs in pipe->flush."); + puts(""); + exit(0); + } + + if (!option) + return screen; + if (!dump_always && sscanf(option, "%u", &timeout) != 1) + return screen; + + dscreen = CALLOC_STRUCT(dd_screen); + if (!dscreen) + return NULL; + +#define SCR_INIT(_member) \ + dscreen->base._member = screen->_member ? 
dd_screen_##_member : NULL + + dscreen->base.destroy = dd_screen_destroy; + dscreen->base.get_name = dd_screen_get_name; + dscreen->base.get_vendor = dd_screen_get_vendor; + dscreen->base.get_device_vendor = dd_screen_get_device_vendor; + dscreen->base.get_param = dd_screen_get_param; + dscreen->base.get_paramf = dd_screen_get_paramf; + dscreen->base.get_shader_param = dd_screen_get_shader_param; + /* get_video_param */ + /* get_compute_param */ + SCR_INIT(get_timestamp); + dscreen->base.context_create = dd_screen_context_create; + dscreen->base.is_format_supported = dd_screen_is_format_supported; + /* is_video_format_supported */ + SCR_INIT(can_create_resource); + dscreen->base.resource_create = dd_screen_resource_create; + dscreen->base.resource_from_handle = dd_screen_resource_from_handle; + SCR_INIT(resource_from_user_memory); + dscreen->base.resource_get_handle = dd_screen_resource_get_handle; + dscreen->base.resource_destroy = dd_screen_resource_destroy; + SCR_INIT(flush_frontbuffer); + SCR_INIT(fence_reference); + SCR_INIT(fence_finish); + SCR_INIT(get_driver_query_info); + SCR_INIT(get_driver_query_group_info); + +#undef SCR_INIT + + dscreen->screen = screen; + dscreen->timeout_ms = timeout; + dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS; + dscreen->no_flush = no_flush; + + switch (dscreen->mode) { + case DD_DUMP_ALL_CALLS: + fprintf(stderr, "Gallium debugger active. Logging all calls.\n"); + break; + case DD_DETECT_HANGS: + fprintf(stderr, "Gallium debugger active. 
" + "The hang detection timout is %i ms.\n", timeout); + break; + default: + assert(0); + } + + return &dscreen->base; +} diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index 7c86ea13652..7f945d14b5c 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -11,6 +11,7 @@ AM_CFLAGS = \ AM_CPPFLAGS = \ $(DEFINES) \ -DDRI_TARGET \ + -DGALLIUM_DDEBUG \ -DGALLIUM_NOOP \ -DGALLIUM_RBUG \ -DGALLIUM_TRACE @@ -45,6 +46,7 @@ gallium_dri_la_LIBADD = \ $(top_builddir)/src/gallium/state_trackers/dri/libdri.la \ $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/gallium/drivers/ddebug/libddebug.la \ $(top_builddir)/src/gallium/drivers/noop/libnoop.la \ $(top_builddir)/src/gallium/drivers/rbug/librbug.la \ $(top_builddir)/src/gallium/drivers/trace/libtrace.la \ From 93d97db3492cc1af8fb401ed5eb6766e608f744e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 11 Jul 2015 13:13:07 +0200 Subject: [PATCH 15/26] radeonsi: allow si_dump_key to write to a file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_shader.c | 36 ++++++++++++------------ src/gallium/drivers/radeonsi/si_shader.h | 1 + 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6b70a8f4f48..98b42890f7d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3964,48 +3964,48 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, return r; } -static void si_dump_key(unsigned shader, union si_shader_key *key) +void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) { int i; - fprintf(stderr, "SHADER KEY\n"); + fprintf(f, "SHADER KEY\n"); 
switch (shader) { case PIPE_SHADER_VERTEX: - fprintf(stderr, " instance_divisors = {"); + fprintf(f, " instance_divisors = {"); for (i = 0; i < Elements(key->vs.instance_divisors); i++) - fprintf(stderr, !i ? "%u" : ", %u", + fprintf(f, !i ? "%u" : ", %u", key->vs.instance_divisors[i]); - fprintf(stderr, "}\n"); + fprintf(f, "}\n"); if (key->vs.as_es) - fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n", + fprintf(f, " es_enabled_outputs = 0x%"PRIx64"\n", key->vs.es_enabled_outputs); - fprintf(stderr, " as_es = %u\n", key->vs.as_es); - fprintf(stderr, " as_ls = %u\n", key->vs.as_ls); + fprintf(f, " as_es = %u\n", key->vs.as_es); + fprintf(f, " as_ls = %u\n", key->vs.as_ls); break; case PIPE_SHADER_TESS_CTRL: - fprintf(stderr, " prim_mode = %u\n", key->tcs.prim_mode); + fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode); break; case PIPE_SHADER_TESS_EVAL: if (key->tes.as_es) - fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n", + fprintf(f, " es_enabled_outputs = 0x%"PRIx64"\n", key->tes.es_enabled_outputs); - fprintf(stderr, " as_es = %u\n", key->tes.as_es); + fprintf(f, " as_es = %u\n", key->tes.as_es); break; case PIPE_SHADER_GEOMETRY: break; case PIPE_SHADER_FRAGMENT: - fprintf(stderr, " export_16bpc = 0x%X\n", key->ps.export_16bpc); - fprintf(stderr, " last_cbuf = %u\n", key->ps.last_cbuf); - fprintf(stderr, " color_two_side = %u\n", key->ps.color_two_side); - fprintf(stderr, " alpha_func = %u\n", key->ps.alpha_func); - fprintf(stderr, " alpha_to_one = %u\n", key->ps.alpha_to_one); - fprintf(stderr, " poly_stipple = %u\n", key->ps.poly_stipple); + fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc); + fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); + fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); + fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); + fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one); + fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple); break; default: @@ -4036,7 +4036,7 @@ int 
si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) { - si_dump_key(sel->type, &shader->key); + si_dump_shader_key(sel->type, &shader->key, stderr); tgsi_dump(tokens, 0); si_dump_streamout(&sel->so); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index cd845c12e64..423b849b7c3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -304,6 +304,7 @@ static inline bool si_vs_exports_prim_id(struct si_shader *shader) /* radeonsi_shader.c */ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader); +void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f); int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, LLVMTargetMachineRef tm, LLVMModuleRef mod); void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader); From 110873ed11342426584a75bf27e85b74035c3b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 23:56:22 +0200 Subject: [PATCH 16/26] radeonsi: add an initial dump_debug_state implementation dumping shaders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is usually called after a draw call. 
Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/si_debug.c | 59 +++++++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 3 + 4 files changed, 64 insertions(+) create mode 100644 src/gallium/drivers/radeonsi/si_debug.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index a0b1414f4bb..ae8fe5994b7 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -4,6 +4,7 @@ C_SOURCES := \ si_commands.c \ si_compute.c \ si_cp_dma.c \ + si_debug.c \ si_descriptors.c \ sid.h \ si_dma.c \ diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c new file mode 100644 index 00000000000..fa2ec0342ce --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -0,0 +1,59 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Marek Olšák + */ + +#include "si_pipe.h" +#include "si_shader.h" +#include "sid.h" + + +static void si_dump_shader(struct si_shader_selector *sel, const char *name, + FILE *f) +{ + if (!sel || !sel->current) + return; + + fprintf(f, "%s shader disassembly:\n", name); + si_dump_shader_key(sel->type, &sel->current->key, f); + fprintf(f, "%s\n\n", sel->current->binary.disasm_string); +} + +static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, + unsigned flags) +{ + struct si_context *sctx = (struct si_context*)ctx; + + si_dump_shader(sctx->vs_shader, "Vertex", f); + si_dump_shader(sctx->tcs_shader, "Tessellation control", f); + si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f); + si_dump_shader(sctx->gs_shader, "Geometry", f); + si_dump_shader(sctx->ps_shader, "Fragment", f); + fprintf(f, "Done.\n"); +} + +void si_init_debug_functions(struct si_context *sctx) +{ + sctx->b.b.dump_debug_state = si_dump_debug_state; +} diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 029b3cce488..f6d4a5a9fb8 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -122,6 +122,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); + si_init_debug_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 553e1f32683..3ab95385d8e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ 
b/src/gallium/drivers/radeonsi/si_pipe.h @@ -275,6 +275,9 @@ void si_copy_buffer(struct si_context *sctx, bool is_framebuffer); void si_init_cp_dma_functions(struct si_context *sctx); +/* si_debug.c */ +void si_init_debug_functions(struct si_context *sctx); + /* si_dma.c */ void si_dma_copy(struct pipe_context *ctx, struct pipe_resource *dst, From c59ad265df655a19285d813144f6b76d7f49d7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 18:43:27 +0200 Subject: [PATCH 17/26] r600g,radeonsi: remove unused ill-formed register field definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/r600/r600d.h | 1 - src/gallium/drivers/radeonsi/sid.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index bce8b4ea065..3c08ba5906a 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3428,7 +3428,6 @@ #define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) #define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) #define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF -#define S_0085F0_CB0_DEST_BASE_ENA_SHIFT 6 #define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) #define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) #define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 66fdf35c8af..05d20dbf10f 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -614,7 +614,6 @@ #define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) #define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) #define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD -#define S_0085F0_CB0_DEST_BASE_ENA_SHIFT 6 #define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) #define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) #define C_0085F0_CB0_DEST_BASE_ENA 
0xFFFFFFBF From d15b71b4bd4666619f5bee0e7fcb21d4608edf70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 18:48:06 +0200 Subject: [PATCH 18/26] radeonsi: remove duplicated register definitions and instruction definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instruction encoding isn't needed in Mesa. The border color address registers were duplicated. Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/sid.h | 3160 ---------------------------- 1 file changed, 3160 deletions(-) diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 05d20dbf10f..66660e32824 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -1821,1223 +1821,6 @@ #define S_008C0C_RNG(x) (((x) & 0x7FF) << 10) #define G_008C0C_RNG(x) (((x) >> 10) & 0x7FF) #define C_008C0C_RNG 0xFFE003FF -#if 0 -/* CIK */ -#define R_008DFC_SQ_FLAT_1 0x008DFC -#define S_008DFC_ADDR(x) (((x) & 0xFF) << 0) -#define G_008DFC_ADDR(x) (((x) >> 0) & 0xFF) -#define C_008DFC_ADDR 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_DATA(x) (((x) & 0xFF) << 8) -#define G_008DFC_DATA(x) (((x) >> 8) & 0xFF) -#define C_008DFC_DATA 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_TFE(x) (((x) & 0x1) << 23) -#define G_008DFC_TFE(x) (((x) >> 23) & 0x1) -#define C_008DFC_TFE 0xFF7FFFFF -#define S_008DFC_VDST(x) (((x) & 0xFF) << 24) -#define G_008DFC_VDST(x) (((x) >> 24) & 0xFF) -#define C_008DFC_VDST 0x00FFFFFF -#define V_008DFC_SQ_VGPR 0x00 -/* */ -#define R_008DFC_SQ_INST 0x008DFC -#define R_030D20_SQC_CACHES 0x030D20 -#define S_030D20_TARGET_INST(x) (((x) & 0x1) << 0) -#define G_030D20_TARGET_INST(x) (((x) >> 0) & 0x1) -#define C_030D20_TARGET_INST 0xFFFFFFFE -#define S_030D20_TARGET_DATA(x) (((x) & 0x1) << 1) -#define G_030D20_TARGET_DATA(x) (((x) >> 1) & 0x1) -#define C_030D20_TARGET_DATA 0xFFFFFFFD -#define 
S_030D20_INVALIDATE(x) (((x) & 0x1) << 2) -#define G_030D20_INVALIDATE(x) (((x) >> 2) & 0x1) -#define C_030D20_INVALIDATE 0xFFFFFFFB -#define S_030D20_WRITEBACK(x) (((x) & 0x1) << 3) -#define G_030D20_WRITEBACK(x) (((x) >> 3) & 0x1) -#define C_030D20_WRITEBACK 0xFFFFFFF7 -#define S_030D20_VOL(x) (((x) & 0x1) << 4) -#define G_030D20_VOL(x) (((x) >> 4) & 0x1) -#define C_030D20_VOL 0xFFFFFFEF -#define S_030D20_COMPLETE(x) (((x) & 0x1) << 16) -#define G_030D20_COMPLETE(x) (((x) >> 16) & 0x1) -#define C_030D20_COMPLETE 0xFFFEFFFF -#define R_030D24_SQC_WRITEBACK 0x030D24 -#define S_030D24_DWB(x) (((x) & 0x1) << 0) -#define G_030D24_DWB(x) (((x) >> 0) & 0x1) -#define C_030D24_DWB 0xFFFFFFFE -#define S_030D24_DIRTY(x) (((x) & 0x1) << 1) -#define G_030D24_DIRTY(x) (((x) >> 1) & 0x1) -#define C_030D24_DIRTY 0xFFFFFFFD -#define R_008DFC_SQ_VOP1 0x008DFC -#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0) -#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF) -#define C_008DFC_SRC0 0xFFFFFE00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define 
V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define 
V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_OP(x) (((x) & 0xFF) << 9) -#define G_008DFC_OP(x) (((x) >> 9) & 0xFF) -#define C_008DFC_OP 0xFFFE01FF -#define V_008DFC_SQ_V_NOP 0x00 -#define V_008DFC_SQ_V_MOV_B32 0x01 -#define V_008DFC_SQ_V_READFIRSTLANE_B32 0x02 -#define V_008DFC_SQ_V_CVT_I32_F64 0x03 -#define V_008DFC_SQ_V_CVT_F64_I32 0x04 -#define V_008DFC_SQ_V_CVT_F32_I32 0x05 -#define V_008DFC_SQ_V_CVT_F32_U32 0x06 -#define V_008DFC_SQ_V_CVT_U32_F32 0x07 -#define V_008DFC_SQ_V_CVT_I32_F32 0x08 -#define V_008DFC_SQ_V_MOV_FED_B32 0x09 -#define V_008DFC_SQ_V_CVT_F16_F32 0x0A -#define V_008DFC_SQ_V_CVT_F32_F16 0x0B -#define V_008DFC_SQ_V_CVT_RPI_I32_F32 0x0C -#define V_008DFC_SQ_V_CVT_FLR_I32_F32 0x0D -#define V_008DFC_SQ_V_CVT_OFF_F32_I4 
0x0E -#define V_008DFC_SQ_V_CVT_F32_F64 0x0F -#define V_008DFC_SQ_V_CVT_F64_F32 0x10 -#define V_008DFC_SQ_V_CVT_F32_UBYTE0 0x11 -#define V_008DFC_SQ_V_CVT_F32_UBYTE1 0x12 -#define V_008DFC_SQ_V_CVT_F32_UBYTE2 0x13 -#define V_008DFC_SQ_V_CVT_F32_UBYTE3 0x14 -#define V_008DFC_SQ_V_CVT_U32_F64 0x15 -#define V_008DFC_SQ_V_CVT_F64_U32 0x16 -/* CIK */ -#define V_008DFC_SQ_V_TRUNC_F64 0x17 -#define V_008DFC_SQ_V_CEIL_F64 0x18 -#define V_008DFC_SQ_V_RNDNE_F64 0x19 -#define V_008DFC_SQ_V_FLOOR_F64 0x1A -/* */ -#define V_008DFC_SQ_V_FRACT_F32 0x20 -#define V_008DFC_SQ_V_TRUNC_F32 0x21 -#define V_008DFC_SQ_V_CEIL_F32 0x22 -#define V_008DFC_SQ_V_RNDNE_F32 0x23 -#define V_008DFC_SQ_V_FLOOR_F32 0x24 -#define V_008DFC_SQ_V_EXP_F32 0x25 -#define V_008DFC_SQ_V_LOG_CLAMP_F32 0x26 -#define V_008DFC_SQ_V_LOG_F32 0x27 -#define V_008DFC_SQ_V_RCP_CLAMP_F32 0x28 -#define V_008DFC_SQ_V_RCP_LEGACY_F32 0x29 -#define V_008DFC_SQ_V_RCP_F32 0x2A -#define V_008DFC_SQ_V_RCP_IFLAG_F32 0x2B -#define V_008DFC_SQ_V_RSQ_CLAMP_F32 0x2C -#define V_008DFC_SQ_V_RSQ_LEGACY_F32 0x2D -#define V_008DFC_SQ_V_RSQ_F32 0x2E -#define V_008DFC_SQ_V_RCP_F64 0x2F -#define V_008DFC_SQ_V_RCP_CLAMP_F64 0x30 -#define V_008DFC_SQ_V_RSQ_F64 0x31 -#define V_008DFC_SQ_V_RSQ_CLAMP_F64 0x32 -#define V_008DFC_SQ_V_SQRT_F32 0x33 -#define V_008DFC_SQ_V_SQRT_F64 0x34 -#define V_008DFC_SQ_V_SIN_F32 0x35 -#define V_008DFC_SQ_V_COS_F32 0x36 -#define V_008DFC_SQ_V_NOT_B32 0x37 -#define V_008DFC_SQ_V_BFREV_B32 0x38 -#define V_008DFC_SQ_V_FFBH_U32 0x39 -#define V_008DFC_SQ_V_FFBL_B32 0x3A -#define V_008DFC_SQ_V_FFBH_I32 0x3B -#define V_008DFC_SQ_V_FREXP_EXP_I32_F64 0x3C -#define V_008DFC_SQ_V_FREXP_MANT_F64 0x3D -#define V_008DFC_SQ_V_FRACT_F64 0x3E -#define V_008DFC_SQ_V_FREXP_EXP_I32_F32 0x3F -#define V_008DFC_SQ_V_FREXP_MANT_F32 0x40 -#define V_008DFC_SQ_V_CLREXCP 0x41 -#define V_008DFC_SQ_V_MOVRELD_B32 0x42 -#define V_008DFC_SQ_V_MOVRELS_B32 0x43 -#define V_008DFC_SQ_V_MOVRELSD_B32 0x44 -/* CIK */ -#define 
V_008DFC_SQ_V_LOG_LEGACY_F32 0x45 -#define V_008DFC_SQ_V_EXP_LEGACY_F32 0x46 -/* */ -#define S_008DFC_VDST(x) (((x) & 0xFF) << 17) -#define G_008DFC_VDST(x) (((x) >> 17) & 0xFF) -#define C_008DFC_VDST 0xFE01FFFF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_ENCODING(x) (((x) & 0x7F) << 25) -#define G_008DFC_ENCODING(x) (((x) >> 25) & 0x7F) -#define C_008DFC_ENCODING 0x01FFFFFF -#define V_008DFC_SQ_ENC_VOP1_FIELD 0x3F -#define R_008DFC_SQ_MIMG_1 0x008DFC -#define S_008DFC_VADDR(x) (((x) & 0xFF) << 0) -#define G_008DFC_VADDR(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VADDR 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VDATA(x) (((x) & 0xFF) << 8) -#define G_008DFC_VDATA(x) (((x) >> 8) & 0xFF) -#define C_008DFC_VDATA 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_SRSRC(x) (((x) & 0x1F) << 16) -#define G_008DFC_SRSRC(x) (((x) >> 16) & 0x1F) -#define C_008DFC_SRSRC 0xFFE0FFFF -#define S_008DFC_SSAMP(x) (((x) & 0x1F) << 21) -#define G_008DFC_SSAMP(x) (((x) >> 21) & 0x1F) -#define C_008DFC_SSAMP 0xFC1FFFFF -#define R_008DFC_SQ_VOP3_1 0x008DFC -#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0) -#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF) -#define C_008DFC_SRC0 0xFFFFFE00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define 
V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define 
V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_SRC1(x) (((x) & 0x1FF) << 9) -#define G_008DFC_SRC1(x) (((x) >> 9) & 0x1FF) -#define C_008DFC_SRC1 0xFFFC01FF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define 
V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define 
V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_SRC2(x) (((x) & 0x1FF) << 18) -#define G_008DFC_SRC2(x) (((x) >> 18) & 
0x1FF) -#define C_008DFC_SRC2 0xF803FFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define 
V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 
-#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_OMOD(x) (((x) & 0x03) << 27) -#define G_008DFC_OMOD(x) (((x) >> 27) & 0x03) -#define C_008DFC_OMOD 0xE7FFFFFF -#define V_008DFC_SQ_OMOD_OFF 0x00 -#define V_008DFC_SQ_OMOD_M2 0x01 -#define V_008DFC_SQ_OMOD_M4 0x02 -#define V_008DFC_SQ_OMOD_D2 0x03 -#define S_008DFC_NEG(x) (((x) & 0x07) << 29) -#define G_008DFC_NEG(x) (((x) >> 29) & 0x07) -#define C_008DFC_NEG 0x1FFFFFFF -#define R_008DFC_SQ_MUBUF_1 0x008DFC -#define S_008DFC_VADDR(x) (((x) & 0xFF) << 0) -#define G_008DFC_VADDR(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VADDR 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VDATA(x) (((x) & 0xFF) << 8) -#define G_008DFC_VDATA(x) (((x) >> 8) & 0xFF) -#define C_008DFC_VDATA 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_SRSRC(x) (((x) & 0x1F) << 16) -#define G_008DFC_SRSRC(x) (((x) >> 16) & 0x1F) -#define C_008DFC_SRSRC 0xFFE0FFFF -#define S_008DFC_SLC(x) (((x) & 0x1) << 22) -#define G_008DFC_SLC(x) (((x) >> 22) & 0x1) -#define C_008DFC_SLC 0xFFBFFFFF -#define S_008DFC_TFE(x) (((x) & 0x1) << 23) -#define G_008DFC_TFE(x) (((x) >> 23) & 0x1) -#define C_008DFC_TFE 0xFF7FFFFF -#define S_008DFC_SOFFSET(x) (((x) & 0xFF) << 24) -#define G_008DFC_SOFFSET(x) (((x) >> 24) & 0xFF) -#define C_008DFC_SOFFSET 0x00FFFFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 
0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define 
V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define R_008DFC_SQ_DS_0 0x008DFC -#define S_008DFC_OFFSET0(x) (((x) & 0xFF) << 0) -#define G_008DFC_OFFSET0(x) (((x) >> 0) & 0xFF) -#define C_008DFC_OFFSET0 
0xFFFFFF00 -#define S_008DFC_OFFSET1(x) (((x) & 0xFF) << 8) -#define G_008DFC_OFFSET1(x) (((x) >> 8) & 0xFF) -#define C_008DFC_OFFSET1 0xFFFF00FF -#define S_008DFC_GDS(x) (((x) & 0x1) << 17) -#define G_008DFC_GDS(x) (((x) >> 17) & 0x1) -#define C_008DFC_GDS 0xFFFDFFFF -#define S_008DFC_OP(x) (((x) & 0xFF) << 18) -#define G_008DFC_OP(x) (((x) >> 18) & 0xFF) -#define C_008DFC_OP 0xFC03FFFF -#define V_008DFC_SQ_DS_ADD_U32 0x00 -#define V_008DFC_SQ_DS_SUB_U32 0x01 -#define V_008DFC_SQ_DS_RSUB_U32 0x02 -#define V_008DFC_SQ_DS_INC_U32 0x03 -#define V_008DFC_SQ_DS_DEC_U32 0x04 -#define V_008DFC_SQ_DS_MIN_I32 0x05 -#define V_008DFC_SQ_DS_MAX_I32 0x06 -#define V_008DFC_SQ_DS_MIN_U32 0x07 -#define V_008DFC_SQ_DS_MAX_U32 0x08 -#define V_008DFC_SQ_DS_AND_B32 0x09 -#define V_008DFC_SQ_DS_OR_B32 0x0A -#define V_008DFC_SQ_DS_XOR_B32 0x0B -#define V_008DFC_SQ_DS_MSKOR_B32 0x0C -#define V_008DFC_SQ_DS_WRITE_B32 0x0D -#define V_008DFC_SQ_DS_WRITE2_B32 0x0E -#define V_008DFC_SQ_DS_WRITE2ST64_B32 0x0F -#define V_008DFC_SQ_DS_CMPST_B32 0x10 -#define V_008DFC_SQ_DS_CMPST_F32 0x11 -#define V_008DFC_SQ_DS_MIN_F32 0x12 -#define V_008DFC_SQ_DS_MAX_F32 0x13 -/* CIK */ -#define V_008DFC_SQ_DS_NOP 0x14 -/* */ -#define V_008DFC_SQ_DS_GWS_INIT 0x19 -#define V_008DFC_SQ_DS_GWS_SEMA_V 0x1A -#define V_008DFC_SQ_DS_GWS_SEMA_BR 0x1B -#define V_008DFC_SQ_DS_GWS_SEMA_P 0x1C -#define V_008DFC_SQ_DS_GWS_BARRIER 0x1D -#define V_008DFC_SQ_DS_WRITE_B8 0x1E -#define V_008DFC_SQ_DS_WRITE_B16 0x1F -#define V_008DFC_SQ_DS_ADD_RTN_U32 0x20 -#define V_008DFC_SQ_DS_SUB_RTN_U32 0x21 -#define V_008DFC_SQ_DS_RSUB_RTN_U32 0x22 -#define V_008DFC_SQ_DS_INC_RTN_U32 0x23 -#define V_008DFC_SQ_DS_DEC_RTN_U32 0x24 -#define V_008DFC_SQ_DS_MIN_RTN_I32 0x25 -#define V_008DFC_SQ_DS_MAX_RTN_I32 0x26 -#define V_008DFC_SQ_DS_MIN_RTN_U32 0x27 -#define V_008DFC_SQ_DS_MAX_RTN_U32 0x28 -#define V_008DFC_SQ_DS_AND_RTN_B32 0x29 -#define V_008DFC_SQ_DS_OR_RTN_B32 0x2A -#define V_008DFC_SQ_DS_XOR_RTN_B32 0x2B -#define 
V_008DFC_SQ_DS_MSKOR_RTN_B32 0x2C -#define V_008DFC_SQ_DS_WRXCHG_RTN_B32 0x2D -#define V_008DFC_SQ_DS_WRXCHG2_RTN_B32 0x2E -#define V_008DFC_SQ_DS_WRXCHG2ST64_RTN_B32 0x2F -#define V_008DFC_SQ_DS_CMPST_RTN_B32 0x30 -#define V_008DFC_SQ_DS_CMPST_RTN_F32 0x31 -#define V_008DFC_SQ_DS_MIN_RTN_F32 0x32 -#define V_008DFC_SQ_DS_MAX_RTN_F32 0x33 -#define V_008DFC_SQ_DS_SWIZZLE_B32 0x35 -#define V_008DFC_SQ_DS_READ_B32 0x36 -#define V_008DFC_SQ_DS_READ2_B32 0x37 -#define V_008DFC_SQ_DS_READ2ST64_B32 0x38 -#define V_008DFC_SQ_DS_READ_I8 0x39 -#define V_008DFC_SQ_DS_READ_U8 0x3A -#define V_008DFC_SQ_DS_READ_I16 0x3B -#define V_008DFC_SQ_DS_READ_U16 0x3C -#define V_008DFC_SQ_DS_CONSUME 0x3D -#define V_008DFC_SQ_DS_APPEND 0x3E -#define V_008DFC_SQ_DS_ORDERED_COUNT 0x3F -#define V_008DFC_SQ_DS_ADD_U64 0x40 -#define V_008DFC_SQ_DS_SUB_U64 0x41 -#define V_008DFC_SQ_DS_RSUB_U64 0x42 -#define V_008DFC_SQ_DS_INC_U64 0x43 -#define V_008DFC_SQ_DS_DEC_U64 0x44 -#define V_008DFC_SQ_DS_MIN_I64 0x45 -#define V_008DFC_SQ_DS_MAX_I64 0x46 -#define V_008DFC_SQ_DS_MIN_U64 0x47 -#define V_008DFC_SQ_DS_MAX_U64 0x48 -#define V_008DFC_SQ_DS_AND_B64 0x49 -#define V_008DFC_SQ_DS_OR_B64 0x4A -#define V_008DFC_SQ_DS_XOR_B64 0x4B -#define V_008DFC_SQ_DS_MSKOR_B64 0x4C -#define V_008DFC_SQ_DS_WRITE_B64 0x4D -#define V_008DFC_SQ_DS_WRITE2_B64 0x4E -#define V_008DFC_SQ_DS_WRITE2ST64_B64 0x4F -#define V_008DFC_SQ_DS_CMPST_B64 0x50 -#define V_008DFC_SQ_DS_CMPST_F64 0x51 -#define V_008DFC_SQ_DS_MIN_F64 0x52 -#define V_008DFC_SQ_DS_MAX_F64 0x53 -#define V_008DFC_SQ_DS_ADD_RTN_U64 0x60 -#define V_008DFC_SQ_DS_SUB_RTN_U64 0x61 -#define V_008DFC_SQ_DS_RSUB_RTN_U64 0x62 -#define V_008DFC_SQ_DS_INC_RTN_U64 0x63 -#define V_008DFC_SQ_DS_DEC_RTN_U64 0x64 -#define V_008DFC_SQ_DS_MIN_RTN_I64 0x65 -#define V_008DFC_SQ_DS_MAX_RTN_I64 0x66 -#define V_008DFC_SQ_DS_MIN_RTN_U64 0x67 -#define V_008DFC_SQ_DS_MAX_RTN_U64 0x68 -#define V_008DFC_SQ_DS_AND_RTN_B64 0x69 -#define V_008DFC_SQ_DS_OR_RTN_B64 0x6A -#define 
V_008DFC_SQ_DS_XOR_RTN_B64 0x6B -#define V_008DFC_SQ_DS_MSKOR_RTN_B64 0x6C -#define V_008DFC_SQ_DS_WRXCHG_RTN_B64 0x6D -#define V_008DFC_SQ_DS_WRXCHG2_RTN_B64 0x6E -#define V_008DFC_SQ_DS_WRXCHG2ST64_RTN_B64 0x6F -#define V_008DFC_SQ_DS_CMPST_RTN_B64 0x70 -#define V_008DFC_SQ_DS_CMPST_RTN_F64 0x71 -#define V_008DFC_SQ_DS_MIN_RTN_F64 0x72 -#define V_008DFC_SQ_DS_MAX_RTN_F64 0x73 -#define V_008DFC_SQ_DS_READ_B64 0x76 -#define V_008DFC_SQ_DS_READ2_B64 0x77 -#define V_008DFC_SQ_DS_READ2ST64_B64 0x78 -/* CIK */ -#define V_008DFC_SQ_DS_CONDXCHG32_RTN_B64 0x7E -/* */ -#define V_008DFC_SQ_DS_ADD_SRC2_U32 0x80 -#define V_008DFC_SQ_DS_SUB_SRC2_U32 0x81 -#define V_008DFC_SQ_DS_RSUB_SRC2_U32 0x82 -#define V_008DFC_SQ_DS_INC_SRC2_U32 0x83 -#define V_008DFC_SQ_DS_DEC_SRC2_U32 0x84 -#define V_008DFC_SQ_DS_MIN_SRC2_I32 0x85 -#define V_008DFC_SQ_DS_MAX_SRC2_I32 0x86 -#define V_008DFC_SQ_DS_MIN_SRC2_U32 0x87 -#define V_008DFC_SQ_DS_MAX_SRC2_U32 0x88 -#define V_008DFC_SQ_DS_AND_SRC2_B32 0x89 -#define V_008DFC_SQ_DS_OR_SRC2_B32 0x8A -#define V_008DFC_SQ_DS_XOR_SRC2_B32 0x8B -#define V_008DFC_SQ_DS_WRITE_SRC2_B32 0x8D -#define V_008DFC_SQ_DS_MIN_SRC2_F32 0x92 -#define V_008DFC_SQ_DS_MAX_SRC2_F32 0x93 -#define V_008DFC_SQ_DS_ADD_SRC2_U64 0xC0 -#define V_008DFC_SQ_DS_SUB_SRC2_U64 0xC1 -#define V_008DFC_SQ_DS_RSUB_SRC2_U64 0xC2 -#define V_008DFC_SQ_DS_INC_SRC2_U64 0xC3 -#define V_008DFC_SQ_DS_DEC_SRC2_U64 0xC4 -#define V_008DFC_SQ_DS_MIN_SRC2_I64 0xC5 -#define V_008DFC_SQ_DS_MAX_SRC2_I64 0xC6 -#define V_008DFC_SQ_DS_MIN_SRC2_U64 0xC7 -#define V_008DFC_SQ_DS_MAX_SRC2_U64 0xC8 -#define V_008DFC_SQ_DS_AND_SRC2_B64 0xC9 -#define V_008DFC_SQ_DS_OR_SRC2_B64 0xCA -#define V_008DFC_SQ_DS_XOR_SRC2_B64 0xCB -#define V_008DFC_SQ_DS_WRITE_SRC2_B64 0xCD -#define V_008DFC_SQ_DS_MIN_SRC2_F64 0xD2 -#define V_008DFC_SQ_DS_MAX_SRC2_F64 0xD3 -/* CIK */ -#define V_008DFC_SQ_DS_WRITE_B96 0xDE -#define V_008DFC_SQ_DS_WRITE_B128 0xDF -#define V_008DFC_SQ_DS_CONDXCHG32_RTN_B128 0xFD -#define 
V_008DFC_SQ_DS_READ_B96 0xFE -#define V_008DFC_SQ_DS_READ_B128 0xFF -/* */ -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_DS_FIELD 0x36 -#define R_008DFC_SQ_SOPC 0x008DFC -#define S_008DFC_SSRC0(x) (((x) & 0xFF) << 0) -#define G_008DFC_SSRC0(x) (((x) >> 0) & 0xFF) -#define C_008DFC_SSRC0 0xFFFFFF00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 
-#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define 
V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define S_008DFC_SSRC1(x) (((x) & 0xFF) << 8) -#define G_008DFC_SSRC1(x) (((x) >> 8) & 0xFF) -#define C_008DFC_SSRC1 0xFFFF00FF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define 
V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define 
V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define S_008DFC_OP(x) (((x) & 0x7F) << 16) -#define G_008DFC_OP(x) (((x) >> 16) & 0x7F) -#define C_008DFC_OP 0xFF80FFFF -#define V_008DFC_SQ_S_CMP_EQ_I32 0x00 -#define V_008DFC_SQ_S_CMP_LG_I32 0x01 -#define V_008DFC_SQ_S_CMP_GT_I32 0x02 -#define V_008DFC_SQ_S_CMP_GE_I32 0x03 -#define V_008DFC_SQ_S_CMP_LT_I32 0x04 -#define V_008DFC_SQ_S_CMP_LE_I32 0x05 -#define V_008DFC_SQ_S_CMP_EQ_U32 0x06 -#define V_008DFC_SQ_S_CMP_LG_U32 0x07 -#define V_008DFC_SQ_S_CMP_GT_U32 0x08 -#define V_008DFC_SQ_S_CMP_GE_U32 0x09 -#define V_008DFC_SQ_S_CMP_LT_U32 0x0A -#define V_008DFC_SQ_S_CMP_LE_U32 0x0B -#define V_008DFC_SQ_S_BITCMP0_B32 0x0C -#define V_008DFC_SQ_S_BITCMP1_B32 0x0D -#define V_008DFC_SQ_S_BITCMP0_B64 0x0E -#define V_008DFC_SQ_S_BITCMP1_B64 0x0F -#define V_008DFC_SQ_S_SETVSKIP 0x10 -#define S_008DFC_ENCODING(x) (((x) & 0x1FF) << 23) -#define G_008DFC_ENCODING(x) (((x) >> 23) & 0x1FF) -#define C_008DFC_ENCODING 0x007FFFFF -#define 
V_008DFC_SQ_ENC_SOPC_FIELD 0x17E -#endif #define R_008DFC_SQ_EXP_0 0x008DFC #define S_008DFC_EN(x) (((x) & 0x0F) << 0) #define G_008DFC_EN(x) (((x) >> 0) & 0x0F) @@ -3063,1942 +1846,6 @@ #define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) #define C_008DFC_ENCODING 0x03FFFFFF #define V_008DFC_SQ_ENC_EXP_FIELD 0x3E -#if 0 -#define R_008DFC_SQ_MIMG_0 0x008DFC -#define S_008DFC_DMASK(x) (((x) & 0x0F) << 8) -#define G_008DFC_DMASK(x) (((x) >> 8) & 0x0F) -#define C_008DFC_DMASK 0xFFFFF0FF -#define S_008DFC_UNORM(x) (((x) & 0x1) << 12) -#define G_008DFC_UNORM(x) (((x) >> 12) & 0x1) -#define C_008DFC_UNORM 0xFFFFEFFF -#define S_008DFC_GLC(x) (((x) & 0x1) << 13) -#define G_008DFC_GLC(x) (((x) >> 13) & 0x1) -#define C_008DFC_GLC 0xFFFFDFFF -#define S_008DFC_DA(x) (((x) & 0x1) << 14) -#define G_008DFC_DA(x) (((x) >> 14) & 0x1) -#define C_008DFC_DA 0xFFFFBFFF -#define S_008DFC_R128(x) (((x) & 0x1) << 15) -#define G_008DFC_R128(x) (((x) >> 15) & 0x1) -#define C_008DFC_R128 0xFFFF7FFF -#define S_008DFC_TFE(x) (((x) & 0x1) << 16) -#define G_008DFC_TFE(x) (((x) >> 16) & 0x1) -#define C_008DFC_TFE 0xFFFEFFFF -#define S_008DFC_LWE(x) (((x) & 0x1) << 17) -#define G_008DFC_LWE(x) (((x) >> 17) & 0x1) -#define C_008DFC_LWE 0xFFFDFFFF -#define S_008DFC_OP(x) (((x) & 0x7F) << 18) -#define G_008DFC_OP(x) (((x) >> 18) & 0x7F) -#define C_008DFC_OP 0xFE03FFFF -#define V_008DFC_SQ_IMAGE_LOAD 0x00 -#define V_008DFC_SQ_IMAGE_LOAD_MIP 0x01 -#define V_008DFC_SQ_IMAGE_LOAD_PCK 0x02 -#define V_008DFC_SQ_IMAGE_LOAD_PCK_SGN 0x03 -#define V_008DFC_SQ_IMAGE_LOAD_MIP_PCK 0x04 -#define V_008DFC_SQ_IMAGE_LOAD_MIP_PCK_SGN 0x05 -#define V_008DFC_SQ_IMAGE_STORE 0x08 -#define V_008DFC_SQ_IMAGE_STORE_MIP 0x09 -#define V_008DFC_SQ_IMAGE_STORE_PCK 0x0A -#define V_008DFC_SQ_IMAGE_STORE_MIP_PCK 0x0B -#define V_008DFC_SQ_IMAGE_GET_RESINFO 0x0E -#define V_008DFC_SQ_IMAGE_ATOMIC_SWAP 0x0F -#define V_008DFC_SQ_IMAGE_ATOMIC_CMPSWAP 0x10 -#define V_008DFC_SQ_IMAGE_ATOMIC_ADD 0x11 -#define V_008DFC_SQ_IMAGE_ATOMIC_SUB 0x12 
-#define V_008DFC_SQ_IMAGE_ATOMIC_RSUB 0x13 /* not on CIK */ -#define V_008DFC_SQ_IMAGE_ATOMIC_SMIN 0x14 -#define V_008DFC_SQ_IMAGE_ATOMIC_UMIN 0x15 -#define V_008DFC_SQ_IMAGE_ATOMIC_SMAX 0x16 -#define V_008DFC_SQ_IMAGE_ATOMIC_UMAX 0x17 -#define V_008DFC_SQ_IMAGE_ATOMIC_AND 0x18 -#define V_008DFC_SQ_IMAGE_ATOMIC_OR 0x19 -#define V_008DFC_SQ_IMAGE_ATOMIC_XOR 0x1A -#define V_008DFC_SQ_IMAGE_ATOMIC_INC 0x1B -#define V_008DFC_SQ_IMAGE_ATOMIC_DEC 0x1C -#define V_008DFC_SQ_IMAGE_ATOMIC_FCMPSWAP 0x1D -#define V_008DFC_SQ_IMAGE_ATOMIC_FMIN 0x1E -#define V_008DFC_SQ_IMAGE_ATOMIC_FMAX 0x1F -#define V_008DFC_SQ_IMAGE_SAMPLE 0x20 -#define V_008DFC_SQ_IMAGE_SAMPLE_CL 0x21 -#define V_008DFC_SQ_IMAGE_SAMPLE_D 0x22 -#define V_008DFC_SQ_IMAGE_SAMPLE_D_CL 0x23 -#define V_008DFC_SQ_IMAGE_SAMPLE_L 0x24 -#define V_008DFC_SQ_IMAGE_SAMPLE_B 0x25 -#define V_008DFC_SQ_IMAGE_SAMPLE_B_CL 0x26 -#define V_008DFC_SQ_IMAGE_SAMPLE_LZ 0x27 -#define V_008DFC_SQ_IMAGE_SAMPLE_C 0x28 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CL 0x29 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_D 0x2A -#define V_008DFC_SQ_IMAGE_SAMPLE_C_D_CL 0x2B -#define V_008DFC_SQ_IMAGE_SAMPLE_C_L 0x2C -#define V_008DFC_SQ_IMAGE_SAMPLE_C_B 0x2D -#define V_008DFC_SQ_IMAGE_SAMPLE_C_B_CL 0x2E -#define V_008DFC_SQ_IMAGE_SAMPLE_C_LZ 0x2F -#define V_008DFC_SQ_IMAGE_SAMPLE_O 0x30 -#define V_008DFC_SQ_IMAGE_SAMPLE_CL_O 0x31 -#define V_008DFC_SQ_IMAGE_SAMPLE_D_O 0x32 -#define V_008DFC_SQ_IMAGE_SAMPLE_D_CL_O 0x33 -#define V_008DFC_SQ_IMAGE_SAMPLE_L_O 0x34 -#define V_008DFC_SQ_IMAGE_SAMPLE_B_O 0x35 -#define V_008DFC_SQ_IMAGE_SAMPLE_B_CL_O 0x36 -#define V_008DFC_SQ_IMAGE_SAMPLE_LZ_O 0x37 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_O 0x38 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CL_O 0x39 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_D_O 0x3A -#define V_008DFC_SQ_IMAGE_SAMPLE_C_D_CL_O 0x3B -#define V_008DFC_SQ_IMAGE_SAMPLE_C_L_O 0x3C -#define V_008DFC_SQ_IMAGE_SAMPLE_C_B_O 0x3D -#define V_008DFC_SQ_IMAGE_SAMPLE_C_B_CL_O 0x3E -#define V_008DFC_SQ_IMAGE_SAMPLE_C_LZ_O 0x3F -#define 
V_008DFC_SQ_IMAGE_GATHER4 0x40 -#define V_008DFC_SQ_IMAGE_GATHER4_CL 0x41 -#define V_008DFC_SQ_IMAGE_GATHER4_L 0x44 -#define V_008DFC_SQ_IMAGE_GATHER4_B 0x45 -#define V_008DFC_SQ_IMAGE_GATHER4_B_CL 0x46 -#define V_008DFC_SQ_IMAGE_GATHER4_LZ 0x47 -#define V_008DFC_SQ_IMAGE_GATHER4_C 0x48 -#define V_008DFC_SQ_IMAGE_GATHER4_C_CL 0x49 -#define V_008DFC_SQ_IMAGE_GATHER4_C_L 0x4C -#define V_008DFC_SQ_IMAGE_GATHER4_C_B 0x4D -#define V_008DFC_SQ_IMAGE_GATHER4_C_B_CL 0x4E -#define V_008DFC_SQ_IMAGE_GATHER4_C_LZ 0x4F -#define V_008DFC_SQ_IMAGE_GATHER4_O 0x50 -#define V_008DFC_SQ_IMAGE_GATHER4_CL_O 0x51 -#define V_008DFC_SQ_IMAGE_GATHER4_L_O 0x54 -#define V_008DFC_SQ_IMAGE_GATHER4_B_O 0x55 -#define V_008DFC_SQ_IMAGE_GATHER4_B_CL_O 0x56 -#define V_008DFC_SQ_IMAGE_GATHER4_LZ_O 0x57 -#define V_008DFC_SQ_IMAGE_GATHER4_C_O 0x58 -#define V_008DFC_SQ_IMAGE_GATHER4_C_CL_O 0x59 -#define V_008DFC_SQ_IMAGE_GATHER4_C_L_O 0x5C -#define V_008DFC_SQ_IMAGE_GATHER4_C_B_O 0x5D -#define V_008DFC_SQ_IMAGE_GATHER4_C_B_CL_O 0x5E -#define V_008DFC_SQ_IMAGE_GATHER4_C_LZ_O 0x5F -#define V_008DFC_SQ_IMAGE_GET_LOD 0x60 -#define V_008DFC_SQ_IMAGE_SAMPLE_CD 0x68 -#define V_008DFC_SQ_IMAGE_SAMPLE_CD_CL 0x69 -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CD 0x6A -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CD_CL 0x6B -#define V_008DFC_SQ_IMAGE_SAMPLE_CD_O 0x6C -#define V_008DFC_SQ_IMAGE_SAMPLE_CD_CL_O 0x6D -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CD_O 0x6E -#define V_008DFC_SQ_IMAGE_SAMPLE_C_CD_CL_O 0x6F -#define S_008DFC_SLC(x) (((x) & 0x1) << 25) -#define G_008DFC_SLC(x) (((x) >> 25) & 0x1) -#define C_008DFC_SLC 0xFDFFFFFF -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_MIMG_FIELD 0x3C -#define R_008DFC_SQ_SOPP 0x008DFC -#define S_008DFC_SIMM16(x) (((x) & 0xFFFF) << 0) -#define G_008DFC_SIMM16(x) (((x) >> 0) & 0xFFFF) -#define C_008DFC_SIMM16 0xFFFF0000 -#define S_008DFC_OP(x) (((x) & 0x7F) << 16) -#define 
G_008DFC_OP(x) (((x) >> 16) & 0x7F) -#define C_008DFC_OP 0xFF80FFFF -#define V_008DFC_SQ_S_NOP 0x00 -#define V_008DFC_SQ_S_ENDPGM 0x01 -#define V_008DFC_SQ_S_BRANCH 0x02 -#define V_008DFC_SQ_S_CBRANCH_SCC0 0x04 -#define V_008DFC_SQ_S_CBRANCH_SCC1 0x05 -#define V_008DFC_SQ_S_CBRANCH_VCCZ 0x06 -#define V_008DFC_SQ_S_CBRANCH_VCCNZ 0x07 -#define V_008DFC_SQ_S_CBRANCH_EXECZ 0x08 -#define V_008DFC_SQ_S_CBRANCH_EXECNZ 0x09 -#define V_008DFC_SQ_S_BARRIER 0x0A -/* CIK */ -#define V_008DFC_SQ_S_SETKILL 0x0B -/* */ -#define V_008DFC_SQ_S_WAITCNT 0x0C -#define V_008DFC_SQ_S_SETHALT 0x0D -#define V_008DFC_SQ_S_SLEEP 0x0E -#define V_008DFC_SQ_S_SETPRIO 0x0F -#define V_008DFC_SQ_S_SENDMSG 0x10 -#define V_008DFC_SQ_S_SENDMSGHALT 0x11 -#define V_008DFC_SQ_S_TRAP 0x12 -#define V_008DFC_SQ_S_ICACHE_INV 0x13 -#define V_008DFC_SQ_S_INCPERFLEVEL 0x14 -#define V_008DFC_SQ_S_DECPERFLEVEL 0x15 -#define V_008DFC_SQ_S_TTRACEDATA 0x16 -/* CIK */ -#define V_008DFC_SQ_S_CBRANCH_CDBGSYS 0x17 -#define V_008DFC_SQ_S_CBRANCH_CDBGUSER 0x18 -#define V_008DFC_SQ_S_CBRANCH_CDBGSYS_OR_USER 0x19 -#define V_008DFC_SQ_S_CBRANCH_CDBGSYS_AND_USER 0x1A -/* */ -#define S_008DFC_ENCODING(x) (((x) & 0x1FF) << 23) -#define G_008DFC_ENCODING(x) (((x) >> 23) & 0x1FF) -#define C_008DFC_ENCODING 0x007FFFFF -#define V_008DFC_SQ_ENC_SOPP_FIELD 0x17F -#define R_008DFC_SQ_VINTRP 0x008DFC -#define S_008DFC_VSRC(x) (((x) & 0xFF) << 0) -#define G_008DFC_VSRC(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VSRC 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_ATTRCHAN(x) (((x) & 0x03) << 8) -#define G_008DFC_ATTRCHAN(x) (((x) >> 8) & 0x03) -#define C_008DFC_ATTRCHAN 0xFFFFFCFF -#define V_008DFC_SQ_CHAN_X 0x00 -#define V_008DFC_SQ_CHAN_Y 0x01 -#define V_008DFC_SQ_CHAN_Z 0x02 -#define V_008DFC_SQ_CHAN_W 0x03 -#define S_008DFC_ATTR(x) (((x) & 0x3F) << 10) -#define G_008DFC_ATTR(x) (((x) >> 10) & 0x3F) -#define C_008DFC_ATTR 0xFFFF03FF -#define V_008DFC_SQ_ATTR 0x00 -#define S_008DFC_OP(x) (((x) & 0x03) << 16) -#define 
G_008DFC_OP(x) (((x) >> 16) & 0x03) -#define C_008DFC_OP 0xFFFCFFFF -#define V_008DFC_SQ_V_INTERP_P1_F32 0x00 -#define V_008DFC_SQ_V_INTERP_P2_F32 0x01 -#define V_008DFC_SQ_V_INTERP_MOV_F32 0x02 -#define S_008DFC_VDST(x) (((x) & 0xFF) << 18) -#define G_008DFC_VDST(x) (((x) >> 18) & 0xFF) -#define C_008DFC_VDST 0xFC03FFFF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_VINTRP_FIELD 0x32 -#define R_008DFC_SQ_MTBUF_0 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFF) -#define C_008DFC_OFFSET 0xFFFFF000 -#define S_008DFC_OFFEN(x) (((x) & 0x1) << 12) -#define G_008DFC_OFFEN(x) (((x) >> 12) & 0x1) -#define C_008DFC_OFFEN 0xFFFFEFFF -#define S_008DFC_IDXEN(x) (((x) & 0x1) << 13) -#define G_008DFC_IDXEN(x) (((x) >> 13) & 0x1) -#define C_008DFC_IDXEN 0xFFFFDFFF -#define S_008DFC_GLC(x) (((x) & 0x1) << 14) -#define G_008DFC_GLC(x) (((x) >> 14) & 0x1) -#define C_008DFC_GLC 0xFFFFBFFF -#define S_008DFC_ADDR64(x) (((x) & 0x1) << 15) -#define G_008DFC_ADDR64(x) (((x) >> 15) & 0x1) -#define C_008DFC_ADDR64 0xFFFF7FFF -#define S_008DFC_OP(x) (((x) & 0x07) << 16) -#define G_008DFC_OP(x) (((x) >> 16) & 0x07) -#define C_008DFC_OP 0xFFF8FFFF -#define V_008DFC_SQ_TBUFFER_LOAD_FORMAT_X 0x00 -#define V_008DFC_SQ_TBUFFER_LOAD_FORMAT_XY 0x01 -#define V_008DFC_SQ_TBUFFER_LOAD_FORMAT_XYZ 0x02 -#define V_008DFC_SQ_TBUFFER_LOAD_FORMAT_XYZW 0x03 -#define V_008DFC_SQ_TBUFFER_STORE_FORMAT_X 0x04 -#define V_008DFC_SQ_TBUFFER_STORE_FORMAT_XY 0x05 -#define V_008DFC_SQ_TBUFFER_STORE_FORMAT_XYZ 0x06 -#define V_008DFC_SQ_TBUFFER_STORE_FORMAT_XYZW 0x07 -#define S_008DFC_DFMT(x) (((x) & 0x0F) << 19) -#define G_008DFC_DFMT(x) (((x) >> 19) & 0x0F) -#define C_008DFC_DFMT 0xFF87FFFF -#define S_008DFC_NFMT(x) (((x) & 0x07) << 23) -#define G_008DFC_NFMT(x) (((x) >> 23) & 0x07) -#define C_008DFC_NFMT 0xFC7FFFFF 
-#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_MTBUF_FIELD 0x3A -#define R_008DFC_SQ_SMRD 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFF) -#define C_008DFC_OFFSET 0xFFFFFF00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -/* CIK */ -#define V_008DFC_SQ_SRC_LITERAL 0xFF -/* */ -#define S_008DFC_IMM(x) (((x) & 0x1) << 8) -#define G_008DFC_IMM(x) (((x) >> 8) & 0x1) -#define C_008DFC_IMM 0xFFFFFEFF -#define S_008DFC_SBASE(x) (((x) & 0x3F) << 9) -#define G_008DFC_SBASE(x) (((x) >> 9) & 0x3F) -#define C_008DFC_SBASE 0xFFFF81FF -#define S_008DFC_SDST(x) (((x) & 0x7F) << 15) -#define G_008DFC_SDST(x) (((x) >> 15) & 0x7F) -#define C_008DFC_SDST 0xFFC07FFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define 
V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define S_008DFC_OP(x) (((x) & 0x1F) << 22) -#define G_008DFC_OP(x) (((x) >> 22) & 0x1F) -#define C_008DFC_OP 0xF83FFFFF -#define V_008DFC_SQ_S_LOAD_DWORD 0x00 -#define V_008DFC_SQ_S_LOAD_DWORDX2 0x01 -#define V_008DFC_SQ_S_LOAD_DWORDX4 0x02 -#define V_008DFC_SQ_S_LOAD_DWORDX8 0x03 -#define V_008DFC_SQ_S_LOAD_DWORDX16 0x04 -#define V_008DFC_SQ_S_BUFFER_LOAD_DWORD 0x08 -#define V_008DFC_SQ_S_BUFFER_LOAD_DWORDX2 0x09 -#define V_008DFC_SQ_S_BUFFER_LOAD_DWORDX4 0x0A -#define V_008DFC_SQ_S_BUFFER_LOAD_DWORDX8 0x0B -#define V_008DFC_SQ_S_BUFFER_LOAD_DWORDX16 0x0C -/* CIK */ -#define V_008DFC_SQ_S_DCACHE_INV_VOL 0x1D -/* */ -#define V_008DFC_SQ_S_MEMTIME 0x1E -#define V_008DFC_SQ_S_DCACHE_INV 0x1F -#define S_008DFC_ENCODING(x) (((x) & 0x1F) << 27) -#define G_008DFC_ENCODING(x) (((x) >> 27) & 0x1F) -#define C_008DFC_ENCODING 0x07FFFFFF -#define V_008DFC_SQ_ENC_SMRD_FIELD 0x18 -/* CIK */ -#define R_008DFC_SQ_FLAT_0 0x008DFC -#define S_008DFC_GLC(x) (((x) & 0x1) << 16) -#define G_008DFC_GLC(x) (((x) >> 16) & 0x1) -#define C_008DFC_GLC 0xFFFEFFFF -#define S_008DFC_SLC(x) (((x) & 0x1) << 17) -#define G_008DFC_SLC(x) (((x) >> 17) & 0x1) -#define C_008DFC_SLC 0xFFFDFFFF -#define S_008DFC_OP(x) (((x) & 0x7F) << 18) -#define G_008DFC_OP(x) (((x) >> 18) & 0x7F) -#define C_008DFC_OP 0xFE03FFFF -#define V_008DFC_SQ_FLAT_LOAD_UBYTE 0x08 -#define V_008DFC_SQ_FLAT_LOAD_SBYTE 0x09 -#define V_008DFC_SQ_FLAT_LOAD_USHORT 0x0A -#define V_008DFC_SQ_FLAT_LOAD_SSHORT 0x0B -#define V_008DFC_SQ_FLAT_LOAD_DWORD 0x0C -#define V_008DFC_SQ_FLAT_LOAD_DWORDX2 0x0D -#define V_008DFC_SQ_FLAT_LOAD_DWORDX4 0x0E -#define V_008DFC_SQ_FLAT_LOAD_DWORDX3 0x0F -#define V_008DFC_SQ_FLAT_STORE_BYTE 0x18 
-#define V_008DFC_SQ_FLAT_STORE_SHORT 0x1A -#define V_008DFC_SQ_FLAT_STORE_DWORD 0x1C -#define V_008DFC_SQ_FLAT_STORE_DWORDX2 0x1D -#define V_008DFC_SQ_FLAT_STORE_DWORDX4 0x1E -#define V_008DFC_SQ_FLAT_STORE_DWORDX3 0x1F -#define V_008DFC_SQ_FLAT_ATOMIC_SWAP 0x30 -#define V_008DFC_SQ_FLAT_ATOMIC_CMPSWAP 0x31 -#define V_008DFC_SQ_FLAT_ATOMIC_ADD 0x32 -#define V_008DFC_SQ_FLAT_ATOMIC_SUB 0x33 -#define V_008DFC_SQ_FLAT_ATOMIC_SMIN 0x35 -#define V_008DFC_SQ_FLAT_ATOMIC_UMIN 0x36 -#define V_008DFC_SQ_FLAT_ATOMIC_SMAX 0x37 -#define V_008DFC_SQ_FLAT_ATOMIC_UMAX 0x38 -#define V_008DFC_SQ_FLAT_ATOMIC_AND 0x39 -#define V_008DFC_SQ_FLAT_ATOMIC_OR 0x3A -#define V_008DFC_SQ_FLAT_ATOMIC_XOR 0x3B -#define V_008DFC_SQ_FLAT_ATOMIC_INC 0x3C -#define V_008DFC_SQ_FLAT_ATOMIC_DEC 0x3D -#define V_008DFC_SQ_FLAT_ATOMIC_FCMPSWAP 0x3E -#define V_008DFC_SQ_FLAT_ATOMIC_FMIN 0x3F -#define V_008DFC_SQ_FLAT_ATOMIC_FMAX 0x40 -#define V_008DFC_SQ_FLAT_ATOMIC_SWAP_X2 0x50 -#define V_008DFC_SQ_FLAT_ATOMIC_CMPSWAP_X2 0x51 -#define V_008DFC_SQ_FLAT_ATOMIC_ADD_X2 0x52 -#define V_008DFC_SQ_FLAT_ATOMIC_SUB_X2 0x53 -#define V_008DFC_SQ_FLAT_ATOMIC_SMIN_X2 0x55 -#define V_008DFC_SQ_FLAT_ATOMIC_UMIN_X2 0x56 -#define V_008DFC_SQ_FLAT_ATOMIC_SMAX_X2 0x57 -#define V_008DFC_SQ_FLAT_ATOMIC_UMAX_X2 0x58 -#define V_008DFC_SQ_FLAT_ATOMIC_AND_X2 0x59 -#define V_008DFC_SQ_FLAT_ATOMIC_OR_X2 0x5A -#define V_008DFC_SQ_FLAT_ATOMIC_XOR_X2 0x5B -#define V_008DFC_SQ_FLAT_ATOMIC_INC_X2 0x5C -#define V_008DFC_SQ_FLAT_ATOMIC_DEC_X2 0x5D -#define V_008DFC_SQ_FLAT_ATOMIC_FCMPSWAP_X2 0x5E -#define V_008DFC_SQ_FLAT_ATOMIC_FMIN_X2 0x5F -#define V_008DFC_SQ_FLAT_ATOMIC_FMAX_X2 0x60 -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_FLAT_FIELD 0x37 -/* */ -#define R_008DFC_SQ_EXP_1 0x008DFC -#define S_008DFC_VSRC0(x) (((x) & 0xFF) << 0) -#define G_008DFC_VSRC0(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VSRC0 0xFFFFFF00 
-#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VSRC1(x) (((x) & 0xFF) << 8) -#define G_008DFC_VSRC1(x) (((x) >> 8) & 0xFF) -#define C_008DFC_VSRC1 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VSRC2(x) (((x) & 0xFF) << 16) -#define G_008DFC_VSRC2(x) (((x) >> 16) & 0xFF) -#define C_008DFC_VSRC2 0xFF00FFFF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VSRC3(x) (((x) & 0xFF) << 24) -#define G_008DFC_VSRC3(x) (((x) >> 24) & 0xFF) -#define C_008DFC_VSRC3 0x00FFFFFF -#define V_008DFC_SQ_VGPR 0x00 -#define R_008DFC_SQ_DS_1 0x008DFC -#define S_008DFC_ADDR(x) (((x) & 0xFF) << 0) -#define G_008DFC_ADDR(x) (((x) >> 0) & 0xFF) -#define C_008DFC_ADDR 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_DATA0(x) (((x) & 0xFF) << 8) -#define G_008DFC_DATA0(x) (((x) >> 8) & 0xFF) -#define C_008DFC_DATA0 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_DATA1(x) (((x) & 0xFF) << 16) -#define G_008DFC_DATA1(x) (((x) >> 16) & 0xFF) -#define C_008DFC_DATA1 0xFF00FFFF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VDST(x) (((x) & 0xFF) << 24) -#define G_008DFC_VDST(x) (((x) >> 24) & 0xFF) -#define C_008DFC_VDST 0x00FFFFFF -#define V_008DFC_SQ_VGPR 0x00 -#define R_008DFC_SQ_VOPC 0x008DFC -#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0) -#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF) -#define C_008DFC_SRC0 0xFFFFFE00 -#define V_008DFC_SQ_SGPR 0x00 -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C 
-#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define 
V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_VSRC1(x) (((x) & 0xFF) << 9) -#define G_008DFC_VSRC1(x) (((x) >> 9) & 0xFF) -#define C_008DFC_VSRC1 0xFFFE01FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_OP(x) (((x) & 0xFF) << 17) -#define G_008DFC_OP(x) (((x) >> 17) & 0xFF) -#define C_008DFC_OP 0xFE01FFFF -#define V_008DFC_SQ_V_CMP_F_F32 0x00 -#define V_008DFC_SQ_V_CMP_LT_F32 0x01 -#define V_008DFC_SQ_V_CMP_EQ_F32 0x02 -#define V_008DFC_SQ_V_CMP_LE_F32 0x03 -#define 
V_008DFC_SQ_V_CMP_GT_F32 0x04 -#define V_008DFC_SQ_V_CMP_LG_F32 0x05 -#define V_008DFC_SQ_V_CMP_GE_F32 0x06 -#define V_008DFC_SQ_V_CMP_O_F32 0x07 -#define V_008DFC_SQ_V_CMP_U_F32 0x08 -#define V_008DFC_SQ_V_CMP_NGE_F32 0x09 -#define V_008DFC_SQ_V_CMP_NLG_F32 0x0A -#define V_008DFC_SQ_V_CMP_NGT_F32 0x0B -#define V_008DFC_SQ_V_CMP_NLE_F32 0x0C -#define V_008DFC_SQ_V_CMP_NEQ_F32 0x0D -#define V_008DFC_SQ_V_CMP_NLT_F32 0x0E -#define V_008DFC_SQ_V_CMP_TRU_F32 0x0F -#define V_008DFC_SQ_V_CMPX_F_F32 0x10 -#define V_008DFC_SQ_V_CMPX_LT_F32 0x11 -#define V_008DFC_SQ_V_CMPX_EQ_F32 0x12 -#define V_008DFC_SQ_V_CMPX_LE_F32 0x13 -#define V_008DFC_SQ_V_CMPX_GT_F32 0x14 -#define V_008DFC_SQ_V_CMPX_LG_F32 0x15 -#define V_008DFC_SQ_V_CMPX_GE_F32 0x16 -#define V_008DFC_SQ_V_CMPX_O_F32 0x17 -#define V_008DFC_SQ_V_CMPX_U_F32 0x18 -#define V_008DFC_SQ_V_CMPX_NGE_F32 0x19 -#define V_008DFC_SQ_V_CMPX_NLG_F32 0x1A -#define V_008DFC_SQ_V_CMPX_NGT_F32 0x1B -#define V_008DFC_SQ_V_CMPX_NLE_F32 0x1C -#define V_008DFC_SQ_V_CMPX_NEQ_F32 0x1D -#define V_008DFC_SQ_V_CMPX_NLT_F32 0x1E -#define V_008DFC_SQ_V_CMPX_TRU_F32 0x1F -#define V_008DFC_SQ_V_CMP_F_F64 0x20 -#define V_008DFC_SQ_V_CMP_LT_F64 0x21 -#define V_008DFC_SQ_V_CMP_EQ_F64 0x22 -#define V_008DFC_SQ_V_CMP_LE_F64 0x23 -#define V_008DFC_SQ_V_CMP_GT_F64 0x24 -#define V_008DFC_SQ_V_CMP_LG_F64 0x25 -#define V_008DFC_SQ_V_CMP_GE_F64 0x26 -#define V_008DFC_SQ_V_CMP_O_F64 0x27 -#define V_008DFC_SQ_V_CMP_U_F64 0x28 -#define V_008DFC_SQ_V_CMP_NGE_F64 0x29 -#define V_008DFC_SQ_V_CMP_NLG_F64 0x2A -#define V_008DFC_SQ_V_CMP_NGT_F64 0x2B -#define V_008DFC_SQ_V_CMP_NLE_F64 0x2C -#define V_008DFC_SQ_V_CMP_NEQ_F64 0x2D -#define V_008DFC_SQ_V_CMP_NLT_F64 0x2E -#define V_008DFC_SQ_V_CMP_TRU_F64 0x2F -#define V_008DFC_SQ_V_CMPX_F_F64 0x30 -#define V_008DFC_SQ_V_CMPX_LT_F64 0x31 -#define V_008DFC_SQ_V_CMPX_EQ_F64 0x32 -#define V_008DFC_SQ_V_CMPX_LE_F64 0x33 -#define V_008DFC_SQ_V_CMPX_GT_F64 0x34 -#define V_008DFC_SQ_V_CMPX_LG_F64 0x35 -#define 
V_008DFC_SQ_V_CMPX_GE_F64 0x36 -#define V_008DFC_SQ_V_CMPX_O_F64 0x37 -#define V_008DFC_SQ_V_CMPX_U_F64 0x38 -#define V_008DFC_SQ_V_CMPX_NGE_F64 0x39 -#define V_008DFC_SQ_V_CMPX_NLG_F64 0x3A -#define V_008DFC_SQ_V_CMPX_NGT_F64 0x3B -#define V_008DFC_SQ_V_CMPX_NLE_F64 0x3C -#define V_008DFC_SQ_V_CMPX_NEQ_F64 0x3D -#define V_008DFC_SQ_V_CMPX_NLT_F64 0x3E -#define V_008DFC_SQ_V_CMPX_TRU_F64 0x3F -#define V_008DFC_SQ_V_CMPS_F_F32 0x40 -#define V_008DFC_SQ_V_CMPS_LT_F32 0x41 -#define V_008DFC_SQ_V_CMPS_EQ_F32 0x42 -#define V_008DFC_SQ_V_CMPS_LE_F32 0x43 -#define V_008DFC_SQ_V_CMPS_GT_F32 0x44 -#define V_008DFC_SQ_V_CMPS_LG_F32 0x45 -#define V_008DFC_SQ_V_CMPS_GE_F32 0x46 -#define V_008DFC_SQ_V_CMPS_O_F32 0x47 -#define V_008DFC_SQ_V_CMPS_U_F32 0x48 -#define V_008DFC_SQ_V_CMPS_NGE_F32 0x49 -#define V_008DFC_SQ_V_CMPS_NLG_F32 0x4A -#define V_008DFC_SQ_V_CMPS_NGT_F32 0x4B -#define V_008DFC_SQ_V_CMPS_NLE_F32 0x4C -#define V_008DFC_SQ_V_CMPS_NEQ_F32 0x4D -#define V_008DFC_SQ_V_CMPS_NLT_F32 0x4E -#define V_008DFC_SQ_V_CMPS_TRU_F32 0x4F -#define V_008DFC_SQ_V_CMPSX_F_F32 0x50 -#define V_008DFC_SQ_V_CMPSX_LT_F32 0x51 -#define V_008DFC_SQ_V_CMPSX_EQ_F32 0x52 -#define V_008DFC_SQ_V_CMPSX_LE_F32 0x53 -#define V_008DFC_SQ_V_CMPSX_GT_F32 0x54 -#define V_008DFC_SQ_V_CMPSX_LG_F32 0x55 -#define V_008DFC_SQ_V_CMPSX_GE_F32 0x56 -#define V_008DFC_SQ_V_CMPSX_O_F32 0x57 -#define V_008DFC_SQ_V_CMPSX_U_F32 0x58 -#define V_008DFC_SQ_V_CMPSX_NGE_F32 0x59 -#define V_008DFC_SQ_V_CMPSX_NLG_F32 0x5A -#define V_008DFC_SQ_V_CMPSX_NGT_F32 0x5B -#define V_008DFC_SQ_V_CMPSX_NLE_F32 0x5C -#define V_008DFC_SQ_V_CMPSX_NEQ_F32 0x5D -#define V_008DFC_SQ_V_CMPSX_NLT_F32 0x5E -#define V_008DFC_SQ_V_CMPSX_TRU_F32 0x5F -#define V_008DFC_SQ_V_CMPS_F_F64 0x60 -#define V_008DFC_SQ_V_CMPS_LT_F64 0x61 -#define V_008DFC_SQ_V_CMPS_EQ_F64 0x62 -#define V_008DFC_SQ_V_CMPS_LE_F64 0x63 -#define V_008DFC_SQ_V_CMPS_GT_F64 0x64 -#define V_008DFC_SQ_V_CMPS_LG_F64 0x65 -#define V_008DFC_SQ_V_CMPS_GE_F64 0x66 -#define 
V_008DFC_SQ_V_CMPS_O_F64 0x67 -#define V_008DFC_SQ_V_CMPS_U_F64 0x68 -#define V_008DFC_SQ_V_CMPS_NGE_F64 0x69 -#define V_008DFC_SQ_V_CMPS_NLG_F64 0x6A -#define V_008DFC_SQ_V_CMPS_NGT_F64 0x6B -#define V_008DFC_SQ_V_CMPS_NLE_F64 0x6C -#define V_008DFC_SQ_V_CMPS_NEQ_F64 0x6D -#define V_008DFC_SQ_V_CMPS_NLT_F64 0x6E -#define V_008DFC_SQ_V_CMPS_TRU_F64 0x6F -#define V_008DFC_SQ_V_CMPSX_F_F64 0x70 -#define V_008DFC_SQ_V_CMPSX_LT_F64 0x71 -#define V_008DFC_SQ_V_CMPSX_EQ_F64 0x72 -#define V_008DFC_SQ_V_CMPSX_LE_F64 0x73 -#define V_008DFC_SQ_V_CMPSX_GT_F64 0x74 -#define V_008DFC_SQ_V_CMPSX_LG_F64 0x75 -#define V_008DFC_SQ_V_CMPSX_GE_F64 0x76 -#define V_008DFC_SQ_V_CMPSX_O_F64 0x77 -#define V_008DFC_SQ_V_CMPSX_U_F64 0x78 -#define V_008DFC_SQ_V_CMPSX_NGE_F64 0x79 -#define V_008DFC_SQ_V_CMPSX_NLG_F64 0x7A -#define V_008DFC_SQ_V_CMPSX_NGT_F64 0x7B -#define V_008DFC_SQ_V_CMPSX_NLE_F64 0x7C -#define V_008DFC_SQ_V_CMPSX_NEQ_F64 0x7D -#define V_008DFC_SQ_V_CMPSX_NLT_F64 0x7E -#define V_008DFC_SQ_V_CMPSX_TRU_F64 0x7F -#define V_008DFC_SQ_V_CMP_F_I32 0x80 -#define V_008DFC_SQ_V_CMP_LT_I32 0x81 -#define V_008DFC_SQ_V_CMP_EQ_I32 0x82 -#define V_008DFC_SQ_V_CMP_LE_I32 0x83 -#define V_008DFC_SQ_V_CMP_GT_I32 0x84 -#define V_008DFC_SQ_V_CMP_NE_I32 0x85 -#define V_008DFC_SQ_V_CMP_GE_I32 0x86 -#define V_008DFC_SQ_V_CMP_T_I32 0x87 -#define V_008DFC_SQ_V_CMP_CLASS_F32 0x88 -#define V_008DFC_SQ_V_CMPX_F_I32 0x90 -#define V_008DFC_SQ_V_CMPX_LT_I32 0x91 -#define V_008DFC_SQ_V_CMPX_EQ_I32 0x92 -#define V_008DFC_SQ_V_CMPX_LE_I32 0x93 -#define V_008DFC_SQ_V_CMPX_GT_I32 0x94 -#define V_008DFC_SQ_V_CMPX_NE_I32 0x95 -#define V_008DFC_SQ_V_CMPX_GE_I32 0x96 -#define V_008DFC_SQ_V_CMPX_T_I32 0x97 -#define V_008DFC_SQ_V_CMPX_CLASS_F32 0x98 -#define V_008DFC_SQ_V_CMP_F_I64 0xA0 -#define V_008DFC_SQ_V_CMP_LT_I64 0xA1 -#define V_008DFC_SQ_V_CMP_EQ_I64 0xA2 -#define V_008DFC_SQ_V_CMP_LE_I64 0xA3 -#define V_008DFC_SQ_V_CMP_GT_I64 0xA4 -#define V_008DFC_SQ_V_CMP_NE_I64 0xA5 -#define V_008DFC_SQ_V_CMP_GE_I64 
0xA6 -#define V_008DFC_SQ_V_CMP_T_I64 0xA7 -#define V_008DFC_SQ_V_CMP_CLASS_F64 0xA8 -#define V_008DFC_SQ_V_CMPX_F_I64 0xB0 -#define V_008DFC_SQ_V_CMPX_LT_I64 0xB1 -#define V_008DFC_SQ_V_CMPX_EQ_I64 0xB2 -#define V_008DFC_SQ_V_CMPX_LE_I64 0xB3 -#define V_008DFC_SQ_V_CMPX_GT_I64 0xB4 -#define V_008DFC_SQ_V_CMPX_NE_I64 0xB5 -#define V_008DFC_SQ_V_CMPX_GE_I64 0xB6 -#define V_008DFC_SQ_V_CMPX_T_I64 0xB7 -#define V_008DFC_SQ_V_CMPX_CLASS_F64 0xB8 -#define V_008DFC_SQ_V_CMP_F_U32 0xC0 -#define V_008DFC_SQ_V_CMP_LT_U32 0xC1 -#define V_008DFC_SQ_V_CMP_EQ_U32 0xC2 -#define V_008DFC_SQ_V_CMP_LE_U32 0xC3 -#define V_008DFC_SQ_V_CMP_GT_U32 0xC4 -#define V_008DFC_SQ_V_CMP_NE_U32 0xC5 -#define V_008DFC_SQ_V_CMP_GE_U32 0xC6 -#define V_008DFC_SQ_V_CMP_T_U32 0xC7 -#define V_008DFC_SQ_V_CMPX_F_U32 0xD0 -#define V_008DFC_SQ_V_CMPX_LT_U32 0xD1 -#define V_008DFC_SQ_V_CMPX_EQ_U32 0xD2 -#define V_008DFC_SQ_V_CMPX_LE_U32 0xD3 -#define V_008DFC_SQ_V_CMPX_GT_U32 0xD4 -#define V_008DFC_SQ_V_CMPX_NE_U32 0xD5 -#define V_008DFC_SQ_V_CMPX_GE_U32 0xD6 -#define V_008DFC_SQ_V_CMPX_T_U32 0xD7 -#define V_008DFC_SQ_V_CMP_F_U64 0xE0 -#define V_008DFC_SQ_V_CMP_LT_U64 0xE1 -#define V_008DFC_SQ_V_CMP_EQ_U64 0xE2 -#define V_008DFC_SQ_V_CMP_LE_U64 0xE3 -#define V_008DFC_SQ_V_CMP_GT_U64 0xE4 -#define V_008DFC_SQ_V_CMP_NE_U64 0xE5 -#define V_008DFC_SQ_V_CMP_GE_U64 0xE6 -#define V_008DFC_SQ_V_CMP_T_U64 0xE7 -#define V_008DFC_SQ_V_CMPX_F_U64 0xF0 -#define V_008DFC_SQ_V_CMPX_LT_U64 0xF1 -#define V_008DFC_SQ_V_CMPX_EQ_U64 0xF2 -#define V_008DFC_SQ_V_CMPX_LE_U64 0xF3 -#define V_008DFC_SQ_V_CMPX_GT_U64 0xF4 -#define V_008DFC_SQ_V_CMPX_NE_U64 0xF5 -#define V_008DFC_SQ_V_CMPX_GE_U64 0xF6 -#define V_008DFC_SQ_V_CMPX_T_U64 0xF7 -#define S_008DFC_ENCODING(x) (((x) & 0x7F) << 25) -#define G_008DFC_ENCODING(x) (((x) >> 25) & 0x7F) -#define C_008DFC_ENCODING 0x01FFFFFF -#define V_008DFC_SQ_ENC_VOPC_FIELD 0x3E -#define R_008DFC_SQ_SOP1 0x008DFC -#define S_008DFC_SSRC0(x) (((x) & 0xFF) << 0) -#define G_008DFC_SSRC0(x) (((x) 
>> 0) & 0xFF) -#define C_008DFC_SSRC0 0xFFFFFF00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E 
-#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define 
V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define S_008DFC_OP(x) (((x) & 0xFF) << 8) -#define G_008DFC_OP(x) (((x) >> 8) & 0xFF) -#define C_008DFC_OP 0xFFFF00FF -#define V_008DFC_SQ_S_MOV_B32 0x03 -#define V_008DFC_SQ_S_MOV_B64 0x04 -#define V_008DFC_SQ_S_CMOV_B32 0x05 -#define V_008DFC_SQ_S_CMOV_B64 0x06 -#define V_008DFC_SQ_S_NOT_B32 0x07 -#define V_008DFC_SQ_S_NOT_B64 0x08 -#define V_008DFC_SQ_S_WQM_B32 0x09 -#define V_008DFC_SQ_S_WQM_B64 0x0A -#define V_008DFC_SQ_S_BREV_B32 0x0B -#define V_008DFC_SQ_S_BREV_B64 0x0C -#define V_008DFC_SQ_S_BCNT0_I32_B32 0x0D -#define V_008DFC_SQ_S_BCNT0_I32_B64 0x0E -#define V_008DFC_SQ_S_BCNT1_I32_B32 0x0F -#define V_008DFC_SQ_S_BCNT1_I32_B64 0x10 -#define V_008DFC_SQ_S_FF0_I32_B32 0x11 -#define V_008DFC_SQ_S_FF0_I32_B64 0x12 -#define V_008DFC_SQ_S_FF1_I32_B32 0x13 -#define V_008DFC_SQ_S_FF1_I32_B64 0x14 -#define V_008DFC_SQ_S_FLBIT_I32_B32 0x15 -#define V_008DFC_SQ_S_FLBIT_I32_B64 0x16 -#define V_008DFC_SQ_S_FLBIT_I32 0x17 -#define V_008DFC_SQ_S_FLBIT_I32_I64 0x18 -#define V_008DFC_SQ_S_SEXT_I32_I8 0x19 -#define V_008DFC_SQ_S_SEXT_I32_I16 0x1A -#define V_008DFC_SQ_S_BITSET0_B32 0x1B -#define V_008DFC_SQ_S_BITSET0_B64 0x1C -#define V_008DFC_SQ_S_BITSET1_B32 0x1D -#define V_008DFC_SQ_S_BITSET1_B64 0x1E -#define V_008DFC_SQ_S_GETPC_B64 0x1F -#define V_008DFC_SQ_S_SETPC_B64 0x20 -#define V_008DFC_SQ_S_SWAPPC_B64 0x21 -#define V_008DFC_SQ_S_RFE_B64 0x22 -#define V_008DFC_SQ_S_AND_SAVEEXEC_B64 0x24 -#define V_008DFC_SQ_S_OR_SAVEEXEC_B64 0x25 -#define V_008DFC_SQ_S_XOR_SAVEEXEC_B64 0x26 -#define V_008DFC_SQ_S_ANDN2_SAVEEXEC_B64 0x27 -#define V_008DFC_SQ_S_ORN2_SAVEEXEC_B64 0x28 -#define V_008DFC_SQ_S_NAND_SAVEEXEC_B64 0x29 -#define V_008DFC_SQ_S_NOR_SAVEEXEC_B64 0x2A -#define 
V_008DFC_SQ_S_XNOR_SAVEEXEC_B64 0x2B -#define V_008DFC_SQ_S_QUADMASK_B32 0x2C -#define V_008DFC_SQ_S_QUADMASK_B64 0x2D -#define V_008DFC_SQ_S_MOVRELS_B32 0x2E -#define V_008DFC_SQ_S_MOVRELS_B64 0x2F -#define V_008DFC_SQ_S_MOVRELD_B32 0x30 -#define V_008DFC_SQ_S_MOVRELD_B64 0x31 -#define V_008DFC_SQ_S_CBRANCH_JOIN 0x32 -#define V_008DFC_SQ_S_MOV_REGRD_B32 0x33 -#define V_008DFC_SQ_S_ABS_I32 0x34 -#define V_008DFC_SQ_S_MOV_FED_B32 0x35 -#define S_008DFC_SDST(x) (((x) & 0x7F) << 16) -#define G_008DFC_SDST(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SDST 0xFF80FFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define S_008DFC_ENCODING(x) (((x) & 0x1FF) << 23) -#define G_008DFC_ENCODING(x) (((x) >> 23) & 0x1FF) -#define C_008DFC_ENCODING 0x007FFFFF -#define V_008DFC_SQ_ENC_SOP1_FIELD 0x17D -#define R_008DFC_SQ_MTBUF_1 0x008DFC -#define S_008DFC_VADDR(x) (((x) & 0xFF) << 0) -#define G_008DFC_VADDR(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VADDR 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VDATA(x) (((x) & 0xFF) << 8) -#define G_008DFC_VDATA(x) (((x) >> 8) & 0xFF) -#define C_008DFC_VDATA 0xFFFF00FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_SRSRC(x) (((x) & 0x1F) << 16) -#define G_008DFC_SRSRC(x) 
(((x) >> 16) & 0x1F) -#define C_008DFC_SRSRC 0xFFE0FFFF -#define S_008DFC_SLC(x) (((x) & 0x1) << 22) -#define G_008DFC_SLC(x) (((x) >> 22) & 0x1) -#define C_008DFC_SLC 0xFFBFFFFF -#define S_008DFC_TFE(x) (((x) & 0x1) << 23) -#define G_008DFC_TFE(x) (((x) >> 23) & 0x1) -#define C_008DFC_TFE 0xFF7FFFFF -#define S_008DFC_SOFFSET(x) (((x) & 0xFF) << 24) -#define G_008DFC_SOFFSET(x) (((x) >> 24) & 0xFF) -#define C_008DFC_SOFFSET 0x00FFFFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define 
V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 
-#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define R_008DFC_SQ_SOP2 0x008DFC -#define S_008DFC_SSRC0(x) (((x) & 0xFF) << 0) -#define G_008DFC_SSRC0(x) (((x) >> 0) & 0xFF) -#define C_008DFC_SSRC0 0xFFFFFF00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define 
V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define 
V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define S_008DFC_SSRC1(x) (((x) & 0xFF) << 8) -#define G_008DFC_SSRC1(x) (((x) >> 8) & 0xFF) -#define C_008DFC_SSRC1 0xFFFF00FF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 
0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define 
V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define S_008DFC_SDST(x) (((x) & 0x7F) << 16) -#define G_008DFC_SDST(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SDST 0xFF80FFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define 
V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define S_008DFC_OP(x) (((x) & 0x7F) << 23) -#define G_008DFC_OP(x) (((x) >> 23) & 0x7F) -#define C_008DFC_OP 0xC07FFFFF -#define V_008DFC_SQ_S_ADD_U32 0x00 -#define V_008DFC_SQ_S_SUB_U32 0x01 -#define V_008DFC_SQ_S_ADD_I32 0x02 -#define V_008DFC_SQ_S_SUB_I32 0x03 -#define V_008DFC_SQ_S_ADDC_U32 0x04 -#define V_008DFC_SQ_S_SUBB_U32 0x05 -#define V_008DFC_SQ_S_MIN_I32 0x06 -#define V_008DFC_SQ_S_MIN_U32 0x07 -#define V_008DFC_SQ_S_MAX_I32 0x08 -#define V_008DFC_SQ_S_MAX_U32 0x09 -#define V_008DFC_SQ_S_CSELECT_B32 0x0A -#define V_008DFC_SQ_S_CSELECT_B64 0x0B -#define V_008DFC_SQ_S_AND_B32 0x0E -#define V_008DFC_SQ_S_AND_B64 0x0F -#define V_008DFC_SQ_S_OR_B32 0x10 -#define V_008DFC_SQ_S_OR_B64 0x11 -#define V_008DFC_SQ_S_XOR_B32 0x12 -#define V_008DFC_SQ_S_XOR_B64 0x13 -#define V_008DFC_SQ_S_ANDN2_B32 0x14 -#define V_008DFC_SQ_S_ANDN2_B64 0x15 -#define V_008DFC_SQ_S_ORN2_B32 0x16 -#define V_008DFC_SQ_S_ORN2_B64 0x17 -#define V_008DFC_SQ_S_NAND_B32 0x18 -#define V_008DFC_SQ_S_NAND_B64 0x19 -#define V_008DFC_SQ_S_NOR_B32 0x1A -#define V_008DFC_SQ_S_NOR_B64 0x1B -#define V_008DFC_SQ_S_XNOR_B32 0x1C -#define V_008DFC_SQ_S_XNOR_B64 0x1D -#define V_008DFC_SQ_S_LSHL_B32 0x1E -#define V_008DFC_SQ_S_LSHL_B64 0x1F -#define V_008DFC_SQ_S_LSHR_B32 0x20 -#define V_008DFC_SQ_S_LSHR_B64 0x21 -#define V_008DFC_SQ_S_ASHR_I32 0x22 -#define V_008DFC_SQ_S_ASHR_I64 0x23 -#define V_008DFC_SQ_S_BFM_B32 0x24 -#define V_008DFC_SQ_S_BFM_B64 0x25 -#define V_008DFC_SQ_S_MUL_I32 0x26 -#define V_008DFC_SQ_S_BFE_U32 0x27 -#define V_008DFC_SQ_S_BFE_I32 0x28 -#define V_008DFC_SQ_S_BFE_U64 0x29 
-#define V_008DFC_SQ_S_BFE_I64 0x2A -#define V_008DFC_SQ_S_CBRANCH_G_FORK 0x2B -#define V_008DFC_SQ_S_ABSDIFF_I32 0x2C -#define S_008DFC_ENCODING(x) (((x) & 0x03) << 30) -#define G_008DFC_ENCODING(x) (((x) >> 30) & 0x03) -#define C_008DFC_ENCODING 0x3FFFFFFF -#define V_008DFC_SQ_ENC_SOP2_FIELD 0x02 -#define R_008DFC_SQ_SOPK 0x008DFC -#define S_008DFC_SIMM16(x) (((x) & 0xFFFF) << 0) -#define G_008DFC_SIMM16(x) (((x) >> 0) & 0xFFFF) -#define C_008DFC_SIMM16 0xFFFF0000 -#define S_008DFC_SDST(x) (((x) & 0x7F) << 16) -#define G_008DFC_SDST(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SDST 0xFF80FFFF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define S_008DFC_OP(x) (((x) & 0x1F) << 23) -#define G_008DFC_OP(x) (((x) >> 23) & 0x1F) -#define C_008DFC_OP 0xF07FFFFF -#define V_008DFC_SQ_S_MOVK_I32 0x00 -#define V_008DFC_SQ_S_CMOVK_I32 0x02 -#define V_008DFC_SQ_S_CMPK_EQ_I32 0x03 -#define V_008DFC_SQ_S_CMPK_LG_I32 0x04 -#define V_008DFC_SQ_S_CMPK_GT_I32 0x05 -#define V_008DFC_SQ_S_CMPK_GE_I32 0x06 -#define V_008DFC_SQ_S_CMPK_LT_I32 0x07 -#define V_008DFC_SQ_S_CMPK_LE_I32 0x08 -#define V_008DFC_SQ_S_CMPK_EQ_U32 0x09 -#define V_008DFC_SQ_S_CMPK_LG_U32 0x0A -#define V_008DFC_SQ_S_CMPK_GT_U32 0x0B -#define 
V_008DFC_SQ_S_CMPK_GE_U32 0x0C -#define V_008DFC_SQ_S_CMPK_LT_U32 0x0D -#define V_008DFC_SQ_S_CMPK_LE_U32 0x0E -#define V_008DFC_SQ_S_ADDK_I32 0x0F -#define V_008DFC_SQ_S_MULK_I32 0x10 -#define V_008DFC_SQ_S_CBRANCH_I_FORK 0x11 -#define V_008DFC_SQ_S_GETREG_B32 0x12 -#define V_008DFC_SQ_S_SETREG_B32 0x13 -#define V_008DFC_SQ_S_GETREG_REGRD_B32 0x14 -#define V_008DFC_SQ_S_SETREG_IMM32_B32 0x15 -#define S_008DFC_ENCODING(x) (((x) & 0x0F) << 28) -#define G_008DFC_ENCODING(x) (((x) >> 28) & 0x0F) -#define C_008DFC_ENCODING 0x0FFFFFFF -#define V_008DFC_SQ_ENC_SOPK_FIELD 0x0B -#define R_008DFC_SQ_VOP3_0 0x008DFC -#define S_008DFC_VDST(x) (((x) & 0xFF) << 0) -#define G_008DFC_VDST(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VDST 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_ABS(x) (((x) & 0x07) << 8) -#define G_008DFC_ABS(x) (((x) >> 8) & 0x07) -#define C_008DFC_ABS 0xFFFFF8FF -#define S_008DFC_CLAMP(x) (((x) & 0x1) << 11) -#define G_008DFC_CLAMP(x) (((x) >> 11) & 0x1) -#define C_008DFC_CLAMP 0xFFFFF7FF -#define S_008DFC_OP(x) (((x) & 0x1FF) << 17) -#define G_008DFC_OP(x) (((x) >> 17) & 0x1FF) -#define C_008DFC_OP 0xFC01FFFF -#define V_008DFC_SQ_V_OPC_OFFSET 0x00 -#define V_008DFC_SQ_V_OP2_OFFSET 0x100 -#define V_008DFC_SQ_V_MAD_LEGACY_F32 0x140 -#define V_008DFC_SQ_V_MAD_F32 0x141 -#define V_008DFC_SQ_V_MAD_I32_I24 0x142 -#define V_008DFC_SQ_V_MAD_U32_U24 0x143 -#define V_008DFC_SQ_V_CUBEID_F32 0x144 -#define V_008DFC_SQ_V_CUBESC_F32 0x145 -#define V_008DFC_SQ_V_CUBETC_F32 0x146 -#define V_008DFC_SQ_V_CUBEMA_F32 0x147 -#define V_008DFC_SQ_V_BFE_U32 0x148 -#define V_008DFC_SQ_V_BFE_I32 0x149 -#define V_008DFC_SQ_V_BFI_B32 0x14A -#define V_008DFC_SQ_V_FMA_F32 0x14B -#define V_008DFC_SQ_V_FMA_F64 0x14C -#define V_008DFC_SQ_V_LERP_U8 0x14D -#define V_008DFC_SQ_V_ALIGNBIT_B32 0x14E -#define V_008DFC_SQ_V_ALIGNBYTE_B32 0x14F -#define V_008DFC_SQ_V_MULLIT_F32 0x150 -#define V_008DFC_SQ_V_MIN3_F32 0x151 -#define V_008DFC_SQ_V_MIN3_I32 0x152 -#define 
V_008DFC_SQ_V_MIN3_U32 0x153 -#define V_008DFC_SQ_V_MAX3_F32 0x154 -#define V_008DFC_SQ_V_MAX3_I32 0x155 -#define V_008DFC_SQ_V_MAX3_U32 0x156 -#define V_008DFC_SQ_V_MED3_F32 0x157 -#define V_008DFC_SQ_V_MED3_I32 0x158 -#define V_008DFC_SQ_V_MED3_U32 0x159 -#define V_008DFC_SQ_V_SAD_U8 0x15A -#define V_008DFC_SQ_V_SAD_HI_U8 0x15B -#define V_008DFC_SQ_V_SAD_U16 0x15C -#define V_008DFC_SQ_V_SAD_U32 0x15D -#define V_008DFC_SQ_V_CVT_PK_U8_F32 0x15E -#define V_008DFC_SQ_V_DIV_FIXUP_F32 0x15F -#define V_008DFC_SQ_V_DIV_FIXUP_F64 0x160 -#define V_008DFC_SQ_V_LSHL_B64 0x161 -#define V_008DFC_SQ_V_LSHR_B64 0x162 -#define V_008DFC_SQ_V_ASHR_I64 0x163 -#define V_008DFC_SQ_V_ADD_F64 0x164 -#define V_008DFC_SQ_V_MUL_F64 0x165 -#define V_008DFC_SQ_V_MIN_F64 0x166 -#define V_008DFC_SQ_V_MAX_F64 0x167 -#define V_008DFC_SQ_V_LDEXP_F64 0x168 -#define V_008DFC_SQ_V_MUL_LO_U32 0x169 -#define V_008DFC_SQ_V_MUL_HI_U32 0x16A -#define V_008DFC_SQ_V_MUL_LO_I32 0x16B -#define V_008DFC_SQ_V_MUL_HI_I32 0x16C -#define V_008DFC_SQ_V_DIV_SCALE_F32 0x16D -#define V_008DFC_SQ_V_DIV_SCALE_F64 0x16E -#define V_008DFC_SQ_V_DIV_FMAS_F32 0x16F -#define V_008DFC_SQ_V_DIV_FMAS_F64 0x170 -#define V_008DFC_SQ_V_MSAD_U8 0x171 -#define V_008DFC_SQ_V_QSAD_U8 0x172 -#define V_008DFC_SQ_V_MQSAD_U8 0x173 -#define V_008DFC_SQ_V_TRIG_PREOP_F64 0x174 -/* CIK */ -#define V_008DFC_SQ_V_MQSAD_U32_U8 0x175 -#define V_008DFC_SQ_V_MAD_U64_U32 0x176 -#define V_008DFC_SQ_V_MAD_I64_I32 0x177 -/* */ -#define V_008DFC_SQ_V_OP1_OFFSET 0x180 -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_VOP3_FIELD 0x34 -#define R_008DFC_SQ_VOP2 0x008DFC -#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0) -#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF) -#define C_008DFC_SRC0 0xFFFFFE00 -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define 
V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define V_008DFC_SQ_M0 0x7C -#define V_008DFC_SQ_EXEC_LO 0x7E -#define V_008DFC_SQ_EXEC_HI 0x7F -#define V_008DFC_SQ_SRC_0 0x80 -#define V_008DFC_SQ_SRC_1_INT 0x81 -#define V_008DFC_SQ_SRC_2_INT 0x82 -#define V_008DFC_SQ_SRC_3_INT 0x83 -#define V_008DFC_SQ_SRC_4_INT 0x84 -#define V_008DFC_SQ_SRC_5_INT 0x85 -#define V_008DFC_SQ_SRC_6_INT 0x86 -#define V_008DFC_SQ_SRC_7_INT 0x87 -#define V_008DFC_SQ_SRC_8_INT 0x88 -#define V_008DFC_SQ_SRC_9_INT 0x89 -#define V_008DFC_SQ_SRC_10_INT 0x8A -#define V_008DFC_SQ_SRC_11_INT 0x8B -#define V_008DFC_SQ_SRC_12_INT 0x8C -#define V_008DFC_SQ_SRC_13_INT 0x8D -#define V_008DFC_SQ_SRC_14_INT 0x8E -#define V_008DFC_SQ_SRC_15_INT 0x8F -#define V_008DFC_SQ_SRC_16_INT 0x90 -#define V_008DFC_SQ_SRC_17_INT 0x91 -#define V_008DFC_SQ_SRC_18_INT 0x92 -#define V_008DFC_SQ_SRC_19_INT 0x93 -#define V_008DFC_SQ_SRC_20_INT 0x94 -#define V_008DFC_SQ_SRC_21_INT 0x95 -#define V_008DFC_SQ_SRC_22_INT 0x96 -#define V_008DFC_SQ_SRC_23_INT 0x97 -#define V_008DFC_SQ_SRC_24_INT 0x98 -#define V_008DFC_SQ_SRC_25_INT 0x99 -#define V_008DFC_SQ_SRC_26_INT 0x9A -#define V_008DFC_SQ_SRC_27_INT 0x9B -#define V_008DFC_SQ_SRC_28_INT 0x9C -#define V_008DFC_SQ_SRC_29_INT 0x9D -#define V_008DFC_SQ_SRC_30_INT 0x9E -#define V_008DFC_SQ_SRC_31_INT 0x9F -#define V_008DFC_SQ_SRC_32_INT 0xA0 -#define V_008DFC_SQ_SRC_33_INT 0xA1 -#define V_008DFC_SQ_SRC_34_INT 0xA2 -#define V_008DFC_SQ_SRC_35_INT 0xA3 -#define 
V_008DFC_SQ_SRC_36_INT 0xA4 -#define V_008DFC_SQ_SRC_37_INT 0xA5 -#define V_008DFC_SQ_SRC_38_INT 0xA6 -#define V_008DFC_SQ_SRC_39_INT 0xA7 -#define V_008DFC_SQ_SRC_40_INT 0xA8 -#define V_008DFC_SQ_SRC_41_INT 0xA9 -#define V_008DFC_SQ_SRC_42_INT 0xAA -#define V_008DFC_SQ_SRC_43_INT 0xAB -#define V_008DFC_SQ_SRC_44_INT 0xAC -#define V_008DFC_SQ_SRC_45_INT 0xAD -#define V_008DFC_SQ_SRC_46_INT 0xAE -#define V_008DFC_SQ_SRC_47_INT 0xAF -#define V_008DFC_SQ_SRC_48_INT 0xB0 -#define V_008DFC_SQ_SRC_49_INT 0xB1 -#define V_008DFC_SQ_SRC_50_INT 0xB2 -#define V_008DFC_SQ_SRC_51_INT 0xB3 -#define V_008DFC_SQ_SRC_52_INT 0xB4 -#define V_008DFC_SQ_SRC_53_INT 0xB5 -#define V_008DFC_SQ_SRC_54_INT 0xB6 -#define V_008DFC_SQ_SRC_55_INT 0xB7 -#define V_008DFC_SQ_SRC_56_INT 0xB8 -#define V_008DFC_SQ_SRC_57_INT 0xB9 -#define V_008DFC_SQ_SRC_58_INT 0xBA -#define V_008DFC_SQ_SRC_59_INT 0xBB -#define V_008DFC_SQ_SRC_60_INT 0xBC -#define V_008DFC_SQ_SRC_61_INT 0xBD -#define V_008DFC_SQ_SRC_62_INT 0xBE -#define V_008DFC_SQ_SRC_63_INT 0xBF -#define V_008DFC_SQ_SRC_64_INT 0xC0 -#define V_008DFC_SQ_SRC_M_1_INT 0xC1 -#define V_008DFC_SQ_SRC_M_2_INT 0xC2 -#define V_008DFC_SQ_SRC_M_3_INT 0xC3 -#define V_008DFC_SQ_SRC_M_4_INT 0xC4 -#define V_008DFC_SQ_SRC_M_5_INT 0xC5 -#define V_008DFC_SQ_SRC_M_6_INT 0xC6 -#define V_008DFC_SQ_SRC_M_7_INT 0xC7 -#define V_008DFC_SQ_SRC_M_8_INT 0xC8 -#define V_008DFC_SQ_SRC_M_9_INT 0xC9 -#define V_008DFC_SQ_SRC_M_10_INT 0xCA -#define V_008DFC_SQ_SRC_M_11_INT 0xCB -#define V_008DFC_SQ_SRC_M_12_INT 0xCC -#define V_008DFC_SQ_SRC_M_13_INT 0xCD -#define V_008DFC_SQ_SRC_M_14_INT 0xCE -#define V_008DFC_SQ_SRC_M_15_INT 0xCF -#define V_008DFC_SQ_SRC_M_16_INT 0xD0 -#define V_008DFC_SQ_SRC_0_5 0xF0 -#define V_008DFC_SQ_SRC_M_0_5 0xF1 -#define V_008DFC_SQ_SRC_1 0xF2 -#define V_008DFC_SQ_SRC_M_1 0xF3 -#define V_008DFC_SQ_SRC_2 0xF4 -#define V_008DFC_SQ_SRC_M_2 0xF5 -#define V_008DFC_SQ_SRC_4 0xF6 -#define V_008DFC_SQ_SRC_M_4 0xF7 -#define V_008DFC_SQ_SRC_VCCZ 0xFB -#define 
V_008DFC_SQ_SRC_EXECZ 0xFC -#define V_008DFC_SQ_SRC_SCC 0xFD -#define V_008DFC_SQ_SRC_LDS_DIRECT 0xFE -#define V_008DFC_SQ_SRC_VGPR 0x100 -#define S_008DFC_VSRC1(x) (((x) & 0xFF) << 9) -#define G_008DFC_VSRC1(x) (((x) >> 9) & 0xFF) -#define C_008DFC_VSRC1 0xFFFE01FF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_VDST(x) (((x) & 0xFF) << 17) -#define G_008DFC_VDST(x) (((x) >> 17) & 0xFF) -#define C_008DFC_VDST 0xFE01FFFF -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_OP(x) (((x) & 0x3F) << 25) -#define G_008DFC_OP(x) (((x) >> 25) & 0x3F) -#define C_008DFC_OP 0x81FFFFFF -#define V_008DFC_SQ_V_CNDMASK_B32 0x00 -#define V_008DFC_SQ_V_READLANE_B32 0x01 -#define V_008DFC_SQ_V_WRITELANE_B32 0x02 -#define V_008DFC_SQ_V_ADD_F32 0x03 -#define V_008DFC_SQ_V_SUB_F32 0x04 -#define V_008DFC_SQ_V_SUBREV_F32 0x05 -#define V_008DFC_SQ_V_MAC_LEGACY_F32 0x06 -#define V_008DFC_SQ_V_MUL_LEGACY_F32 0x07 -#define V_008DFC_SQ_V_MUL_F32 0x08 -#define V_008DFC_SQ_V_MUL_I32_I24 0x09 -#define V_008DFC_SQ_V_MUL_HI_I32_I24 0x0A -#define V_008DFC_SQ_V_MUL_U32_U24 0x0B -#define V_008DFC_SQ_V_MUL_HI_U32_U24 0x0C -#define V_008DFC_SQ_V_MIN_LEGACY_F32 0x0D -#define V_008DFC_SQ_V_MAX_LEGACY_F32 0x0E -#define V_008DFC_SQ_V_MIN_F32 0x0F -#define V_008DFC_SQ_V_MAX_F32 0x10 -#define V_008DFC_SQ_V_MIN_I32 0x11 -#define V_008DFC_SQ_V_MAX_I32 0x12 -#define V_008DFC_SQ_V_MIN_U32 0x13 -#define V_008DFC_SQ_V_MAX_U32 0x14 -#define V_008DFC_SQ_V_LSHR_B32 0x15 -#define V_008DFC_SQ_V_LSHRREV_B32 0x16 -#define V_008DFC_SQ_V_ASHR_I32 0x17 -#define V_008DFC_SQ_V_ASHRREV_I32 0x18 -#define V_008DFC_SQ_V_LSHL_B32 0x19 -#define V_008DFC_SQ_V_LSHLREV_B32 0x1A -#define V_008DFC_SQ_V_AND_B32 0x1B -#define V_008DFC_SQ_V_OR_B32 0x1C -#define V_008DFC_SQ_V_XOR_B32 0x1D -#define V_008DFC_SQ_V_BFM_B32 0x1E -#define V_008DFC_SQ_V_MAC_F32 0x1F -#define V_008DFC_SQ_V_MADMK_F32 0x20 -#define V_008DFC_SQ_V_MADAK_F32 0x21 -#define V_008DFC_SQ_V_BCNT_U32_B32 0x22 -#define V_008DFC_SQ_V_MBCNT_LO_U32_B32 0x23 -#define 
V_008DFC_SQ_V_MBCNT_HI_U32_B32 0x24 -#define V_008DFC_SQ_V_ADD_I32 0x25 -#define V_008DFC_SQ_V_SUB_I32 0x26 -#define V_008DFC_SQ_V_SUBREV_I32 0x27 -#define V_008DFC_SQ_V_ADDC_U32 0x28 -#define V_008DFC_SQ_V_SUBB_U32 0x29 -#define V_008DFC_SQ_V_SUBBREV_U32 0x2A -#define V_008DFC_SQ_V_LDEXP_F32 0x2B -#define V_008DFC_SQ_V_CVT_PKACCUM_U8_F32 0x2C -#define V_008DFC_SQ_V_CVT_PKNORM_I16_F32 0x2D -#define V_008DFC_SQ_V_CVT_PKNORM_U16_F32 0x2E -#define V_008DFC_SQ_V_CVT_PKRTZ_F16_F32 0x2F -#define V_008DFC_SQ_V_CVT_PK_U16_U32 0x30 -#define V_008DFC_SQ_V_CVT_PK_I16_I32 0x31 -#define S_008DFC_ENCODING(x) (((x) & 0x1) << 31) -#define G_008DFC_ENCODING(x) (((x) >> 31) & 0x1) -#define C_008DFC_ENCODING 0x7FFFFFFF -#define R_008DFC_SQ_VOP3_0_SDST_ENC 0x008DFC -#define S_008DFC_VDST(x) (((x) & 0xFF) << 0) -#define G_008DFC_VDST(x) (((x) >> 0) & 0xFF) -#define C_008DFC_VDST 0xFFFFFF00 -#define V_008DFC_SQ_VGPR 0x00 -#define S_008DFC_SDST(x) (((x) & 0x7F) << 8) -#define G_008DFC_SDST(x) (((x) >> 8) & 0x7F) -#define C_008DFC_SDST 0xFFFF80FF -#define V_008DFC_SQ_SGPR 0x00 -/* CIK */ -#define V_008DFC_SQ_FLAT_SCRATCH_LO 0x68 -#define V_008DFC_SQ_FLAT_SCRATCH_HI 0x69 -/* */ -#define V_008DFC_SQ_VCC_LO 0x6A -#define V_008DFC_SQ_VCC_HI 0x6B -#define V_008DFC_SQ_TBA_LO 0x6C -#define V_008DFC_SQ_TBA_HI 0x6D -#define V_008DFC_SQ_TMA_LO 0x6E -#define V_008DFC_SQ_TMA_HI 0x6F -#define V_008DFC_SQ_TTMP0 0x70 -#define V_008DFC_SQ_TTMP1 0x71 -#define V_008DFC_SQ_TTMP2 0x72 -#define V_008DFC_SQ_TTMP3 0x73 -#define V_008DFC_SQ_TTMP4 0x74 -#define V_008DFC_SQ_TTMP5 0x75 -#define V_008DFC_SQ_TTMP6 0x76 -#define V_008DFC_SQ_TTMP7 0x77 -#define V_008DFC_SQ_TTMP8 0x78 -#define V_008DFC_SQ_TTMP9 0x79 -#define V_008DFC_SQ_TTMP10 0x7A -#define V_008DFC_SQ_TTMP11 0x7B -#define S_008DFC_OP(x) (((x) & 0x1FF) << 17) -#define G_008DFC_OP(x) (((x) >> 17) & 0x1FF) -#define C_008DFC_OP 0xFC01FFFF -#define V_008DFC_SQ_V_OPC_OFFSET 0x00 -#define V_008DFC_SQ_V_OP2_OFFSET 0x100 -#define V_008DFC_SQ_V_MAD_LEGACY_F32 
0x140 -#define V_008DFC_SQ_V_MAD_F32 0x141 -#define V_008DFC_SQ_V_MAD_I32_I24 0x142 -#define V_008DFC_SQ_V_MAD_U32_U24 0x143 -#define V_008DFC_SQ_V_CUBEID_F32 0x144 -#define V_008DFC_SQ_V_CUBESC_F32 0x145 -#define V_008DFC_SQ_V_CUBETC_F32 0x146 -#define V_008DFC_SQ_V_CUBEMA_F32 0x147 -#define V_008DFC_SQ_V_BFE_U32 0x148 -#define V_008DFC_SQ_V_BFE_I32 0x149 -#define V_008DFC_SQ_V_BFI_B32 0x14A -#define V_008DFC_SQ_V_FMA_F32 0x14B -#define V_008DFC_SQ_V_FMA_F64 0x14C -#define V_008DFC_SQ_V_LERP_U8 0x14D -#define V_008DFC_SQ_V_ALIGNBIT_B32 0x14E -#define V_008DFC_SQ_V_ALIGNBYTE_B32 0x14F -#define V_008DFC_SQ_V_MULLIT_F32 0x150 -#define V_008DFC_SQ_V_MIN3_F32 0x151 -#define V_008DFC_SQ_V_MIN3_I32 0x152 -#define V_008DFC_SQ_V_MIN3_U32 0x153 -#define V_008DFC_SQ_V_MAX3_F32 0x154 -#define V_008DFC_SQ_V_MAX3_I32 0x155 -#define V_008DFC_SQ_V_MAX3_U32 0x156 -#define V_008DFC_SQ_V_MED3_F32 0x157 -#define V_008DFC_SQ_V_MED3_I32 0x158 -#define V_008DFC_SQ_V_MED3_U32 0x159 -#define V_008DFC_SQ_V_SAD_U8 0x15A -#define V_008DFC_SQ_V_SAD_HI_U8 0x15B -#define V_008DFC_SQ_V_SAD_U16 0x15C -#define V_008DFC_SQ_V_SAD_U32 0x15D -#define V_008DFC_SQ_V_CVT_PK_U8_F32 0x15E -#define V_008DFC_SQ_V_DIV_FIXUP_F32 0x15F -#define V_008DFC_SQ_V_DIV_FIXUP_F64 0x160 -#define V_008DFC_SQ_V_LSHL_B64 0x161 -#define V_008DFC_SQ_V_LSHR_B64 0x162 -#define V_008DFC_SQ_V_ASHR_I64 0x163 -#define V_008DFC_SQ_V_ADD_F64 0x164 -#define V_008DFC_SQ_V_MUL_F64 0x165 -#define V_008DFC_SQ_V_MIN_F64 0x166 -#define V_008DFC_SQ_V_MAX_F64 0x167 -#define V_008DFC_SQ_V_LDEXP_F64 0x168 -#define V_008DFC_SQ_V_MUL_LO_U32 0x169 -#define V_008DFC_SQ_V_MUL_HI_U32 0x16A -#define V_008DFC_SQ_V_MUL_LO_I32 0x16B -#define V_008DFC_SQ_V_MUL_HI_I32 0x16C -#define V_008DFC_SQ_V_DIV_SCALE_F32 0x16D -#define V_008DFC_SQ_V_DIV_SCALE_F64 0x16E -#define V_008DFC_SQ_V_DIV_FMAS_F32 0x16F -#define V_008DFC_SQ_V_DIV_FMAS_F64 0x170 -#define V_008DFC_SQ_V_MSAD_U8 0x171 -#define V_008DFC_SQ_V_QSAD_U8 0x172 -#define V_008DFC_SQ_V_MQSAD_U8 0x173 
-#define V_008DFC_SQ_V_TRIG_PREOP_F64 0x174 -/* CIK */ -#define V_008DFC_SQ_V_MQSAD_U32_U8 0x175 -#define V_008DFC_SQ_V_MAD_U64_U32 0x176 -#define V_008DFC_SQ_V_MAD_I64_I32 0x177 -/* */ -#define V_008DFC_SQ_V_OP1_OFFSET 0x180 -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_VOP3_FIELD 0x34 -#define R_008DFC_SQ_MUBUF_0 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFF) -#define C_008DFC_OFFSET 0xFFFFF000 -#define S_008DFC_OFFEN(x) (((x) & 0x1) << 12) -#define G_008DFC_OFFEN(x) (((x) >> 12) & 0x1) -#define C_008DFC_OFFEN 0xFFFFEFFF -#define S_008DFC_IDXEN(x) (((x) & 0x1) << 13) -#define G_008DFC_IDXEN(x) (((x) >> 13) & 0x1) -#define C_008DFC_IDXEN 0xFFFFDFFF -#define S_008DFC_GLC(x) (((x) & 0x1) << 14) -#define G_008DFC_GLC(x) (((x) >> 14) & 0x1) -#define C_008DFC_GLC 0xFFFFBFFF -#define S_008DFC_ADDR64(x) (((x) & 0x1) << 15) -#define G_008DFC_ADDR64(x) (((x) >> 15) & 0x1) -#define C_008DFC_ADDR64 0xFFFF7FFF -#define S_008DFC_LDS(x) (((x) & 0x1) << 16) -#define G_008DFC_LDS(x) (((x) >> 16) & 0x1) -#define C_008DFC_LDS 0xFFFEFFFF -#define S_008DFC_OP(x) (((x) & 0x7F) << 18) -#define G_008DFC_OP(x) (((x) >> 18) & 0x7F) -#define C_008DFC_OP 0xFE03FFFF -#define V_008DFC_SQ_BUFFER_LOAD_FORMAT_X 0x00 -#define V_008DFC_SQ_BUFFER_LOAD_FORMAT_XY 0x01 -#define V_008DFC_SQ_BUFFER_LOAD_FORMAT_XYZ 0x02 -#define V_008DFC_SQ_BUFFER_LOAD_FORMAT_XYZW 0x03 -#define V_008DFC_SQ_BUFFER_STORE_FORMAT_X 0x04 -#define V_008DFC_SQ_BUFFER_STORE_FORMAT_XY 0x05 -#define V_008DFC_SQ_BUFFER_STORE_FORMAT_XYZ 0x06 -#define V_008DFC_SQ_BUFFER_STORE_FORMAT_XYZW 0x07 -#define V_008DFC_SQ_BUFFER_LOAD_UBYTE 0x08 -#define V_008DFC_SQ_BUFFER_LOAD_SBYTE 0x09 -#define V_008DFC_SQ_BUFFER_LOAD_USHORT 0x0A -#define V_008DFC_SQ_BUFFER_LOAD_SSHORT 0x0B -#define V_008DFC_SQ_BUFFER_LOAD_DWORD 0x0C -#define V_008DFC_SQ_BUFFER_LOAD_DWORDX2 0x0D 
-#define V_008DFC_SQ_BUFFER_LOAD_DWORDX4 0x0E -/* CIK */ -#define V_008DFC_SQ_BUFFER_LOAD_DWORDX3 0x0F -/* */ -#define V_008DFC_SQ_BUFFER_STORE_BYTE 0x18 -#define V_008DFC_SQ_BUFFER_STORE_SHORT 0x1A -#define V_008DFC_SQ_BUFFER_STORE_DWORD 0x1C -#define V_008DFC_SQ_BUFFER_STORE_DWORDX2 0x1D -#define V_008DFC_SQ_BUFFER_STORE_DWORDX4 0x1E -/* CIK */ -#define V_008DFC_SQ_BUFFER_STORE_DWORDX3 0x1F -/* */ -#define V_008DFC_SQ_BUFFER_ATOMIC_SWAP 0x30 -#define V_008DFC_SQ_BUFFER_ATOMIC_CMPSWAP 0x31 -#define V_008DFC_SQ_BUFFER_ATOMIC_ADD 0x32 -#define V_008DFC_SQ_BUFFER_ATOMIC_SUB 0x33 -#define V_008DFC_SQ_BUFFER_ATOMIC_RSUB 0x34 /* not on CIK */ -#define V_008DFC_SQ_BUFFER_ATOMIC_SMIN 0x35 -#define V_008DFC_SQ_BUFFER_ATOMIC_UMIN 0x36 -#define V_008DFC_SQ_BUFFER_ATOMIC_SMAX 0x37 -#define V_008DFC_SQ_BUFFER_ATOMIC_UMAX 0x38 -#define V_008DFC_SQ_BUFFER_ATOMIC_AND 0x39 -#define V_008DFC_SQ_BUFFER_ATOMIC_OR 0x3A -#define V_008DFC_SQ_BUFFER_ATOMIC_XOR 0x3B -#define V_008DFC_SQ_BUFFER_ATOMIC_INC 0x3C -#define V_008DFC_SQ_BUFFER_ATOMIC_DEC 0x3D -#define V_008DFC_SQ_BUFFER_ATOMIC_FCMPSWAP 0x3E -#define V_008DFC_SQ_BUFFER_ATOMIC_FMIN 0x3F -#define V_008DFC_SQ_BUFFER_ATOMIC_FMAX 0x40 -#define V_008DFC_SQ_BUFFER_ATOMIC_SWAP_X2 0x50 -#define V_008DFC_SQ_BUFFER_ATOMIC_CMPSWAP_X2 0x51 -#define V_008DFC_SQ_BUFFER_ATOMIC_ADD_X2 0x52 -#define V_008DFC_SQ_BUFFER_ATOMIC_SUB_X2 0x53 -#define V_008DFC_SQ_BUFFER_ATOMIC_RSUB_X2 0x54 /* not on CIK */ -#define V_008DFC_SQ_BUFFER_ATOMIC_SMIN_X2 0x55 -#define V_008DFC_SQ_BUFFER_ATOMIC_UMIN_X2 0x56 -#define V_008DFC_SQ_BUFFER_ATOMIC_SMAX_X2 0x57 -#define V_008DFC_SQ_BUFFER_ATOMIC_UMAX_X2 0x58 -#define V_008DFC_SQ_BUFFER_ATOMIC_AND_X2 0x59 -#define V_008DFC_SQ_BUFFER_ATOMIC_OR_X2 0x5A -#define V_008DFC_SQ_BUFFER_ATOMIC_XOR_X2 0x5B -#define V_008DFC_SQ_BUFFER_ATOMIC_INC_X2 0x5C -#define V_008DFC_SQ_BUFFER_ATOMIC_DEC_X2 0x5D -#define V_008DFC_SQ_BUFFER_ATOMIC_FCMPSWAP_X2 0x5E -#define V_008DFC_SQ_BUFFER_ATOMIC_FMIN_X2 0x5F -#define 
V_008DFC_SQ_BUFFER_ATOMIC_FMAX_X2 0x60 -#define V_008DFC_SQ_BUFFER_WBINVL1_SC 0x70 -/* CIK */ -#define V_008DFC_SQ_BUFFER_WBINVL1_VOL 0x70 -/* */ -#define V_008DFC_SQ_BUFFER_WBINVL1 0x71 -#define S_008DFC_ENCODING(x) (((x) & 0x3F) << 26) -#define G_008DFC_ENCODING(x) (((x) >> 26) & 0x3F) -#define C_008DFC_ENCODING 0x03FFFFFF -#define V_008DFC_SQ_ENC_MUBUF_FIELD 0x38 -#endif #define R_030E00_TA_CS_BC_BASE_ADDR 0x030E00 #define R_030E04_TA_CS_BC_BASE_ADDR_HI 0x030E04 #define S_030E04_ADDRESS(x) (((x) & 0xFF) << 0) @@ -5710,13 +2557,6 @@ #define G_00936C_EN_B(x) (((x) >> 31) & 0x1) #define C_00936C_EN_B 0x7FFFFFFF #define R_00950C_TA_CS_BC_BASE_ADDR 0x00950C -/* CIK */ -#define R_030E00_TA_CS_BC_BASE_ADDR 0x030E00 -#define R_030E04_TA_CS_BC_BASE_ADDR_HI 0x030E04 -#define S_030E04_ADDRESS(x) (((x) & 0xFF) << 0) -#define G_030E04_ADDRESS(x) (((x) >> 0) & 0xFF) -#define C_030E04_ADDRESS 0xFFFFFF00 -/* */ #define R_009858_DB_SUBTILE_CONTROL 0x009858 #define S_009858_MSAA1_X(x) (((x) & 0x03) << 0) #define G_009858_MSAA1_X(x) (((x) >> 0) & 0x03) From a6a6c68955bb8526aac3c04af96fdfdee71bc8bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 23:44:04 +0200 Subject: [PATCH 19/26] radeonsi: generate register and packet tables for an IB parser from sid.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes writing a good IB parser a lot easier. 
It generates 2 tables: - packet3 table - register table with all registers, fields, and named values Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/.gitignore | 1 + src/gallium/drivers/radeonsi/Makefile.am | 9 + src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/sid_tables.py | 179 ++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 src/gallium/drivers/radeonsi/.gitignore create mode 100755 src/gallium/drivers/radeonsi/sid_tables.py diff --git a/src/gallium/drivers/radeonsi/.gitignore b/src/gallium/drivers/radeonsi/.gitignore new file mode 100644 index 00000000000..e0ee798519e --- /dev/null +++ b/src/gallium/drivers/radeonsi/.gitignore @@ -0,0 +1 @@ +sid_tables.h diff --git a/src/gallium/drivers/radeonsi/Makefile.am b/src/gallium/drivers/radeonsi/Makefile.am index aa79c5e01ef..ae5035a8937 100644 --- a/src/gallium/drivers/radeonsi/Makefile.am +++ b/src/gallium/drivers/radeonsi/Makefile.am @@ -31,3 +31,12 @@ AM_CFLAGS = \ noinst_LTLIBRARIES = libradeonsi.la libradeonsi_la_SOURCES = $(C_SOURCES) + +sid_tables.h: $(srcdir)/sid_tables.py $(srcdir)/sid.h + $(AM_V_GEN) $(PYTHON2) $(srcdir)/sid_tables.py $(srcdir)/sid.h > $@ + +EXTRA_DIST = \ + sid_tables.py + +BUILT_SOURCES =\ + sid_tables.h diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index ae8fe5994b7..fd44807408e 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -7,6 +7,7 @@ C_SOURCES := \ si_debug.c \ si_descriptors.c \ sid.h \ + sid_tables.h \ si_dma.c \ si_hw_context.c \ si_pipe.c \ diff --git a/src/gallium/drivers/radeonsi/sid_tables.py b/src/gallium/drivers/radeonsi/sid_tables.py new file mode 100755 index 00000000000..1fe5d3c589a --- /dev/null +++ b/src/gallium/drivers/radeonsi/sid_tables.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python + +CopyRight = ''' +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +''' + +import sys +import re + + +class Field: + def __init__(self, reg, s_name): + self.s_name = s_name + self.name = strip_prefix(s_name) + self.values = [] + self.varname_values = '%s__%s__values' % (reg.r_name.lower(), self.name.lower()) + +class Reg: + def __init__(self, r_name): + self.r_name = r_name + self.name = strip_prefix(r_name) + self.fields = [] + self.varname_fields = '%s__fields' % self.r_name.lower() + self.own_fields = True + + +def strip_prefix(s): + '''Strip prefix in the form ._.*_, e.g. 
R_001234_''' + return s[s[2:].find('_')+3:] + + +def parse(filename): + stream = open(filename) + regs = [] + packets = [] + + for line in stream: + if not line.startswith('#define '): + continue + + line = line[8:].strip() + + if line.startswith('R_'): + reg = Reg(line.split()[0]) + regs.append(reg) + + elif line.startswith('S_'): + field = Field(reg, line[:line.find('(')]) + reg.fields.append(field) + + elif line.startswith('V_'): + field.values.append(line.split()[0]) + + elif line.startswith('PKT3_') and line.find('0x') != -1 and line.find('(') == -1: + packets.append(line.split()[0]) + + # Copy fields to indexed registers which have their fields only defined + # at register index 0. + # For example, copy fields from CB_COLOR0_INFO to CB_COLORn_INFO, n > 0. + match_number = re.compile('[0-9]+') + reg_dict = dict() + + # Create a dict of registers with fields and '0' in their name + for reg in regs: + if len(reg.fields) and reg.name.find('0') != -1: + reg_dict[reg.name] = reg + + # Assign fields + for reg in regs: + if not len(reg.fields): + reg0 = reg_dict.get(match_number.sub('0', reg.name)) + if reg0 != None: + reg.fields = reg0.fields + reg.varname_fields = reg0.varname_fields + reg.own_fields = False + + return (regs, packets) + + +def write_tables(tables): + regs = tables[0] + packets = tables[1] + + print '/* This file is autogenerated by sid_tables.py from sid.h. Do not edit directly. 
*/' + print + print CopyRight.strip() + print ''' +#ifndef SID_TABLES_H +#define SID_TABLES_H + +struct si_field { + const char *name; + unsigned mask; + unsigned num_values; + const char **values; +}; + +struct si_reg { + const char *name; + unsigned offset; + unsigned num_fields; + const struct si_field *fields; +}; + +struct si_packet3 { + const char *name; + unsigned op; +}; +''' + + print 'static const struct si_packet3 packet3_table[] = {' + for pkt in packets: + print '\t{"%s", %s},' % (pkt[5:], pkt) + print '};' + print + + for reg in regs: + if len(reg.fields) and reg.own_fields: + for field in reg.fields: + if len(field.values): + print 'static const char *%s[] = {' % (field.varname_values) + for value in field.values: + print '\t[%s] = "%s",' % (value, strip_prefix(value)) + print '};' + print + + print 'static const struct si_field %s[] = {' % (reg.varname_fields) + for field in reg.fields: + if len(field.values): + print '\t{"%s", %s(~0u), ARRAY_SIZE(%s), %s},' % (field.name, + field.s_name, field.varname_values, field.varname_values) + else: + print '\t{"%s", %s(~0u)},' % (field.name, field.s_name) + print '};' + print + + print 'static const struct si_reg reg_table[] = {' + for reg in regs: + if len(reg.fields): + print '\t{"%s", %s, ARRAY_SIZE(%s), %s},' % (reg.name, reg.r_name, + reg.varname_fields, reg.varname_fields) + else: + print '\t{"%s", %s},' % (reg.name, reg.r_name) + print '};' + print + print '#endif' + + +def main(): + tables = [] + for arg in sys.argv[1:]: + tables.extend(parse(arg)) + write_tables(tables) + + +if __name__ == '__main__': + main() From be6dc8777662645958d4be6639ee2bb47c5054d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 12:46:17 +0200 Subject: [PATCH 20/26] radeonsi: save the contents of indirect buffers for debug contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used by the IB parser. 
Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_hw_context.c | 8 ++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.h | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 307dc391431..f848c648946 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -130,6 +130,14 @@ void si_context_gfx_flush(void *context, unsigned flags, /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + /* Save the IB for debug contexts. */ + if (ctx->is_debug) { + free(ctx->last_ib); + ctx->last_ib_dw_size = cs->cdw; + ctx->last_ib = malloc(cs->cdw * 4); + memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); + } + /* Flush the CS. */ ws->cs_flush(cs, flags, &ctx->last_gfx_fence, ctx->screen->b.cs_count++); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f6d4a5a9fb8..e5900b74806 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,7 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); #endif + free(sctx->last_ib); FREE(sctx); } @@ -112,6 +113,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.b.destroy = si_destroy_context; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. 
*/ + sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3ab95385d8e..3672fec33a3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -243,6 +243,11 @@ struct si_context { struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; + + /* Debug state. */ + bool is_debug; + uint32_t *last_ib; + unsigned last_ib_dw_size; }; /* cik_sdma.c */ From 61df4f0cd3f34ed11dd64932aeb7731d354f2bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2015 23:57:22 +0200 Subject: [PATCH 21/26] radeonsi: add an IB parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_debug.c | 245 ++++++++++++++++++++++++ 1 file changed, 245 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index fa2ec0342ce..d692547ee82 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -27,6 +27,7 @@ #include "si_pipe.h" #include "si_shader.h" #include "sid.h" +#include "sid_tables.h" static void si_dump_shader(struct si_shader_selector *sel, const char *name, @@ -40,6 +41,243 @@ static void si_dump_shader(struct si_shader_selector *sel, const char *name, fprintf(f, "%s\n\n", sel->current->binary.disasm_string); } +/* Parsed IBs are difficult to read without colors. Use "less -R file" to + * read them, or use "aha -b -f file" to convert them to html. 
+ */ +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_CYAN "\033[1;36m" + +#define INDENT_PKT 8 + +static void print_spaces(FILE *f, unsigned num) +{ + fprintf(f, "%*s", num, ""); +} + +static void print_value(FILE *file, uint32_t value, int bits) +{ + /* Guess if it's int or float */ + if (value <= (1 << 15)) + fprintf(file, "%u\n", value); + else { + float f = uif(value); + + if (fabs(f) < 100000 && f*10 == floor(f*10)) + fprintf(file, "%.1ff\n", f); + else + /* Don't print more leading zeros than there are bits. */ + fprintf(file, "0x%0*x\n", bits / 4, value); + } +} + +static void print_named_value(FILE *file, const char *name, uint32_t value, + int bits) +{ + print_spaces(file, INDENT_PKT); + fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name); + print_value(file, value, bits); +} + +static void si_dump_reg(FILE *file, unsigned offset, uint32_t value, + uint32_t field_mask) +{ + int r, f; + + for (r = 0; r < ARRAY_SIZE(reg_table); r++) { + const struct si_reg *reg = ®_table[r]; + + if (reg->offset == offset) { + bool first_field = true; + + print_spaces(file, INDENT_PKT); + fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", + reg->name); + + if (!reg->num_fields) { + print_value(file, value, 32); + return; + } + + for (f = 0; f < reg->num_fields; f++) { + const struct si_field *field = ®->fields[f]; + uint32_t val = (value & field->mask) >> + (ffs(field->mask) - 1); + + if (!(field->mask & field_mask)) + continue; + + /* Indent the field. */ + if (!first_field) + print_spaces(file, + INDENT_PKT + strlen(reg->name) + 4); + + /* Print the field. 
*/ + fprintf(file, "%s = ", field->name); + + if (val < field->num_values && field->values[val]) + fprintf(file, "%s\n", field->values[val]); + else + print_value(file, val, + util_bitcount(field->mask)); + + first_field = false; + } + return; + } + } + + fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " = 0x%08x", offset, value); +} + +static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, + unsigned reg_offset) +{ + unsigned reg = (ib[1] << 2) + reg_offset; + int i; + + for (i = 0; i < count; i++) + si_dump_reg(f, reg + i*4, ib[2+i], ~0); +} + +static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) +{ + unsigned count = PKT_COUNT_G(ib[0]); + unsigned op = PKT3_IT_OPCODE_G(ib[0]); + const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : ""; + int i; + + /* Print the name first. */ + for (i = 0; i < ARRAY_SIZE(packet3_table); i++) + if (packet3_table[i].op == op) + break; + + if (i < ARRAY_SIZE(packet3_table)) + if (op == PKT3_SET_CONTEXT_REG || + op == PKT3_SET_CONFIG_REG || + op == PKT3_SET_UCONFIG_REG || + op == PKT3_SET_SH_REG) + fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", + packet3_table[i].name, predicate); + else + fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n", + packet3_table[i].name, predicate); + else + fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n", + op, predicate); + + /* Print the contents. 
*/ + switch (op) { + case PKT3_SET_CONTEXT_REG: + si_parse_set_reg_packet(f, ib, count, SI_CONTEXT_REG_OFFSET); + break; + case PKT3_SET_CONFIG_REG: + si_parse_set_reg_packet(f, ib, count, SI_CONFIG_REG_OFFSET); + break; + case PKT3_SET_UCONFIG_REG: + si_parse_set_reg_packet(f, ib, count, CIK_UCONFIG_REG_OFFSET); + break; + case PKT3_SET_SH_REG: + si_parse_set_reg_packet(f, ib, count, SI_SH_REG_OFFSET); + break; + case PKT3_DRAW_PREAMBLE: + si_dump_reg(f, R_030908_VGT_PRIMITIVE_TYPE, ib[1], ~0); + si_dump_reg(f, R_028AA8_IA_MULTI_VGT_PARAM, ib[2], ~0); + si_dump_reg(f, R_028B58_VGT_LS_HS_CONFIG, ib[3], ~0); + break; + case PKT3_ACQUIRE_MEM: + si_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0); + si_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0); + si_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ib[3], ~0); + si_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[4], ~0); + si_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ib[5], ~0); + print_named_value(f, "POLL_INTERVAL", ib[6], 16); + break; + case PKT3_SURFACE_SYNC: + si_dump_reg(f, R_0085F0_CP_COHER_CNTL, ib[1], ~0); + si_dump_reg(f, R_0085F4_CP_COHER_SIZE, ib[2], ~0); + si_dump_reg(f, R_0085F8_CP_COHER_BASE, ib[3], ~0); + print_named_value(f, "POLL_INTERVAL", ib[4], 16); + break; + case PKT3_EVENT_WRITE: + si_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1], + S_028A90_EVENT_TYPE(~0)); + print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4); + print_named_value(f, "INV_L2", (ib[1] >> 20) & 0x1, 1); + if (count > 0) { + print_named_value(f, "ADDRESS_LO", ib[2], 32); + print_named_value(f, "ADDRESS_HI", ib[3], 16); + } + break; + case PKT3_DRAW_INDEX_AUTO: + si_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0); + si_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0); + break; + case PKT3_DRAW_INDEX_2: + si_dump_reg(f, R_028A78_VGT_DMA_MAX_SIZE, ib[1], ~0); + si_dump_reg(f, R_0287E8_VGT_DMA_BASE, ib[2], ~0); + si_dump_reg(f, R_0287E4_VGT_DMA_BASE_HI, ib[3], ~0); + si_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[4], ~0); + si_dump_reg(f, 
R_0287F0_VGT_DRAW_INITIATOR, ib[5], ~0); + break; + case PKT3_INDEX_TYPE: + si_dump_reg(f, R_028A7C_VGT_DMA_INDEX_TYPE, ib[1], ~0); + break; + case PKT3_NUM_INSTANCES: + si_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0); + break; + case PKT3_NOP: + if (ib[0] == 0xffff1000) { + count = -1; /* One dword NOP. */ + break; + } + /* fall through, print all dwords */ + default: + for (i = 0; i < count+1; i++) { + print_spaces(f, INDENT_PKT); + fprintf(f, "0x%08x\n", ib[1+i]); + } + } + + ib += count + 2; + *num_dw -= count + 2; + return ib; +} + +static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) +{ + fprintf(f, "------------------ IB begin ------------------\n"); + + while (num_dw > 0) { + unsigned type = PKT_TYPE_G(ib[0]); + + switch (type) { + case 3: + ib = si_parse_packet3(f, ib, &num_dw); + break; + case 2: + /* type-2 nop */ + if (ib[0] == 0x80000000) { + fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n"); + ib++; + break; + } + /* fall through */ + default: + fprintf(f, "Unknown packet type %i\n", type); + return; + } + } + + fprintf(f, "------------------- IB end -------------------\n"); + if (num_dw < 0) { + printf("Packet ends after the end of IB.\n"); + exit(0); + } +} + static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags) { @@ -50,6 +288,13 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f); si_dump_shader(sctx->gs_shader, "Geometry", f); si_dump_shader(sctx->ps_shader, "Fragment", f); + + if (sctx->last_ib) { + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size); + free(sctx->last_ib); /* dump only once */ + sctx->last_ib = NULL; + } + fprintf(f, "Done.\n"); } From df6a5666b6398613e552f66cd092369b12cce9ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 16 Aug 2015 00:54:34 +0200 Subject: [PATCH 22/26] radeonsi: parse and dump status registers on GPU hang MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit GPU hang detection must be enabled by setting: GALLIUM_DDEBUG=[timeout in ms] This may print too much information that we might not understand yet, but some of the bits are very useful. Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_debug.c | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index d692547ee82..72b7989e92c 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -278,11 +278,63 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) } } +static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, + unsigned offset) +{ + struct radeon_winsys *ws = sctx->b.ws; + uint32_t value; + + ws->read_registers(ws, offset, 1, &value); + si_dump_reg(f, offset, value, ~0); +} + +static void si_dump_debug_registers(struct si_context *sctx, FILE *f) +{ + if (sctx->screen->b.info.drm_major == 2 && + sctx->screen->b.info.drm_minor < 42) + return; /* no radeon support */ + + fprintf(f, "Memory-mapped registers:\n"); + si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); + + /* No other registers can be read on DRM < 3.1.0. 
*/ + if (sctx->screen->b.info.drm_major < 3 || + sctx->screen->b.info.drm_minor < 1) { + fprintf(f, "\n"); + return; + } + + si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2); + si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0); + si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1); + si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2); + si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); + si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); + si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); + si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); + si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); + si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); + si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT); + si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1); + si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2); + si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3); + si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS); + si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT); + si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1); + si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS); + si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT); + si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1); + fprintf(f, "\n"); +} + static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags) { struct si_context *sctx = (struct si_context*)ctx; + if (flags & PIPE_DEBUG_DEVICE_IS_HUNG) + si_dump_debug_registers(sctx, f); + si_dump_shader(sctx->vs_shader, "Vertex", f); si_dump_shader(sctx->tcs_shader, "Tessellation control", f); si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f); From 189953ee13ad7d6b5d9d04ac21a230e8137a700d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 17 Aug 2015 19:17:16 +0200 Subject: [PATCH 23/26] radeonsi: remove old CS tracing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of it 
is left there and it will be re-used in the next commit. Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_hw_context.c | 25 +------------------- src/gallium/drivers/radeonsi/si_pipe.h | 5 ---- src/gallium/drivers/radeonsi/si_pm4.c | 12 ---------- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 7 +----- 5 files changed, 3 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index f848c648946..b726eb3cdd8 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -88,11 +88,8 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, /* Count in framebuffer cache flushes at the end of CS. */ num_dw += ctx->atoms.s.cache_flush->num_dw; -#if SI_TRACE_CS - if (ctx->screen->b.trace_bo) { + if (ctx->screen->b.trace_bo) num_dw += SI_TRACE_CS_DWORDS; - } -#endif /* Flush if there's not enough space. 
*/ if (num_dw > cs->max_dw) { @@ -146,26 +143,6 @@ void si_context_gfx_flush(void *context, unsigned flags, if (fence) ws->fence_reference(fence, ctx->last_gfx_fence); -#if SI_TRACE_CS - if (ctx->screen->b.trace_bo) { - struct si_screen *sscreen = ctx->screen; - unsigned i; - - for (i = 0; i < 10; i++) { - usleep(5); - if (!ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) { - break; - } - } - if (i == 10) { - fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n", - sscreen->b.trace_ptr[1], sscreen->b.trace_ptr[0]); - } else { - fprintf(stderr, "cs %d executed in %dms\n", sscreen->b.trace_ptr[1], i * 5); - } - } -#endif - si_begin_new_cs(ctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3672fec33a3..09a21ceb618 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -43,7 +43,6 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 -#define SI_TRACE_CS 0 #define SI_TRACE_CS_DWORDS 6 #define SI_MAX_DRAW_CS_DWORDS \ @@ -298,10 +297,6 @@ void si_context_gfx_flush(void *context, unsigned flags, void si_begin_new_cs(struct si_context *ctx); void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in); -#if SI_TRACE_CS -void si_trace_emit(struct si_context *sctx); -#endif - /* si_compute.c */ void si_init_compute_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index e9b9a5f556a..036d90cabb1 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -135,12 +135,6 @@ unsigned si_pm4_dirty_dw(struct si_context *sctx) continue; count += state->ndw; -#if SI_TRACE_CS - /* for tracing each states */ - if (sctx->screen->b.trace_bo) { - count += SI_TRACE_CS_DWORDS; - } -#endif } return count; @@ -161,12 +155,6 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state 
*state) } cs->cdw += state->ndw; - -#if SI_TRACE_CS - if (sctx->screen->b.trace_bo) { - si_trace_emit(sctx); - } -#endif } void si_pm4_emit_dirty(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index b8f63c5dd36..118c5622c62 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -281,6 +281,7 @@ extern const struct r600_atom si_atom_msaa_sample_locs; extern const struct r600_atom si_atom_msaa_config; void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom); void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); +void si_trace_emit(struct si_context *sctx); /* si_commands.c */ void si_cmd_context_control(struct si_pm4_state *pm4); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 4c21655596c..e56c9e70eca 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -835,11 +835,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); -#if SI_TRACE_CS - if (sctx->screen->b.trace_bo) { + if (sctx->screen->b.trace_bo) si_trace_emit(sctx); - } -#endif /* Workaround for a VGT hang when streamout is enabled. * It must be done after drawing. 
*/ @@ -874,7 +871,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) sctx->b.num_draw_calls++; } -#if SI_TRACE_CS void si_trace_emit(struct si_context *sctx) { struct si_screen *sscreen = sctx->screen; @@ -893,4 +889,3 @@ void si_trace_emit(struct si_context *sctx) radeon_emit(cs, cs->cdw); radeon_emit(cs, sscreen->b.cs_count); } -#endif From 2c14a6d3b1c53d5814414ce9e91fd8d24c90b787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 19 Aug 2015 11:53:25 +0200 Subject: [PATCH 24/26] radeonsi: add IB tracing support for debug contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds trace points to all IBs and the parser prints them and also prints which trace points were reached (executed) by the CP. This can help pinpoint a problematic packet, draw call, etc. Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_debug.c | 67 ++++++++++++++++++-- src/gallium/drivers/radeonsi/si_hw_context.c | 24 ++++++- src/gallium/drivers/radeonsi/si_pipe.c | 2 + src/gallium/drivers/radeonsi/si_pipe.h | 9 ++- src/gallium/drivers/radeonsi/si_state_draw.c | 19 +++--- 5 files changed, 105 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 72b7989e92c..cf09686c636 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, si_dump_reg(f, reg + i*4, ib[2+i], ~0); } -static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) +static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, + int trace_id) { unsigned count = PKT_COUNT_G(ib[0]); unsigned op = PKT3_IT_OPCODE_G(ib[0]); @@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) if (ib[0] == 0xffff1000) { count = -1; /* One dword 
NOP. */ break; + } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) { + unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]); + + print_spaces(f, INDENT_PKT); + fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); + + if (trace_id == -1) + break; /* tracing was disabled */ + + print_spaces(f, INDENT_PKT); + if (packet_id < trace_id) + fprintf(f, COLOR_RED + "This trace point was reached by the CP." + COLOR_RESET "\n"); + else if (packet_id == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the last trace point that " + "was reached by the CP !!!!!" + COLOR_RESET "\n"); + else if (packet_id+1 == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the first trace point that " + "was NOT been reached by the CP !!!!!" + COLOR_RESET "\n"); + else + fprintf(f, COLOR_RED + "!!!!! This trace point was NOT reached " + "by the CP !!!!!" + COLOR_RESET "\n"); + break; } /* fall through, print all dwords */ default: @@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) return ib; } -static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) +/** + * Parse and print an IB into a file. 
+ * + * \param f file + * \param ib IB + * \param num_dw size of the IB + * \param chip_class chip class + * \param trace_id the last trace ID that is known to have been reached + * and executed by the CP, typically read from a buffer + */ +static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id) { fprintf(f, "------------------ IB begin ------------------\n"); @@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) switch (type) { case 3: - ib = si_parse_packet3(f, ib, &num_dw); + ib = si_parse_packet3(f, ib, &num_dw, trace_id); break; case 2: /* type-2 nop */ @@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->ps_shader, "Fragment", f); if (sctx->last_ib) { - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size); + int last_trace_id = -1; + + if (sctx->last_trace_buf) { + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = + sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, + last_trace_id); free(sctx->last_ib); /* dump only once */ sctx->last_ib = NULL; + r600_resource_reference(&sctx->last_trace_buf, NULL); } fprintf(f, "Done.\n"); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b726eb3cdd8..110e3163021 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, num_dw += ctx->atoms.s.cache_flush->num_dw; if (ctx->screen->b.trace_bo) - num_dw += SI_TRACE_CS_DWORDS; + num_dw += SI_TRACE_CS_DWORDS * 2; /* Flush if there's not enough space. 
*/ if (num_dw > cs->max_dw) { @@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags, /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Save the IB for debug contexts. */ if (ctx->is_debug) { free(ctx->last_ib); ctx->last_ib_dw_size = cs->cdw; ctx->last_ib = malloc(cs->cdw * 4); memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); + r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); + r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ @@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags, void si_begin_new_cs(struct si_context *ctx) { + if (ctx->is_debug) { + uint32_t zero = 0; + + /* Create a buffer used for writing trace IDs and initialize it to 0. */ + assert(!ctx->trace_buf); + ctx->trace_buf = (struct r600_resource*) + pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, 4); + if (ctx->trace_buf) + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, + 0, sizeof(zero), &zero); + ctx->trace_id = 0; + } + + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Flush read caches at the beginning of CS. 
*/ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | SI_CONTEXT_INV_TC_L1 | diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e5900b74806..92c6ae3de2b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); #endif + r600_resource_reference(&sctx->trace_buf, NULL); + r600_resource_reference(&sctx->last_trace_buf, NULL); free(sctx->last_ib); FREE(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09a21ceb618..52167f24a95 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -43,7 +43,7 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 -#define SI_TRACE_CS_DWORDS 6 +#define SI_TRACE_CS_DWORDS 7 #define SI_MAX_DRAW_CS_DWORDS \ (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \ @@ -81,6 +81,10 @@ SI_CONTEXT_FLUSH_AND_INV_DB | \ SI_CONTEXT_FLUSH_AND_INV_DB_META) +#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) +#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) +#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) + struct si_compute; struct si_screen { @@ -247,6 +251,9 @@ struct si_context { bool is_debug; uint32_t *last_ib; unsigned last_ib_dw_size; + struct r600_resource *last_trace_buf; + struct r600_resource *trace_buf; + unsigned trace_id; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e56c9e70eca..b1aba1290d6 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); - if (sctx->screen->b.trace_bo) + if 
(sctx->trace_buf) si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. @@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) void si_trace_emit(struct si_context *sctx) { - struct si_screen *sscreen = sctx->screen; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va; - va = sscreen->b.trace_bo->gpu_address; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo, + sctx->trace_id++; + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | PKT3_WRITE_DATA_WR_CONFIRM | PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); - radeon_emit(cs, cs->cdw); - radeon_emit(cs, sscreen->b.cs_count); + radeon_emit(cs, sctx->trace_buf->gpu_address); + radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); + radeon_emit(cs, sctx->trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id)); } From 16e5d8ad388445c2e577406953a403608f1addc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 19 Aug 2015 18:45:11 +0200 Subject: [PATCH 25/26] radeonsi: add IB parser support for CP DMA packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the packet encoding is defined in the same format as register definitions, the python script can process them automatically and the parser support becomes trivial. 
Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_cp_dma.c | 17 +-- src/gallium/drivers/radeonsi/si_debug.c | 24 ++++ src/gallium/drivers/radeonsi/si_state_draw.c | 6 +- src/gallium/drivers/radeonsi/sid.h | 136 ++++++++++++------- 4 files changed, 122 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index f8a9da45a10..7bdac97eaa4 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -47,10 +47,11 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx, unsigned size, unsigned flags) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; - uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; + uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0; + uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0; uint32_t sel = flags & CIK_CP_DMA_USE_L2 ? - PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0; + S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | + S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0; assert(size); assert((size & ((1<<21)-1)) == size); @@ -79,16 +80,16 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, uint32_t clear_value, unsigned flags) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; - uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; - uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0; + uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0; + uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0; + uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? 
S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0; assert(size); assert((size & ((1<<21)-1)) == size); if (sctx->b.chip_class >= CIK) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */ radeon_emit(cs, clear_value); /* DATA [31:0] */ radeon_emit(cs, 0); radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ @@ -97,7 +98,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, } else { radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, clear_value); /* DATA [31:0] */ - radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index cf09686c636..22d6f250b03 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -229,6 +229,30 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, case PKT3_NUM_INSTANCES: si_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0); break; + case PKT3_WRITE_DATA: + si_dump_reg(f, R_370_CONTROL, ib[1], ~0); + si_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0); + si_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0); + for (i = 2; i < count; i++) { + print_spaces(f, INDENT_PKT); + fprintf(f, "0x%08x\n", ib[2+i]); + } + break; + case PKT3_CP_DMA: + si_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0); + si_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0); + si_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0); + si_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0); + si_dump_reg(f, R_414_COMMAND, ib[5], ~0); + break; 
+ case PKT3_DMA_DATA: + si_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0); + si_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0); + si_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0); + si_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0); + si_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0); + si_dump_reg(f, R_414_COMMAND, ib[6], ~0); + break; case PKT3_NOP: if (ib[0] == 0xffff1000) { count = -1; /* One dword NOP. */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index b1aba1290d6..fd2fecaa741 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -879,9 +879,9 @@ void si_trace_emit(struct si_context *sctx) r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | - PKT3_WRITE_DATA_WR_CONFIRM | - PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, sctx->trace_buf->gpu_address); radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); radeon_emit(cs, sctx->trace_id); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 66660e32824..cd6be73f66c 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -69,6 +69,10 @@ #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 +/* All registers defined in this packet section don't exist and the only + * purpose of these definitions is to define packet encoding that + * the IB parser understands, and also to have an accurate documentation. 
+ */ #define PKT3_NOP 0x10 #define PKT3_SET_BASE 0x11 #define PKT3_CLEAR_STATE 0x12 @@ -95,19 +99,23 @@ #define PKT3_DRAW_INDEX_OFFSET_2 0x35 #define PKT3_DRAW_PREAMBLE 0x36 /* new on CIK, required on GFX7.2 and later */ #define PKT3_WRITE_DATA 0x37 -#define PKT3_WRITE_DATA_DST_SEL(x) ((x) << 8) -#define PKT3_WRITE_DATA_DST_SEL_REG 0 -#define PKT3_WRITE_DATA_DST_SEL_MEM_SYNC 1 -#define PKT3_WRITE_DATA_DST_SEL_TC_L2 2 -#define PKT3_WRITE_DATA_DST_SEL_GDS 3 -#define PKT3_WRITE_DATA_DST_SEL_RESERVED_4 4 -#define PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC 5 -#define PKT3_WR_ONE_ADDR (1 << 16) -#define PKT3_WRITE_DATA_WR_CONFIRM (1 << 20) -#define PKT3_WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define PKT3_WRITE_DATA_ENGINE_SEL_ME 0 -#define PKT3_WRITE_DATA_ENGINE_SEL_PFP 1 -#define PKT3_WRITE_DATA_ENGINE_SEL_CE 2 +#define R_370_CONTROL 0x370 /* 0x[packet number][word index] */ +#define S_370_ENGINE_SEL(x) (((x) & 0x3) << 30) +#define V_370_ME 0 +#define V_370_PFP 1 +#define V_370_CE 2 +#define V_370_DE 3 +#define S_370_WR_CONFIRM(x) (((x) & 0x1) << 20) +#define S_370_WR_ONE_ADDR(x) (((x) & 0x1) << 16) +#define S_370_DST_SEL(x) (((x) & 0xf) << 8) +#define V_370_MEM_MAPPED_REGISTER 0 +#define V_370_MEMORY_SYNC 1 +#define V_370_TC_L2 2 +#define V_370_GDS 3 +#define V_370_RESERVED 4 +#define V_370_MEM_ASYNC 5 +#define R_371_DST_ADDR_LO 0x371 +#define R_372_DST_ADDR_HI 0x372 #define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PKT3_MEM_SEMAPHORE 0x39 #define PKT3_MPEG_INDEX 0x3A /* not on CIK */ @@ -159,42 +167,53 @@ * 5. DST_ADDR_HI [15:0] * 6. 
COMMAND [29:22] | BYTE_COUNT [20:0] */ -#define PKT3_CP_DMA_CP_SYNC (1 << 31) -#define PKT3_CP_DMA_SRC_SEL(x) ((x) << 29) -/* 0 - SRC_ADDR - * 1 - GDS (program SAS to 1 as well) - * 2 - DATA - * 3 - SRC_ADDR using TC L2 (DMA_DATA only) - */ -#define PKT3_CP_DMA_DST_SEL(x) ((x) << 20) -/* 0 - DST_ADDR - * 1 - GDS (program DAS to 1 as well) - * 3 - DST_ADDR using TC L2 (DMA_DATA only) - */ -/* COMMAND */ -#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) -/* 0 - none - * 1 - 8 in 16 - * 2 - 8 in 32 - * 3 - 8 in 64 - */ -#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) -/* 0 - none - * 1 - 8 in 16 - * 2 - 8 in 32 - * 3 - 8 in 64 - */ -#define PKT3_CP_DMA_CMD_SAS (1 << 26) -/* 0 - memory - * 1 - register - */ -#define PKT3_CP_DMA_CMD_DAS (1 << 27) -/* 0 - memory - * 1 - register - */ -#define PKT3_CP_DMA_CMD_SAIC (1 << 28) -#define PKT3_CP_DMA_CMD_DAIC (1 << 29) -#define PKT3_CP_DMA_CMD_RAW_WAIT (1 << 30) +#define R_410_CP_DMA_WORD0 0x410 /* 0x[packet number][word index] */ +#define S_410_SRC_ADDR_LO(x) ((x) & 0xffffffff) +#define R_411_CP_DMA_WORD1 0x411 +#define S_411_CP_SYNC(x) (((x) & 0x1) << 31) +#define S_411_SRC_SEL(x) (((x) & 0x3) << 29) +#define V_411_SRC_ADDR 0 +#define V_411_GDS 1 /* program SAS to 1 as well */ +#define V_411_DATA 2 +#define V_411_SRC_ADDR_TC_L2 3 /* new for CIK */ +#define S_411_ENGINE(x) (((x) & 0x1) << 27) +#define V_411_ME 0 +#define V_411_PFP 1 +#define S_411_DSL_SEL(x) (((x) & 0x3) << 20) +#define V_411_DST_ADDR 0 +#define V_411_GDS 1 /* program DAS to 1 as well */ +#define V_411_DST_ADDR_TC_L2 3 /* new for CIK */ +#define S_411_SRC_ADDR_HI(x) ((x) & 0xffff) +#define R_412_CP_DMA_WORD2 0x412 /* 0x[packet number][word index] */ +#define S_412_DST_ADDR_LO(x) ((x) & 0xffffffff) +#define R_413_CP_DMA_WORD3 0x413 /* 0x[packet number][word index] */ +#define S_413_DST_ADDR_HI(x) ((x) & 0xffff) +#define R_414_COMMAND 0x414 +#define S_414_BYTE_COUNT(x) ((x) & 0x1fffff) +#define S_414_DISABLE_WR_CONFIRM(x) (((x) & 0x1) << 21) +#define 
S_414_SRC_SWAP(x) (((x) & 0x3) << 22) +#define V_414_NONE 0 +#define V_414_8_IN_16 1 +#define V_414_8_IN_32 2 +#define V_414_8_IN_64 3 +#define S_414_DST_SWAP(x) (((x) & 0x3) << 24) +#define V_414_NONE 0 +#define V_414_8_IN_16 1 +#define V_414_8_IN_32 2 +#define V_414_8_IN_64 3 +#define S_414_SAS(x) (((x) & 0x1) << 26) +#define V_414_MEMORY 0 +#define V_414_REGISTER 1 +#define S_414_DAS(x) (((x) & 0x1) << 27) +#define V_414_MEMORY 0 +#define V_414_REGISTER 1 +#define S_414_SAIC(x) (((x) & 0x1) << 28) +#define V_414_INCREMENT 0 +#define V_414_NO_INCREMENT 1 +#define S_414_DAIC(x) (((x) & 0x1) << 29) +#define V_414_INCREMENT 0 +#define V_414_NO_INCREMENT 1 +#define S_414_RAW_WAIT(x) (((x) & 0x1) << 30) #define PKT3_DMA_DATA 0x50 /* new for CIK */ /* 1. header @@ -205,7 +224,24 @@ * 5. DST_ADDR_HI [31:0] * 6. COMMAND [29:22] | BYTE_COUNT [20:0] */ - +#define R_500_DMA_DATA_WORD0 0x500 /* 0x[packet number][word index] */ +#define S_500_CP_SYNC(x) (((x) & 0x1) << 31) +#define S_500_SRC_SEL(x) (((x) & 0x3) << 29) +#define V_500_SRC_ADDR 0 +#define V_500_GDS 1 /* program SAS to 1 as well */ +#define V_500_DATA 2 +#define V_500_SRC_ADDR_TC_L2 3 /* new for CIK */ +#define S_500_DSL_SEL(x) (((x) & 0x3) << 20) +#define V_500_DST_ADDR 0 +#define V_500_GDS 1 /* program DAS to 1 as well */ +#define V_500_DST_ADDR_TC_L2 3 /* new for CIK */ +#define S_500_ENGINE(x) ((x) & 0x1) +#define V_500_ME 0 +#define V_500_PFP 1 +#define R_501_SRC_ADDR_LO 0x501 +#define R_502_SRC_ADDR_HI 0x502 +#define R_503_DST_ADDR_LO 0x503 +#define R_504_DST_ADDR_HI 0x504 #define R_000E4C_SRBM_STATUS2 0x000E4C #define S_000E4C_SDMA_RQ_PENDING(x) (((x) & 0x1) << 0) From 6924ecac77d1d041420c571de9d31cff1d30aecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 22 Aug 2015 14:17:10 +0200 Subject: [PATCH 26/26] gallium/radeon: read_registers should return bool meaning success or failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeon/radeon_winsys.h | 2 +- src/gallium/drivers/radeonsi/si_debug.c | 4 ++-- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 +++--- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 7ab6e56e099..a4a2ae15acd 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -680,7 +680,7 @@ struct radeon_winsys { uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); - void (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, + bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, uint32_t *out); }; diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 22d6f250b03..d3fd201ae26 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -349,8 +349,8 @@ static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, struct radeon_winsys *ws = sctx->b.ws; uint32_t value; - ws->read_registers(ws, offset, 1, &value); - si_dump_reg(f, offset, value, ~0); + if (ws->read_registers(ws, offset, 1, &value)) + si_dump_reg(f, offset, value, ~0); } static void si_dump_debug_registers(struct si_context *sctx, FILE *f) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 012c9003b69..875dcd09c6b 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -350,14 +350,14 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws, return 0; } -static void amdgpu_read_registers(struct radeon_winsys *rws, +static bool amdgpu_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t 
*out) { struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; - amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, - 0xffffffff, 0, out); + return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, + 0xffffffff, 0, out) == 0; } static unsigned hash_dev(void *key) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index f7784fb795e..384d7280380 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -583,7 +583,7 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws, return 0; } -static void radeon_read_registers(struct radeon_winsys *rws, +static bool radeon_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { @@ -593,9 +593,11 @@ static void radeon_read_registers(struct radeon_winsys *rws, for (i = 0; i < num_registers; i++) { uint32_t reg = reg_offset + i*4; - radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, "read-reg", &reg); + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, &reg)) + return false; out[i] = reg; } + return true; } static unsigned hash_fd(void *key)